immunio 1.2.1 → 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (291) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +13 -5
  3. data/ext/immunio/Rakefile +14 -6
  4. data/lib/immunio/context.rb +2 -0
  5. data/lib/immunio/plugins/action_view.rb +7 -668
  6. data/lib/immunio/plugins/action_view/action_view.rb +22 -0
  7. data/lib/immunio/plugins/action_view/active_support_hash.rb +29 -0
  8. data/lib/immunio/plugins/action_view/cache_store.rb +24 -0
  9. data/lib/immunio/plugins/action_view/erubi.rb +38 -0
  10. data/lib/immunio/plugins/action_view/erubis.rb +39 -0
  11. data/lib/immunio/plugins/action_view/fragment_caching.rb +29 -0
  12. data/lib/immunio/plugins/action_view/haml.rb +46 -0
  13. data/lib/immunio/plugins/action_view/slim.rb +42 -0
  14. data/lib/immunio/plugins/action_view/template.rb +431 -0
  15. data/lib/immunio/plugins/action_view/template_rendering.rb +45 -0
  16. data/lib/immunio/plugins/http_tracker.rb +2 -0
  17. data/lib/immunio/plugins/io.rb +34 -0
  18. data/lib/immunio/version.rb +1 -1
  19. data/lua-hooks/Makefile +36 -9
  20. data/lua-hooks/ext/luajit/COPYRIGHT +1 -1
  21. data/lua-hooks/ext/luajit/Makefile +22 -15
  22. data/lua-hooks/ext/luajit/README +2 -2
  23. data/lua-hooks/ext/luajit/doc/bluequad-print.css +1 -1
  24. data/lua-hooks/ext/luajit/doc/bluequad.css +1 -1
  25. data/lua-hooks/ext/luajit/doc/changes.html +69 -3
  26. data/lua-hooks/ext/luajit/doc/contact.html +10 -3
  27. data/lua-hooks/ext/luajit/doc/ext_c_api.html +2 -2
  28. data/lua-hooks/ext/luajit/doc/ext_ffi.html +2 -2
  29. data/lua-hooks/ext/luajit/doc/ext_ffi_api.html +2 -2
  30. data/lua-hooks/ext/luajit/doc/ext_ffi_semantics.html +3 -4
  31. data/lua-hooks/ext/luajit/doc/ext_ffi_tutorial.html +2 -2
  32. data/lua-hooks/ext/luajit/doc/ext_jit.html +3 -3
  33. data/lua-hooks/ext/luajit/doc/ext_profiler.html +2 -2
  34. data/lua-hooks/ext/luajit/doc/extensions.html +47 -20
  35. data/lua-hooks/ext/luajit/doc/faq.html +2 -2
  36. data/lua-hooks/ext/luajit/doc/install.html +74 -45
  37. data/lua-hooks/ext/luajit/doc/luajit.html +5 -5
  38. data/lua-hooks/ext/luajit/doc/running.html +3 -3
  39. data/lua-hooks/ext/luajit/doc/status.html +13 -8
  40. data/lua-hooks/ext/luajit/dynasm/dasm_arm.h +1 -1
  41. data/lua-hooks/ext/luajit/dynasm/dasm_arm.lua +1 -1
  42. data/lua-hooks/ext/luajit/dynasm/dasm_arm64.h +1 -1
  43. data/lua-hooks/ext/luajit/dynasm/dasm_arm64.lua +1 -1
  44. data/lua-hooks/ext/luajit/dynasm/dasm_mips.h +8 -5
  45. data/lua-hooks/ext/luajit/dynasm/dasm_mips.lua +66 -11
  46. data/lua-hooks/ext/luajit/dynasm/dasm_mips64.lua +12 -0
  47. data/lua-hooks/ext/luajit/dynasm/dasm_ppc.h +1 -1
  48. data/lua-hooks/ext/luajit/dynasm/dasm_ppc.lua +1 -1
  49. data/lua-hooks/ext/luajit/dynasm/dasm_proto.h +1 -1
  50. data/lua-hooks/ext/luajit/dynasm/dasm_x64.lua +1 -1
  51. data/lua-hooks/ext/luajit/dynasm/dasm_x86.h +1 -1
  52. data/lua-hooks/ext/luajit/dynasm/dasm_x86.lua +5 -1
  53. data/lua-hooks/ext/luajit/dynasm/dynasm.lua +2 -2
  54. data/lua-hooks/ext/luajit/etc/luajit.1 +1 -1
  55. data/lua-hooks/ext/luajit/etc/luajit.pc +1 -1
  56. data/lua-hooks/ext/luajit/src/Makefile +15 -11
  57. data/lua-hooks/ext/luajit/src/Makefile.dep +16 -16
  58. data/lua-hooks/ext/luajit/src/host/buildvm.c +2 -2
  59. data/lua-hooks/ext/luajit/src/host/buildvm.h +1 -1
  60. data/lua-hooks/ext/luajit/src/host/buildvm_asm.c +9 -4
  61. data/lua-hooks/ext/luajit/src/host/buildvm_fold.c +2 -2
  62. data/lua-hooks/ext/luajit/src/host/buildvm_lib.c +1 -1
  63. data/lua-hooks/ext/luajit/src/host/buildvm_libbc.h +14 -3
  64. data/lua-hooks/ext/luajit/src/host/buildvm_peobj.c +27 -3
  65. data/lua-hooks/ext/luajit/src/host/genlibbc.lua +1 -1
  66. data/lua-hooks/ext/luajit/src/host/genminilua.lua +6 -5
  67. data/lua-hooks/ext/luajit/src/host/minilua.c +1 -1
  68. data/lua-hooks/ext/luajit/src/jit/bc.lua +1 -1
  69. data/lua-hooks/ext/luajit/src/jit/bcsave.lua +8 -8
  70. data/lua-hooks/ext/luajit/src/jit/dis_arm.lua +2 -2
  71. data/lua-hooks/ext/luajit/src/jit/dis_arm64.lua +1216 -0
  72. data/lua-hooks/ext/luajit/src/jit/dis_arm64be.lua +12 -0
  73. data/lua-hooks/ext/luajit/src/jit/dis_mips.lua +35 -20
  74. data/lua-hooks/ext/luajit/src/jit/dis_mips64.lua +17 -0
  75. data/lua-hooks/ext/luajit/src/jit/dis_mips64el.lua +17 -0
  76. data/lua-hooks/ext/luajit/src/jit/dis_mipsel.lua +1 -1
  77. data/lua-hooks/ext/luajit/src/jit/dis_ppc.lua +2 -2
  78. data/lua-hooks/ext/luajit/src/jit/dis_x64.lua +1 -1
  79. data/lua-hooks/ext/luajit/src/jit/dis_x86.lua +7 -4
  80. data/lua-hooks/ext/luajit/src/jit/dump.lua +17 -12
  81. data/lua-hooks/ext/luajit/src/jit/p.lua +3 -2
  82. data/lua-hooks/ext/luajit/src/jit/v.lua +2 -2
  83. data/lua-hooks/ext/luajit/src/jit/zone.lua +1 -1
  84. data/lua-hooks/ext/luajit/src/lauxlib.h +14 -20
  85. data/lua-hooks/ext/luajit/src/lib_aux.c +38 -27
  86. data/lua-hooks/ext/luajit/src/lib_base.c +12 -5
  87. data/lua-hooks/ext/luajit/src/lib_bit.c +1 -1
  88. data/lua-hooks/ext/luajit/src/lib_debug.c +5 -5
  89. data/lua-hooks/ext/luajit/src/lib_ffi.c +2 -2
  90. data/lua-hooks/ext/luajit/src/lib_init.c +16 -16
  91. data/lua-hooks/ext/luajit/src/lib_io.c +6 -7
  92. data/lua-hooks/ext/luajit/src/lib_jit.c +14 -4
  93. data/lua-hooks/ext/luajit/src/lib_math.c +1 -5
  94. data/lua-hooks/ext/luajit/src/lib_os.c +1 -1
  95. data/lua-hooks/ext/luajit/src/lib_package.c +14 -23
  96. data/lua-hooks/ext/luajit/src/lib_string.c +1 -5
  97. data/lua-hooks/ext/luajit/src/lib_table.c +21 -1
  98. data/lua-hooks/ext/luajit/src/lj.supp +3 -3
  99. data/lua-hooks/ext/luajit/src/lj_alloc.c +174 -83
  100. data/lua-hooks/ext/luajit/src/lj_api.c +97 -18
  101. data/lua-hooks/ext/luajit/src/lj_arch.h +54 -22
  102. data/lua-hooks/ext/luajit/src/lj_asm.c +172 -53
  103. data/lua-hooks/ext/luajit/src/lj_asm.h +1 -1
  104. data/lua-hooks/ext/luajit/src/lj_asm_arm.h +19 -16
  105. data/lua-hooks/ext/luajit/src/lj_asm_arm64.h +2022 -0
  106. data/lua-hooks/ext/luajit/src/lj_asm_mips.h +564 -158
  107. data/lua-hooks/ext/luajit/src/lj_asm_ppc.h +19 -18
  108. data/lua-hooks/ext/luajit/src/lj_asm_x86.h +578 -92
  109. data/lua-hooks/ext/luajit/src/lj_bc.c +1 -1
  110. data/lua-hooks/ext/luajit/src/lj_bc.h +1 -1
  111. data/lua-hooks/ext/luajit/src/lj_bcdump.h +1 -1
  112. data/lua-hooks/ext/luajit/src/lj_bcread.c +1 -1
  113. data/lua-hooks/ext/luajit/src/lj_bcwrite.c +1 -1
  114. data/lua-hooks/ext/luajit/src/lj_buf.c +1 -1
  115. data/lua-hooks/ext/luajit/src/lj_buf.h +1 -1
  116. data/lua-hooks/ext/luajit/src/lj_carith.c +1 -1
  117. data/lua-hooks/ext/luajit/src/lj_carith.h +1 -1
  118. data/lua-hooks/ext/luajit/src/lj_ccall.c +172 -7
  119. data/lua-hooks/ext/luajit/src/lj_ccall.h +21 -5
  120. data/lua-hooks/ext/luajit/src/lj_ccallback.c +71 -17
  121. data/lua-hooks/ext/luajit/src/lj_ccallback.h +1 -1
  122. data/lua-hooks/ext/luajit/src/lj_cconv.c +4 -2
  123. data/lua-hooks/ext/luajit/src/lj_cconv.h +1 -1
  124. data/lua-hooks/ext/luajit/src/lj_cdata.c +7 -5
  125. data/lua-hooks/ext/luajit/src/lj_cdata.h +1 -1
  126. data/lua-hooks/ext/luajit/src/lj_clib.c +5 -5
  127. data/lua-hooks/ext/luajit/src/lj_clib.h +1 -1
  128. data/lua-hooks/ext/luajit/src/lj_cparse.c +11 -6
  129. data/lua-hooks/ext/luajit/src/lj_cparse.h +1 -1
  130. data/lua-hooks/ext/luajit/src/lj_crecord.c +70 -14
  131. data/lua-hooks/ext/luajit/src/lj_crecord.h +1 -1
  132. data/lua-hooks/ext/luajit/src/lj_ctype.c +1 -1
  133. data/lua-hooks/ext/luajit/src/lj_ctype.h +8 -8
  134. data/lua-hooks/ext/luajit/src/lj_debug.c +1 -1
  135. data/lua-hooks/ext/luajit/src/lj_debug.h +1 -1
  136. data/lua-hooks/ext/luajit/src/lj_def.h +6 -9
  137. data/lua-hooks/ext/luajit/src/lj_dispatch.c +3 -3
  138. data/lua-hooks/ext/luajit/src/lj_dispatch.h +2 -1
  139. data/lua-hooks/ext/luajit/src/lj_emit_arm.h +5 -4
  140. data/lua-hooks/ext/luajit/src/lj_emit_arm64.h +419 -0
  141. data/lua-hooks/ext/luajit/src/lj_emit_mips.h +100 -20
  142. data/lua-hooks/ext/luajit/src/lj_emit_ppc.h +4 -4
  143. data/lua-hooks/ext/luajit/src/lj_emit_x86.h +116 -25
  144. data/lua-hooks/ext/luajit/src/lj_err.c +34 -13
  145. data/lua-hooks/ext/luajit/src/lj_err.h +1 -1
  146. data/lua-hooks/ext/luajit/src/lj_errmsg.h +1 -1
  147. data/lua-hooks/ext/luajit/src/lj_ff.h +1 -1
  148. data/lua-hooks/ext/luajit/src/lj_ffrecord.c +58 -49
  149. data/lua-hooks/ext/luajit/src/lj_ffrecord.h +1 -1
  150. data/lua-hooks/ext/luajit/src/lj_frame.h +33 -6
  151. data/lua-hooks/ext/luajit/src/lj_func.c +4 -2
  152. data/lua-hooks/ext/luajit/src/lj_func.h +1 -1
  153. data/lua-hooks/ext/luajit/src/lj_gc.c +16 -7
  154. data/lua-hooks/ext/luajit/src/lj_gc.h +1 -1
  155. data/lua-hooks/ext/luajit/src/lj_gdbjit.c +31 -1
  156. data/lua-hooks/ext/luajit/src/lj_gdbjit.h +1 -1
  157. data/lua-hooks/ext/luajit/src/lj_ir.c +69 -96
  158. data/lua-hooks/ext/luajit/src/lj_ir.h +29 -18
  159. data/lua-hooks/ext/luajit/src/lj_ircall.h +24 -30
  160. data/lua-hooks/ext/luajit/src/lj_iropt.h +9 -9
  161. data/lua-hooks/ext/luajit/src/lj_jit.h +67 -9
  162. data/lua-hooks/ext/luajit/src/lj_lex.c +1 -1
  163. data/lua-hooks/ext/luajit/src/lj_lex.h +1 -1
  164. data/lua-hooks/ext/luajit/src/lj_lib.c +1 -1
  165. data/lua-hooks/ext/luajit/src/lj_lib.h +1 -1
  166. data/lua-hooks/ext/luajit/src/lj_load.c +1 -1
  167. data/lua-hooks/ext/luajit/src/lj_mcode.c +11 -10
  168. data/lua-hooks/ext/luajit/src/lj_mcode.h +1 -1
  169. data/lua-hooks/ext/luajit/src/lj_meta.c +1 -1
  170. data/lua-hooks/ext/luajit/src/lj_meta.h +1 -1
  171. data/lua-hooks/ext/luajit/src/lj_obj.c +1 -1
  172. data/lua-hooks/ext/luajit/src/lj_obj.h +7 -3
  173. data/lua-hooks/ext/luajit/src/lj_opt_dce.c +1 -1
  174. data/lua-hooks/ext/luajit/src/lj_opt_fold.c +84 -17
  175. data/lua-hooks/ext/luajit/src/lj_opt_loop.c +1 -1
  176. data/lua-hooks/ext/luajit/src/lj_opt_mem.c +3 -3
  177. data/lua-hooks/ext/luajit/src/lj_opt_narrow.c +24 -22
  178. data/lua-hooks/ext/luajit/src/lj_opt_sink.c +11 -6
  179. data/lua-hooks/ext/luajit/src/lj_opt_split.c +11 -2
  180. data/lua-hooks/ext/luajit/src/lj_parse.c +9 -7
  181. data/lua-hooks/ext/luajit/src/lj_parse.h +1 -1
  182. data/lua-hooks/ext/luajit/src/lj_profile.c +1 -1
  183. data/lua-hooks/ext/luajit/src/lj_profile.h +1 -1
  184. data/lua-hooks/ext/luajit/src/lj_record.c +201 -117
  185. data/lua-hooks/ext/luajit/src/lj_record.h +1 -1
  186. data/lua-hooks/ext/luajit/src/lj_snap.c +72 -26
  187. data/lua-hooks/ext/luajit/src/lj_snap.h +1 -1
  188. data/lua-hooks/ext/luajit/src/lj_state.c +6 -6
  189. data/lua-hooks/ext/luajit/src/lj_state.h +2 -2
  190. data/lua-hooks/ext/luajit/src/lj_str.c +1 -1
  191. data/lua-hooks/ext/luajit/src/lj_str.h +1 -1
  192. data/lua-hooks/ext/luajit/src/lj_strfmt.c +7 -3
  193. data/lua-hooks/ext/luajit/src/lj_strfmt.h +1 -1
  194. data/lua-hooks/ext/luajit/src/lj_strfmt_num.c +4 -3
  195. data/lua-hooks/ext/luajit/src/lj_strscan.c +1 -1
  196. data/lua-hooks/ext/luajit/src/lj_strscan.h +1 -1
  197. data/lua-hooks/ext/luajit/src/lj_tab.c +1 -2
  198. data/lua-hooks/ext/luajit/src/lj_tab.h +1 -1
  199. data/lua-hooks/ext/luajit/src/lj_target.h +3 -3
  200. data/lua-hooks/ext/luajit/src/lj_target_arm.h +1 -1
  201. data/lua-hooks/ext/luajit/src/lj_target_arm64.h +239 -7
  202. data/lua-hooks/ext/luajit/src/lj_target_mips.h +111 -22
  203. data/lua-hooks/ext/luajit/src/lj_target_ppc.h +1 -1
  204. data/lua-hooks/ext/luajit/src/lj_target_x86.h +21 -4
  205. data/lua-hooks/ext/luajit/src/lj_trace.c +63 -18
  206. data/lua-hooks/ext/luajit/src/lj_trace.h +2 -1
  207. data/lua-hooks/ext/luajit/src/lj_traceerr.h +1 -1
  208. data/lua-hooks/ext/luajit/src/lj_udata.c +1 -1
  209. data/lua-hooks/ext/luajit/src/lj_udata.h +1 -1
  210. data/lua-hooks/ext/luajit/src/lj_vm.h +5 -1
  211. data/lua-hooks/ext/luajit/src/lj_vmevent.c +1 -1
  212. data/lua-hooks/ext/luajit/src/lj_vmevent.h +1 -1
  213. data/lua-hooks/ext/luajit/src/lj_vmmath.c +1 -1
  214. data/lua-hooks/ext/luajit/src/ljamalg.c +1 -1
  215. data/lua-hooks/ext/luajit/src/lua.h +9 -1
  216. data/lua-hooks/ext/luajit/src/luaconf.h +3 -7
  217. data/lua-hooks/ext/luajit/src/luajit.c +69 -54
  218. data/lua-hooks/ext/luajit/src/luajit.h +4 -4
  219. data/lua-hooks/ext/luajit/src/lualib.h +1 -1
  220. data/lua-hooks/ext/luajit/src/msvcbuild.bat +12 -4
  221. data/lua-hooks/ext/luajit/src/vm_arm.dasc +1 -1
  222. data/lua-hooks/ext/luajit/src/vm_arm64.dasc +255 -32
  223. data/lua-hooks/ext/luajit/src/vm_mips.dasc +26 -23
  224. data/lua-hooks/ext/luajit/src/vm_mips64.dasc +5062 -0
  225. data/lua-hooks/ext/luajit/src/vm_ppc.dasc +1 -1
  226. data/lua-hooks/ext/luajit/src/vm_x64.dasc +24 -25
  227. data/lua-hooks/ext/luajit/src/vm_x86.dasc +77 -4
  228. data/lua-hooks/libluahooks.darwin.a +0 -0
  229. data/lua-hooks/libluahooks.linux.a +0 -0
  230. data/lua-hooks/options.mk +1 -1
  231. metadata +37 -77
  232. data/lua-hooks/ext/all.c +0 -69
  233. data/lua-hooks/ext/libinjection/COPYING +0 -37
  234. data/lua-hooks/ext/libinjection/libinjection.h +0 -65
  235. data/lua-hooks/ext/libinjection/libinjection_html5.c +0 -847
  236. data/lua-hooks/ext/libinjection/libinjection_html5.h +0 -54
  237. data/lua-hooks/ext/libinjection/libinjection_sqli.c +0 -2301
  238. data/lua-hooks/ext/libinjection/libinjection_sqli.h +0 -295
  239. data/lua-hooks/ext/libinjection/libinjection_sqli_data.h +0 -9349
  240. data/lua-hooks/ext/libinjection/libinjection_xss.c +0 -531
  241. data/lua-hooks/ext/libinjection/libinjection_xss.h +0 -21
  242. data/lua-hooks/ext/libinjection/lualib.c +0 -145
  243. data/lua-hooks/ext/libinjection/module.mk +0 -5
  244. data/lua-hooks/ext/lpeg/HISTORY +0 -96
  245. data/lua-hooks/ext/lpeg/lpcap.c +0 -537
  246. data/lua-hooks/ext/lpeg/lpcap.h +0 -56
  247. data/lua-hooks/ext/lpeg/lpcode.c +0 -1014
  248. data/lua-hooks/ext/lpeg/lpcode.h +0 -40
  249. data/lua-hooks/ext/lpeg/lpeg-128.gif +0 -0
  250. data/lua-hooks/ext/lpeg/lpeg.html +0 -1445
  251. data/lua-hooks/ext/lpeg/lpprint.c +0 -244
  252. data/lua-hooks/ext/lpeg/lpprint.h +0 -36
  253. data/lua-hooks/ext/lpeg/lptree.c +0 -1303
  254. data/lua-hooks/ext/lpeg/lptree.h +0 -82
  255. data/lua-hooks/ext/lpeg/lptypes.h +0 -149
  256. data/lua-hooks/ext/lpeg/lpvm.c +0 -364
  257. data/lua-hooks/ext/lpeg/lpvm.h +0 -58
  258. data/lua-hooks/ext/lpeg/makefile +0 -55
  259. data/lua-hooks/ext/lpeg/module.mk +0 -6
  260. data/lua-hooks/ext/lpeg/re.html +0 -498
  261. data/lua-hooks/ext/lua-cmsgpack/.gitignore +0 -13
  262. data/lua-hooks/ext/lua-cmsgpack/CMakeLists.txt +0 -45
  263. data/lua-hooks/ext/lua-cmsgpack/README.md +0 -115
  264. data/lua-hooks/ext/lua-cmsgpack/lua_cmsgpack.c +0 -970
  265. data/lua-hooks/ext/lua-cmsgpack/module.mk +0 -2
  266. data/lua-hooks/ext/lua-cmsgpack/test.lua +0 -570
  267. data/lua-hooks/ext/lua-snapshot/LICENSE +0 -7
  268. data/lua-hooks/ext/lua-snapshot/Makefile +0 -12
  269. data/lua-hooks/ext/lua-snapshot/README.md +0 -18
  270. data/lua-hooks/ext/lua-snapshot/dump.lua +0 -15
  271. data/lua-hooks/ext/lua-snapshot/module.mk +0 -2
  272. data/lua-hooks/ext/lua-snapshot/snapshot.c +0 -462
  273. data/lua-hooks/ext/luautf8/README.md +0 -152
  274. data/lua-hooks/ext/luautf8/lutf8lib.c +0 -1274
  275. data/lua-hooks/ext/luautf8/module.mk +0 -2
  276. data/lua-hooks/ext/luautf8/unidata.h +0 -3064
  277. data/lua-hooks/ext/module.mk +0 -15
  278. data/lua-hooks/ext/modules.h +0 -17
  279. data/lua-hooks/ext/perf/luacpu.c +0 -114
  280. data/lua-hooks/ext/perf/lualoadavg.c +0 -40
  281. data/lua-hooks/ext/perf/luameminfo.c +0 -38
  282. data/lua-hooks/ext/perf/luaoslib.c +0 -203
  283. data/lua-hooks/ext/perf/module.mk +0 -5
  284. data/lua-hooks/ext/sha1/luasha1.c +0 -74
  285. data/lua-hooks/ext/sha1/module.mk +0 -5
  286. data/lua-hooks/ext/sha1/sha1.c +0 -145
  287. data/lua-hooks/ext/sha2/luasha256.c +0 -77
  288. data/lua-hooks/ext/sha2/module.mk +0 -5
  289. data/lua-hooks/ext/sha2/sha256.c +0 -196
  290. data/lua-hooks/ext/sysutils/lua_utils.c +0 -56
  291. data/lua-hooks/ext/sysutils/module.mk +0 -2
@@ -1,40 +0,0 @@
1
- /*
2
- ** $Id: lpcode.h,v 1.8 2016/09/15 17:46:13 roberto Exp $
3
- */
4
-
5
- #if !defined(lpcode_h)
6
- #define lpcode_h
7
-
8
- #include "lua.h"
9
-
10
- #include "lptypes.h"
11
- #include "lptree.h"
12
- #include "lpvm.h"
13
-
14
- int tocharset (TTree *tree, Charset *cs);
15
- int checkaux (TTree *tree, int pred);
16
- int fixedlen (TTree *tree);
17
- int hascaptures (TTree *tree);
18
- int lp_gc (lua_State *L);
19
- Instruction *compile (lua_State *L, Pattern *p);
20
- void realloccode (lua_State *L, Pattern *p, int nsize);
21
- int sizei (const Instruction *i);
22
-
23
-
24
- #define PEnullable 0
25
- #define PEnofail 1
26
-
27
- /*
28
- ** nofail(t) implies that 't' cannot fail with any input
29
- */
30
- #define nofail(t) checkaux(t, PEnofail)
31
-
32
- /*
33
- ** (not nullable(t)) implies 't' cannot match without consuming
34
- ** something
35
- */
36
- #define nullable(t) checkaux(t, PEnullable)
37
-
38
-
39
-
40
- #endif
@@ -1,1445 +0,0 @@
1
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3
- <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4
- <head>
5
- <title>LPeg - Parsing Expression Grammars For Lua</title>
6
- <link rel="stylesheet"
7
- href="http://www.inf.puc-rio.br/~roberto/lpeg/doc.css"
8
- type="text/css"/>
9
- <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
10
- </head>
11
- <body>
12
-
13
- <!-- $Id: lpeg.html,v 1.77 2017/01/13 13:40:05 roberto Exp $ -->
14
-
15
- <div id="container">
16
-
17
- <div id="product">
18
- <div id="product_logo">
19
- <a href="http://www.inf.puc-rio.br/~roberto/lpeg/">
20
- <img alt="LPeg logo" src="lpeg-128.gif"/></a>
21
-
22
- </div>
23
- <div id="product_name"><big><strong>LPeg</strong></big></div>
24
- <div id="product_description">
25
- Parsing Expression Grammars For Lua, version 1.0
26
- </div>
27
- </div> <!-- id="product" -->
28
-
29
- <div id="main">
30
-
31
- <div id="navigation">
32
- <h1>LPeg</h1>
33
-
34
- <ul>
35
- <li><strong>Home</strong>
36
- <ul>
37
- <li><a href="#intro">Introduction</a></li>
38
- <li><a href="#func">Functions</a></li>
39
- <li><a href="#basic">Basic Constructions</a></li>
40
- <li><a href="#grammar">Grammars</a></li>
41
- <li><a href="#captures">Captures</a></li>
42
- <li><a href="#ex">Some Examples</a></li>
43
- <li><a href="re.html">The <code>re</code> Module</a></li>
44
- <li><a href="#download">Download</a></li>
45
- <li><a href="#license">License</a></li>
46
- </ul>
47
- </li>
48
- </ul>
49
- </div> <!-- id="navigation" -->
50
-
51
- <div id="content">
52
-
53
-
54
- <h2><a name="intro">Introduction</a></h2>
55
-
56
- <p>
57
- <em>LPeg</em> is a new pattern-matching library for Lua,
58
- based on
59
- <a href="http://pdos.csail.mit.edu/%7Ebaford/packrat/">
60
- Parsing Expression Grammars</a> (PEGs).
61
- This text is a reference manual for the library.
62
- For a more formal treatment of LPeg,
63
- as well as some discussion about its implementation,
64
- see
65
- <a href="http://www.inf.puc-rio.br/~roberto/docs/peg.pdf">
66
- A Text Pattern-Matching Tool based on Parsing Expression Grammars</a>.
67
- (You may also be interested in my
68
- <a href="http://vimeo.com/1485123">talk about LPeg</a>
69
- given at the III Lua Workshop.)
70
- </p>
71
-
72
- <p>
73
- Following the Snobol tradition,
74
- LPeg defines patterns as first-class objects.
75
- That is, patterns are regular Lua values
76
- (represented by userdata).
77
- The library offers several functions to create
78
- and compose patterns.
79
- With the use of metamethods,
80
- several of these functions are provided as infix or prefix
81
- operators.
82
- On the one hand,
83
- the result is usually much more verbose than the typical
84
- encoding of patterns using the so called
85
- <em>regular expressions</em>
86
- (which typically are not regular expressions in the formal sense).
87
- On the other hand,
88
- first-class patterns allow much better documentation
89
- (as it is easy to comment the code,
90
- to break complex definitions in smaller parts, etc.)
91
- and are extensible,
92
- as we can define new functions to create and compose patterns.
93
- </p>
94
-
95
- <p>
96
- For a quick glance of the library,
97
- the following table summarizes its basic operations
98
- for creating patterns:
99
- </p>
100
- <table border="1">
101
- <tbody><tr><td><b>Operator</b></td><td><b>Description</b></td></tr>
102
- <tr><td><a href="#op-p"><code>lpeg.P(string)</code></a></td>
103
- <td>Matches <code>string</code> literally</td></tr>
104
- <tr><td><a href="#op-p"><code>lpeg.P(n)</code></a></td>
105
- <td>Matches exactly <code>n</code> characters</td></tr>
106
- <tr><td><a href="#op-s"><code>lpeg.S(string)</code></a></td>
107
- <td>Matches any character in <code>string</code> (Set)</td></tr>
108
- <tr><td><a href="#op-r"><code>lpeg.R("<em>xy</em>")</code></a></td>
109
- <td>Matches any character between <em>x</em> and <em>y</em> (Range)</td></tr>
110
- <tr><td><a href="#op-pow"><code>patt^n</code></a></td>
111
- <td>Matches at least <code>n</code> repetitions of <code>patt</code></td></tr>
112
- <tr><td><a href="#op-pow"><code>patt^-n</code></a></td>
113
- <td>Matches at most <code>n</code> repetitions of <code>patt</code></td></tr>
114
- <tr><td><a href="#op-mul"><code>patt1 * patt2</code></a></td>
115
- <td>Matches <code>patt1</code> followed by <code>patt2</code></td></tr>
116
- <tr><td><a href="#op-add"><code>patt1 + patt2</code></a></td>
117
- <td>Matches <code>patt1</code> or <code>patt2</code>
118
- (ordered choice)</td></tr>
119
- <tr><td><a href="#op-sub"><code>patt1 - patt2</code></a></td>
120
- <td>Matches <code>patt1</code> if <code>patt2</code> does not match</td></tr>
121
- <tr><td><a href="#op-unm"><code>-patt</code></a></td>
122
- <td>Equivalent to <code>("" - patt)</code></td></tr>
123
- <tr><td><a href="#op-len"><code>#patt</code></a></td>
124
- <td>Matches <code>patt</code> but consumes no input</td></tr>
125
- <tr><td><a href="#op-behind"><code>lpeg.B(patt)</code></a></td>
126
- <td>Matches <code>patt</code> behind the current position,
127
- consuming no input</td></tr>
128
- </tbody></table>
129
-
130
- <p>As a very simple example,
131
- <code>lpeg.R("09")^1</code> creates a pattern that
132
- matches a non-empty sequence of digits.
133
- As a not so simple example,
134
- <code>-lpeg.P(1)</code>
135
- (which can be written as <code>lpeg.P(-1)</code>,
136
- or simply <code>-1</code> for operations expecting a pattern)
137
- matches an empty string only if it cannot match a single character;
138
- so, it succeeds only at the end of the subject.
139
- </p>
140
-
141
- <p>
142
- LPeg also offers the <a href="re.html"><code>re</code> module</a>,
143
- which implements patterns following a regular-expression style
144
- (e.g., <code>[09]+</code>).
145
- (This module is 260 lines of Lua code,
146
- and of course it uses LPeg to parse regular expressions and
147
- translate them to regular LPeg patterns.)
148
- </p>
149
-
150
-
151
- <h2><a name="func">Functions</a></h2>
152
-
153
-
154
- <h3><a name="f-match"></a><code>lpeg.match (pattern, subject [, init])</code></h3>
155
- <p>
156
- The matching function.
157
- It attempts to match the given pattern against the subject string.
158
- If the match succeeds,
159
- returns the index in the subject of the first character after the match,
160
- or the <a href="#captures">captured values</a>
161
- (if the pattern captured any value).
162
- </p>
163
-
164
- <p>
165
- An optional numeric argument <code>init</code> makes the match
166
- start at that position in the subject string.
167
- As usual in Lua libraries,
168
- a negative value counts from the end.
169
- </p>
170
-
171
- <p>
172
- Unlike typical pattern-matching functions,
173
- <code>match</code> works only in <em>anchored</em> mode;
174
- that is, it tries to match the pattern with a prefix of
175
- the given subject string (at position <code>init</code>),
176
- not with an arbitrary substring of the subject.
177
- So, if we want to find a pattern anywhere in a string,
178
- we must either write a loop in Lua or write a pattern that
179
- matches anywhere.
180
- This second approach is easy and quite efficient;
181
- see <a href="#ex">examples</a>.
182
- </p>
183
-
184
- <h3><a name="f-type"></a><code>lpeg.type (value)</code></h3>
185
- <p>
186
- If the given value is a pattern,
187
- returns the string <code>"pattern"</code>.
188
- Otherwise returns nil.
189
- </p>
190
-
191
- <h3><a name="f-version"></a><code>lpeg.version ()</code></h3>
192
- <p>
193
- Returns a string with the running version of LPeg.
194
- </p>
195
-
196
- <h3><a name="f-setstack"></a><code>lpeg.setmaxstack (max)</code></h3>
197
- <p>
198
- Sets a limit for the size of the backtrack stack used by LPeg to
199
- track calls and choices.
200
- (The default limit is 400.)
201
- Most well-written patterns need little backtrack levels and
202
- therefore you seldom need to change this limit;
203
- before changing it you should try to rewrite your
204
- pattern to avoid the need for extra space.
205
- Nevertheless, a few useful patterns may overflow.
206
- Also, with recursive grammars,
207
- subjects with deep recursion may also need larger limits.
208
- </p>
209
-
210
-
211
- <h2><a name="basic">Basic Constructions</a></h2>
212
-
213
- <p>
214
- The following operations build patterns.
215
- All operations that expect a pattern as an argument
216
- may receive also strings, tables, numbers, booleans, or functions,
217
- which are translated to patterns according to
218
- the rules of function <a href="#op-p"><code>lpeg.P</code></a>.
219
- </p>
220
-
221
-
222
-
223
- <h3><a name="op-p"></a><code>lpeg.P (value)</code></h3>
224
- <p>
225
- Converts the given value into a proper pattern,
226
- according to the following rules:
227
- </p>
228
- <ul>
229
-
230
- <li><p>
231
- If the argument is a pattern,
232
- it is returned unmodified.
233
- </p></li>
234
-
235
- <li><p>
236
- If the argument is a string,
237
- it is translated to a pattern that matches the string literally.
238
- </p></li>
239
-
240
- <li><p>
241
- If the argument is a non-negative number <em>n</em>,
242
- the result is a pattern that matches exactly <em>n</em> characters.
243
- </p></li>
244
-
245
- <li><p>
246
- If the argument is a negative number <em>-n</em>,
247
- the result is a pattern that
248
- succeeds only if the input string has less than <em>n</em> characters left:
249
- <code>lpeg.P(-n)</code>
250
- is equivalent to <code>-lpeg.P(n)</code>
251
- (see the <a href="#op-unm">unary minus operation</a>).
252
- </p></li>
253
-
254
- <li><p>
255
- If the argument is a boolean,
256
- the result is a pattern that always succeeds or always fails
257
- (according to the boolean value),
258
- without consuming any input.
259
- </p></li>
260
-
261
- <li><p>
262
- If the argument is a table,
263
- it is interpreted as a grammar
264
- (see <a href="#grammar">Grammars</a>).
265
- </p></li>
266
-
267
- <li><p>
268
- If the argument is a function,
269
- returns a pattern equivalent to a
270
- <a href="#matchtime">match-time capture</a> over the empty string.
271
- </p></li>
272
-
273
- </ul>
274
-
275
-
276
- <h3><a name="op-behind"></a><code>lpeg.B(patt)</code></h3>
277
- <p>
278
- Returns a pattern that
279
- matches only if the input string at the current position
280
- is preceded by <code>patt</code>.
281
- Pattern <code>patt</code> must match only strings
282
- with some fixed length,
283
- and it cannot contain captures.
284
- </p>
285
-
286
- <p>
287
- Like the <a href="#op-len">and predicate</a>,
288
- this pattern never consumes any input,
289
- independently of success or failure.
290
- </p>
291
-
292
-
293
- <h3><a name="op-r"></a><code>lpeg.R ({range})</code></h3>
294
- <p>
295
- Returns a pattern that matches any single character
296
- belonging to one of the given <em>ranges</em>.
297
- Each <code>range</code> is a string <em>xy</em> of length 2,
298
- representing all characters with code
299
- between the codes of <em>x</em> and <em>y</em>
300
- (both inclusive).
301
- </p>
302
-
303
- <p>
304
- As an example, the pattern
305
- <code>lpeg.R("09")</code> matches any digit,
306
- and <code>lpeg.R("az", "AZ")</code> matches any ASCII letter.
307
- </p>
308
-
309
-
310
- <h3><a name="op-s"></a><code>lpeg.S (string)</code></h3>
311
- <p>
312
- Returns a pattern that matches any single character that
313
- appears in the given string.
314
- (The <code>S</code> stands for <em>Set</em>.)
315
- </p>
316
-
317
- <p>
318
- As an example, the pattern
319
- <code>lpeg.S("+-*/")</code> matches any arithmetic operator.
320
- </p>
321
-
322
- <p>
323
- Note that, if <code>s</code> is a character
324
- (that is, a string of length 1),
325
- then <code>lpeg.P(s)</code> is equivalent to <code>lpeg.S(s)</code>
326
- which is equivalent to <code>lpeg.R(s..s)</code>.
327
- Note also that both <code>lpeg.S("")</code> and <code>lpeg.R()</code>
328
- are patterns that always fail.
329
- </p>
330
-
331
-
332
- <h3><a name="op-v"></a><code>lpeg.V (v)</code></h3>
333
- <p>
334
- This operation creates a non-terminal (a <em>variable</em>)
335
- for a grammar.
336
- The created non-terminal refers to the rule indexed by <code>v</code>
337
- in the enclosing grammar.
338
- (See <a href="#grammar">Grammars</a> for details.)
339
- </p>
340
-
341
-
342
- <h3><a name="op-locale"></a><code>lpeg.locale ([table])</code></h3>
343
- <p>
344
- Returns a table with patterns for matching some character classes
345
- according to the current locale.
346
- The table has fields named
347
- <code>alnum</code>,
348
- <code>alpha</code>,
349
- <code>cntrl</code>,
350
- <code>digit</code>,
351
- <code>graph</code>,
352
- <code>lower</code>,
353
- <code>print</code>,
354
- <code>punct</code>,
355
- <code>space</code>,
356
- <code>upper</code>, and
357
- <code>xdigit</code>,
358
- each one containing a correspondent pattern.
359
- Each pattern matches any single character that belongs to its class.
360
- </p>
361
-
362
- <p>
363
- If called with an argument <code>table</code>,
364
- then it creates those fields inside the given table and
365
- returns that table.
366
- </p>
367
-
368
-
369
- <h3><a name="op-len"></a><code>#patt</code></h3>
370
- <p>
371
- Returns a pattern that
372
- matches only if the input string matches <code>patt</code>,
373
- but without consuming any input,
374
- independently of success or failure.
375
- (This pattern is called an <em>and predicate</em>
376
- and it is equivalent to
377
- <em>&amp;patt</em> in the original PEG notation.)
378
- </p>
379
-
380
-
381
- <p>
382
- This pattern never produces any capture.
383
- </p>
384
-
385
-
386
- <h3><a name="op-unm"></a><code>-patt</code></h3>
387
- <p>
388
- Returns a pattern that
389
- matches only if the input string does not match <code>patt</code>.
390
- It does not consume any input,
391
- independently of success or failure.
392
- (This pattern is equivalent to
393
- <em>!patt</em> in the original PEG notation.)
394
- </p>
395
-
396
- <p>
397
- As an example, the pattern
398
- <code>-lpeg.P(1)</code> matches only the end of string.
399
- </p>
400
-
401
- <p>
402
- This pattern never produces any captures,
403
- because either <code>patt</code> fails
404
- or <code>-patt</code> fails.
405
- (A failing pattern never produces captures.)
406
- </p>
407
-
408
-
409
- <h3><a name="op-add"></a><code>patt1 + patt2</code></h3>
410
- <p>
411
- Returns a pattern equivalent to an <em>ordered choice</em>
412
- of <code>patt1</code> and <code>patt2</code>.
413
- (This is denoted by <em>patt1 / patt2</em> in the original PEG notation,
414
- not to be confused with the <code>/</code> operation in LPeg.)
415
- It matches either <code>patt1</code> or <code>patt2</code>,
416
- with no backtracking once one of them succeeds.
417
- The identity element for this operation is the pattern
418
- <code>lpeg.P(false)</code>,
419
- which always fails.
420
- </p>
421
-
422
- <p>
423
- If both <code>patt1</code> and <code>patt2</code> are
424
- character sets,
425
- this operation is equivalent to set union.
426
- </p>
427
- <pre class="example">
428
- lower = lpeg.R("az")
429
- upper = lpeg.R("AZ")
430
- letter = lower + upper
431
- </pre>
432
-
433
-
434
- <h3><a name="op-sub"></a><code>patt1 - patt2</code></h3>
435
- <p>
436
- Returns a pattern equivalent to <em>!patt2 patt1</em>.
437
- This pattern asserts that the input does not match
438
- <code>patt2</code> and then matches <code>patt1</code>.
439
- </p>
440
-
441
- <p>
442
- When successful,
443
- this pattern produces all captures from <code>patt1</code>.
444
- It never produces any capture from <code>patt2</code>
445
- (as either <code>patt2</code> fails or
446
- <code>patt1 - patt2</code> fails).
447
- </p>
448
-
449
- <p>
450
- If both <code>patt1</code> and <code>patt2</code> are
451
- character sets,
452
- this operation is equivalent to set difference.
453
- Note that <code>-patt</code> is equivalent to <code>"" - patt</code>
454
- (or <code>0 - patt</code>).
455
- If <code>patt</code> is a character set,
456
- <code>1 - patt</code> is its complement.
457
- </p>
458
-
459
-
460
- <h3><a name="op-mul"></a><code>patt1 * patt2</code></h3>
461
- <p>
462
- Returns a pattern that matches <code>patt1</code>
463
- and then matches <code>patt2</code>,
464
- starting where <code>patt1</code> finished.
465
- The identity element for this operation is the
466
- pattern <code>lpeg.P(true)</code>,
467
- which always succeeds.
468
- </p>
469
-
470
- <p>
471
- (LPeg uses the <code>*</code> operator
472
- [instead of the more obvious <code>..</code>]
473
- both because it has
474
- the right priority and because in formal languages it is
475
- common to use a dot for denoting concatenation.)
476
- </p>
477
-
478
-
479
- <h3><a name="op-pow"></a><code>patt^n</code></h3>
480
- <p>
481
- If <code>n</code> is nonnegative,
482
- this pattern is
483
- equivalent to <em>patt<sup>n</sup> patt*</em>:
484
- It matches <code>n</code> or more occurrences of <code>patt</code>.
485
- </p>
486
-
487
- <p>
488
- Otherwise, when <code>n</code> is negative,
489
- this pattern is equivalent to <em>(patt?)<sup>-n</sup></em>:
490
- It matches at most <code>|n|</code>
491
- occurrences of <code>patt</code>.
492
- </p>
493
-
494
- <p>
495
- In particular, <code>patt^0</code> is equivalent to <em>patt*</em>,
496
- <code>patt^1</code> is equivalent to <em>patt+</em>,
497
- and <code>patt^-1</code> is equivalent to <em>patt?</em>
498
- in the original PEG notation.
499
- </p>
500
-
501
- <p>
502
- In all cases,
503
- the resulting pattern is greedy with no backtracking
504
- (also called a <em>possessive</em> repetition).
505
- That is, it matches only the longest possible sequence
506
- of matches for <code>patt</code>.
507
- </p>
508
-
509
-
510
-
511
- <h2><a name="grammar">Grammars</a></h2>
512
-
513
- <p>
514
- With the use of Lua variables,
515
- it is possible to define patterns incrementally,
516
- with each new pattern using previously defined ones.
517
- However, this technique does not allow the definition of
518
- recursive patterns.
519
- For recursive patterns,
520
- we need real grammars.
521
- </p>
522
-
523
- <p>
524
- LPeg represents grammars with tables,
525
- where each entry is a rule.
526
- </p>
527
-
528
- <p>
529
- The call <code>lpeg.V(v)</code>
530
- creates a pattern that represents the nonterminal
531
- (or <em>variable</em>) with index <code>v</code> in a grammar.
532
- Because the grammar still does not exist when
533
- this function is evaluated,
534
- the result is an <em>open reference</em> to the respective rule.
535
- </p>
536
-
537
- <p>
538
- A table is <em>fixed</em> when it is converted to a pattern
539
- (either by calling <code>lpeg.P</code> or by using it wherein a
540
- pattern is expected).
541
- Then every open reference created by <code>lpeg.V(v)</code>
542
- is corrected to refer to the rule indexed by <code>v</code> in the table.
543
- </p>
544
-
545
- <p>
546
- When a table is fixed,
547
- the result is a pattern that matches its <em>initial rule</em>.
548
- The entry with index 1 in the table defines its initial rule.
549
- If that entry is a string,
550
- it is assumed to be the name of the initial rule.
551
- Otherwise, LPeg assumes that the entry 1 itself is the initial rule.
552
- </p>
553
-
554
- <p>
555
- As an example,
556
- the following grammar matches strings of a's and b's that
557
- have the same number of a's and b's:
558
- </p>
559
- <pre class="example">
560
- equalcount = lpeg.P{
561
- "S"; -- initial rule name
562
- S = "a" * lpeg.V"B" + "b" * lpeg.V"A" + "",
563
- A = "a" * lpeg.V"S" + "b" * lpeg.V"A" * lpeg.V"A",
564
- B = "b" * lpeg.V"S" + "a" * lpeg.V"B" * lpeg.V"B",
565
- } * -1
566
- </pre>
567
- <p>
568
- It is equivalent to the following grammar in standard PEG notation:
569
- </p>
570
- <pre class="example">
571
- S <- 'a' B / 'b' A / ''
572
- A <- 'a' S / 'b' A A
573
- B <- 'b' S / 'a' B B
574
- </pre>
575
-
576
-
577
- <h2><a name="captures">Captures</a></h2>
578
-
579
- <p>
580
- A <em>capture</em> is a pattern that produces values
581
- (the so called <em>semantic information</em>)
582
- according to what it matches.
583
- LPeg offers several kinds of captures,
584
- which produces values based on matches and combine these values to
585
- produce new values.
586
- Each capture may produce zero or more values.
587
- </p>
588
-
589
- <p>
590
- The following table summarizes the basic captures:
591
- </p>
592
- <table border="1">
593
- <tbody><tr><td><b>Operation</b></td><td><b>What it Produces</b></td></tr>
594
- <tr><td><a href="#cap-c"><code>lpeg.C(patt)</code></a></td>
595
- <td>the match for <code>patt</code> plus all captures
596
- made by <code>patt</code></td></tr>
597
- <tr><td><a href="#cap-arg"><code>lpeg.Carg(n)</code></a></td>
598
- <td>the value of the n<sup>th</sup> extra argument to
599
- <code>lpeg.match</code> (matches the empty string)</td></tr>
600
- <tr><td><a href="#cap-b"><code>lpeg.Cb(name)</code></a></td>
601
- <td>the values produced by the previous
602
- group capture named <code>name</code>
603
- (matches the empty string)</td></tr>
604
- <tr><td><a href="#cap-cc"><code>lpeg.Cc(values)</code></a></td>
605
- <td>the given values (matches the empty string)</td></tr>
606
- <tr><td><a href="#cap-f"><code>lpeg.Cf(patt, func)</code></a></td>
607
- <td>a <em>folding</em> of the captures from <code>patt</code></td></tr>
608
- <tr><td><a href="#cap-g"><code>lpeg.Cg(patt [, name])</code></a></td>
609
- <td>the values produced by <code>patt</code>,
610
- optionally tagged with <code>name</code></td></tr>
611
- <tr><td><a href="#cap-p"><code>lpeg.Cp()</code></a></td>
612
- <td>the current position (matches the empty string)</td></tr>
613
- <tr><td><a href="#cap-s"><code>lpeg.Cs(patt)</code></a></td>
614
- <td>the match for <code>patt</code>
615
- with the values from nested captures replacing their matches</td></tr>
616
- <tr><td><a href="#cap-t"><code>lpeg.Ct(patt)</code></a></td>
617
- <td>a table with all captures from <code>patt</code></td></tr>
618
- <tr><td><a href="#cap-string"><code>patt / string</code></a></td>
619
- <td><code>string</code>, with some marks replaced by captures
620
- of <code>patt</code></td></tr>
621
- <tr><td><a href="#cap-num"><code>patt / number</code></a></td>
622
- <td>the n-th value captured by <code>patt</code>,
623
- or no value when <code>number</code> is zero.</td></tr>
624
- <tr><td><a href="#cap-query"><code>patt / table</code></a></td>
625
- <td><code>table[c]</code>, where <code>c</code> is the (first)
626
- capture of <code>patt</code></td></tr>
627
- <tr><td><a href="#cap-func"><code>patt / function</code></a></td>
628
- <td>the returns of <code>function</code> applied to the captures
629
- of <code>patt</code></td></tr>
630
- <tr><td><a href="#matchtime"><code>lpeg.Cmt(patt, function)</code></a></td>
631
- <td>the returns of <code>function</code> applied to the captures
632
- of <code>patt</code>; the application is done at match time</td></tr>
633
- </tbody></table>
634
-
635
- <p>
636
- A capture pattern produces its values only when it succeeds.
637
- For instance,
638
- the pattern <code>lpeg.C(lpeg.P"a"^-1)</code>
639
- produces the empty string when there is no <code>"a"</code>
640
- (because the pattern <code>"a"?</code> succeeds),
641
- while the pattern <code>lpeg.C("a")^-1</code>
642
- does not produce any value when there is no <code>"a"</code>
643
- (because the pattern <code>"a"</code> fails).
644
- A pattern inside a loop or inside a recursive structure
645
- produces values for each match.
646
- </p>
647
-
648
- <p>
649
- Usually,
650
- LPeg does not specify when (and if) it evaluates its captures.
651
- (As an example,
652
- consider the pattern <code>lpeg.P"a" / func / 0</code>.
653
- Because the "division" by 0 instructs LPeg to throw away the
654
- results from the pattern,
655
- LPeg may or may not call <code>func</code>.)
656
- Therefore, captures should avoid side effects.
657
- Moreover,
658
- most captures cannot affect the way a pattern matches a subject.
659
- The only exception to this rule is the
660
- so-called <a href="#matchtime"><em>match-time capture</em></a>.
661
- When a match-time capture matches,
662
- it forces the immediate evaluation of all its nested captures
663
- and then calls its corresponding function,
664
- which defines whether the match succeeds and also
665
- what values are produced.
666
- </p>
667
-
668
- <h3><a name="cap-c"></a><code>lpeg.C (patt)</code></h3>
669
- <p>
670
- Creates a <em>simple capture</em>,
671
- which captures the substring of the subject that matches <code>patt</code>.
672
- The captured value is a string.
673
- If <code>patt</code> has other captures,
674
- their values are returned after this one.
675
- </p>
676
-
677
-
678
- <h3><a name="cap-arg"></a><code>lpeg.Carg (n)</code></h3>
679
- <p>
680
- Creates an <em>argument capture</em>.
681
- This pattern matches the empty string and
682
- produces the value given as the n<sup>th</sup> extra
683
- argument given in the call to <code>lpeg.match</code>.
684
- </p>
685
-
686
-
687
- <h3><a name="cap-b"></a><code>lpeg.Cb (name)</code></h3>
688
- <p>
689
- Creates a <em>back capture</em>.
690
- This pattern matches the empty string and
691
- produces the values produced by the <em>most recent</em>
692
- <a href="#cap-g">group capture</a> named <code>name</code>
693
- (where <code>name</code> can be any Lua value).
694
- </p>
695
-
696
- <p>
697
- <em>Most recent</em> means the last
698
- <em>complete</em>
699
- <em>outermost</em>
700
- group capture with the given name.
701
- A <em>Complete</em> capture means that the entire pattern
702
- corresponding to the capture has matched.
703
- An <em>Outermost</em> capture means that the capture is not inside
704
- another complete capture.
705
- </p>
706
-
707
- <p>
708
- In the same way that LPeg does not specify when it evaluates captures,
709
- it does not specify whether it reuses
710
- values previously produced by the group
711
- or re-evaluates them.
712
- </p>
713
-
714
- <h3><a name="cap-cc"></a><code>lpeg.Cc ([value, ...])</code></h3>
715
- <p>
716
- Creates a <em>constant capture</em>.
717
- This pattern matches the empty string and
718
- produces all given values as its captured values.
719
- </p>
720
-
721
-
722
- <h3><a name="cap-f"></a><code>lpeg.Cf (patt, func)</code></h3>
723
- <p>
724
- Creates a <em>fold capture</em>.
725
- If <code>patt</code> produces a list of captures
726
- <em>C<sub>1</sub> C<sub>2</sub> ... C<sub>n</sub></em>,
727
- this capture will produce the value
728
- <em>func(...func(func(C<sub>1</sub>, C<sub>2</sub>), C<sub>3</sub>)...,
729
- C<sub>n</sub>)</em>,
730
- that is, it will <em>fold</em>
731
- (or <em>accumulate</em>, or <em>reduce</em>)
732
- the captures from <code>patt</code> using function <code>func</code>.
733
- </p>
734
-
735
- <p>
736
- This capture assumes that <code>patt</code> should produce
737
- at least one capture with at least one value (of any type),
738
- which becomes the initial value of an <em>accumulator</em>.
739
- (If you need a specific initial value,
740
- you may prefix a <a href="#cap-cc">constant capture</a> to <code>patt</code>.)
741
- For each subsequent capture,
742
- LPeg calls <code>func</code>
743
- with this accumulator as the first argument and all values produced
744
- by the capture as extra arguments;
745
- the first result from this call
746
- becomes the new value for the accumulator.
747
- The final value of the accumulator becomes the captured value.
748
- </p>
749
-
750
- <p>
751
- As an example,
752
- the following pattern matches a list of numbers separated
753
- by commas and returns their addition:
754
- </p>
755
- <pre class="example">
756
- -- matches a numeral and captures its numerical value
757
- number = lpeg.R"09"^1 / tonumber
758
-
759
- -- matches a list of numbers, capturing their values
760
- list = number * ("," * number)^0
761
-
762
- -- auxiliary function to add two numbers
763
- function add (acc, newvalue) return acc + newvalue end
764
-
765
- -- folds the list of numbers adding them
766
- sum = lpeg.Cf(list, add)
767
-
768
- -- example of use
769
- print(sum:match("10,30,43")) --&gt; 83
770
- </pre>
771
-
772
-
773
- <h3><a name="cap-g"></a><code>lpeg.Cg (patt [, name])</code></h3>
774
- <p>
775
- Creates a <em>group capture</em>.
776
- It groups all values returned by <code>patt</code>
777
- into a single capture.
778
- The group may be anonymous (if no name is given)
779
- or named with the given name
780
- (which can be any non-nil Lua value).
781
- </p>
782
-
783
- <p>
784
- An anonymous group serves to join values from several captures into
785
- a single capture.
786
- A named group has a different behavior.
787
- In most situations, a named group returns no values at all.
788
- Its values are only relevant for a following
789
- <a href="#cap-b">back capture</a> or when used
790
- inside a <a href="#cap-t">table capture</a>.
791
- </p>
792
-
793
-
794
- <h3><a name="cap-p"></a><code>lpeg.Cp ()</code></h3>
795
- <p>
796
- Creates a <em>position capture</em>.
797
- It matches the empty string and
798
- captures the position in the subject where the match occurs.
799
- The captured value is a number.
800
- </p>
801
-
802
-
803
- <h3><a name="cap-s"></a><code>lpeg.Cs (patt)</code></h3>
804
- <p>
805
- Creates a <em>substitution capture</em>,
806
- which captures the substring of the subject that matches <code>patt</code>,
807
- with <em>substitutions</em>.
808
- For any capture inside <code>patt</code> with a value,
809
- the substring that matched the capture is replaced by the capture value
810
- (which should be a string).
811
- The final captured value is the string resulting from
812
- all replacements.
813
- </p>
814
-
815
-
816
- <h3><a name="cap-t"></a><code>lpeg.Ct (patt)</code></h3>
817
- <p>
818
- Creates a <em>table capture</em>.
819
- This capture returns a table with all values from all anonymous captures
820
- made by <code>patt</code> inside this table in successive integer keys,
821
- starting at 1.
822
- Moreover,
823
- for each named capture group created by <code>patt</code>,
824
- the first value of the group is put into the table
825
- with the group name as its key.
826
- The captured value is only the table.
827
- </p>
828
-
829
-
830
- <h3><a name="cap-string"></a><code>patt / string</code></h3>
831
- <p>
832
- Creates a <em>string capture</em>.
833
- It creates a capture string based on <code>string</code>.
834
- The captured value is a copy of <code>string</code>,
835
- except that the character <code>%</code> works as an escape character:
836
- any sequence in <code>string</code> of the form <code>%<em>n</em></code>,
837
- with <em>n</em> between 1 and 9,
838
- stands for the match of the <em>n</em>-th capture in <code>patt</code>.
839
- The sequence <code>%0</code> stands for the whole match.
840
- The sequence <code>%%</code> stands for a single&nbsp;<code>%</code>.
841
- </p>
842
-
843
-
844
- <h3><a name="cap-num"></a><code>patt / number</code></h3>
845
- <p>
846
- Creates a <em>numbered capture</em>.
847
- For a non-zero number,
848
- the captured value is the n-th value
849
- captured by <code>patt</code>.
850
- When <code>number</code> is zero,
851
- there are no captured values.
852
- </p>
853
-
854
-
855
- <h3><a name="cap-query"></a><code>patt / table</code></h3>
856
- <p>
857
- Creates a <em>query capture</em>.
858
- It indexes the given table using as key the first value captured by
859
- <code>patt</code>,
860
- or the whole match if <code>patt</code> produced no value.
861
- The value at that index is the final value of the capture.
862
- If the table does not have that key,
863
- there is no captured value.
864
- </p>
865
-
866
-
867
- <h3><a name="cap-func"></a><code>patt / function</code></h3>
868
- <p>
869
- Creates a <em>function capture</em>.
870
- It calls the given function passing all captures made by
871
- <code>patt</code> as arguments,
872
- or the whole match if <code>patt</code> made no capture.
873
- The values returned by the function
874
- are the final values of the capture.
875
- In particular,
876
- if <code>function</code> returns no value,
877
- there is no captured value.
878
- </p>
879
-
880
-
881
- <h3><a name="matchtime"></a><code>lpeg.Cmt(patt, function)</code></h3>
882
- <p>
883
- Creates a <em>match-time capture</em>.
884
- Unlike all other captures,
885
- this one is evaluated immediately when a match occurs
886
- (even if it is part of a larger pattern that fails later).
887
- It forces the immediate evaluation of all its nested captures
888
- and then calls <code>function</code>.
889
- </p>
890
-
891
- <p>
892
- The given function gets as arguments the entire subject,
893
- the current position (after the match of <code>patt</code>),
894
- plus any capture values produced by <code>patt</code>.
895
- </p>
896
-
897
- <p>
898
- The first value returned by <code>function</code>
899
- defines how the match happens.
900
- If the call returns a number,
901
- the match succeeds
902
- and the returned number becomes the new current position.
903
- (Assuming a subject <em>s</em> and current position <em>i</em>,
904
- the returned number must be in the range <em>[i, len(s) + 1]</em>.)
905
- If the call returns <b>true</b>,
906
- the match succeeds without consuming any input.
907
- (So, to return <b>true</b> is equivalent to return <em>i</em>.)
908
- If the call returns <b>false</b>, <b>nil</b>, or no value,
909
- the match fails.
910
- </p>
911
-
912
- <p>
913
- Any extra values returned by the function become the
914
- values produced by the capture.
915
- </p>
916
-
917
-
918
-
919
-
920
- <h2><a name="ex">Some Examples</a></h2>
921
-
922
- <h3>Using a Pattern</h3>
923
- <p>
924
- This example shows a very simple but complete program
925
- that builds and uses a pattern:
926
- </p>
927
- <pre class="example">
928
- local lpeg = require "lpeg"
929
-
930
- -- matches a word followed by end-of-string
931
- p = lpeg.R"az"^1 * -1
932
-
933
- print(p:match("hello")) --> 6
934
- print(lpeg.match(p, "hello")) --> 6
935
- print(p:match("1 hello")) --> nil
936
- </pre>
937
- <p>
938
- The pattern is simply a sequence of one or more lower-case letters
939
- followed by the end of string (-1).
940
- The program calls <code>match</code> both as a method
941
- and as a function.
942
- In both sucessful cases,
943
- the match returns
944
- the index of the first character after the match,
945
- which is the string length plus one.
946
- </p>
947
-
948
-
949
- <h3>Name-value lists</h3>
950
- <p>
951
- This example parses a list of name-value pairs and returns a table
952
- with those pairs:
953
- </p>
954
- <pre class="example">
955
- lpeg.locale(lpeg) -- adds locale entries into 'lpeg' table
956
-
957
- local space = lpeg.space^0
958
- local name = lpeg.C(lpeg.alpha^1) * space
959
- local sep = lpeg.S(",;") * space
960
- local pair = lpeg.Cg(name * "=" * space * name) * sep^-1
961
- local list = lpeg.Cf(lpeg.Ct("") * pair^0, rawset)
962
- t = list:match("a=b, c = hi; next = pi") --> { a = "b", c = "hi", next = "pi" }
963
- </pre>
964
- <p>
965
- Each pair has the format <code>name = name</code> followed by
966
- an optional separator (a comma or a semicolon).
967
- The <code>pair</code> pattern encloses the pair in a group pattern,
968
- so that the names become the values of a single capture.
969
- The <code>list</code> pattern then folds these captures.
970
- It starts with an empty table,
971
- created by a table capture matching an empty string;
972
- then for each capture (a pair of names) it applies <code>rawset</code>
973
- over the accumulator (the table) and the capture values (the pair of names).
974
- <code>rawset</code> returns the table itself,
975
- so the accumulator is always the table.
976
- </p>
977
-
978
- <h3>Splitting a string</h3>
979
- <p>
980
- The following code builds a pattern that
981
- splits a string using a given pattern
982
- <code>sep</code> as a separator:
983
- </p>
984
- <pre class="example">
985
- function split (s, sep)
986
- sep = lpeg.P(sep)
987
- local elem = lpeg.C((1 - sep)^0)
988
- local p = elem * (sep * elem)^0
989
- return lpeg.match(p, s)
990
- end
991
- </pre>
992
- <p>
993
- First the function ensures that <code>sep</code> is a proper pattern.
994
- The pattern <code>elem</code> is a repetition of zero of more
995
- arbitrary characters as long as there is not a match against
996
- the separator.
997
- It also captures its match.
998
- The pattern <code>p</code> matches a list of elements separated
999
- by <code>sep</code>.
1000
- </p>
1001
-
1002
- <p>
1003
- If the split results in too many values,
1004
- it may overflow the maximum number of values
1005
- that can be returned by a Lua function.
1006
- In this case,
1007
- we can collect these values in a table:
1008
- </p>
1009
- <pre class="example">
1010
- function split (s, sep)
1011
- sep = lpeg.P(sep)
1012
- local elem = lpeg.C((1 - sep)^0)
1013
- local p = lpeg.Ct(elem * (sep * elem)^0) -- make a table capture
1014
- return lpeg.match(p, s)
1015
- end
1016
- </pre>
1017
-
1018
-
1019
- <h3>Searching for a pattern</h3>
1020
- <p>
1021
- The primitive <code>match</code> works only in anchored mode.
1022
- If we want to find a pattern anywhere in a string,
1023
- we must write a pattern that matches anywhere.
1024
- </p>
1025
-
1026
- <p>
1027
- Because patterns are composable,
1028
- we can write a function that,
1029
- given any arbitrary pattern <code>p</code>,
1030
- returns a new pattern that searches for <code>p</code>
1031
- anywhere in a string.
1032
- There are several ways to do the search.
1033
- One way is like this:
1034
- </p>
1035
- <pre class="example">
1036
- function anywhere (p)
1037
- return lpeg.P{ p + 1 * lpeg.V(1) }
1038
- end
1039
- </pre>
1040
- <p>
1041
- This grammar has a straight reading:
1042
- it matches <code>p</code> or skips one character and tries again.
1043
- </p>
1044
-
1045
- <p>
1046
- If we want to know where the pattern is in the string
1047
- (instead of knowing only that it is there somewhere),
1048
- we can add position captures to the pattern:
1049
- </p>
1050
- <pre class="example">
1051
- local I = lpeg.Cp()
1052
- function anywhere (p)
1053
- return lpeg.P{ I * p * I + 1 * lpeg.V(1) }
1054
- end
1055
-
1056
- print(anywhere("world"):match("hello world!")) -> 7 12
1057
- </pre>
1058
-
1059
- <p>
1060
- Another option for the search is like this:
1061
- </p>
1062
- <pre class="example">
1063
- local I = lpeg.Cp()
1064
- function anywhere (p)
1065
- return (1 - lpeg.P(p))^0 * I * p * I
1066
- end
1067
- </pre>
1068
- <p>
1069
- Again the pattern has a straight reading:
1070
- it skips as many characters as possible while not matching <code>p</code>,
1071
- and then matches <code>p</code> (plus appropriate captures).
1072
- </p>
1073
-
1074
- <p>
1075
- If we want to look for a pattern only at word boundaries,
1076
- we can use the following transformer:
1077
- </p>
1078
-
1079
- <pre class="example">
1080
- local t = lpeg.locale()
1081
-
1082
- function atwordboundary (p)
1083
- return lpeg.P{
1084
- [1] = p + t.alpha^0 * (1 - t.alpha)^1 * lpeg.V(1)
1085
- }
1086
- end
1087
- </pre>
1088
-
1089
-
1090
- <h3><a name="balanced"></a>Balanced parentheses</h3>
1091
- <p>
1092
- The following pattern matches only strings with balanced parentheses:
1093
- </p>
1094
- <pre class="example">
1095
- b = lpeg.P{ "(" * ((1 - lpeg.S"()") + lpeg.V(1))^0 * ")" }
1096
- </pre>
1097
- <p>
1098
- Reading the first (and only) rule of the given grammar,
1099
- we have that a balanced string is
1100
- an open parenthesis,
1101
- followed by zero or more repetitions of either
1102
- a non-parenthesis character or
1103
- a balanced string (<code>lpeg.V(1)</code>),
1104
- followed by a closing parenthesis.
1105
- </p>
1106
-
1107
-
1108
- <h3>Global substitution</h3>
1109
- <p>
1110
- The next example does a job somewhat similar to <code>string.gsub</code>.
1111
- It receives a pattern and a replacement value,
1112
- and substitutes the replacement value for all occurrences of the pattern
1113
- in a given string:
1114
- </p>
1115
- <pre class="example">
1116
- function gsub (s, patt, repl)
1117
- patt = lpeg.P(patt)
1118
- patt = lpeg.Cs((patt / repl + 1)^0)
1119
- return lpeg.match(patt, s)
1120
- end
1121
- </pre>
1122
- <p>
1123
- As in <code>string.gsub</code>,
1124
- the replacement value can be a string,
1125
- a function, or a table.
1126
- </p>
1127
-
1128
-
1129
- <h3><a name="CSV"></a>Comma-Separated Values (CSV)</h3>
1130
- <p>
1131
- This example breaks a string into comma-separated values,
1132
- returning all fields:
1133
- </p>
1134
- <pre class="example">
1135
- local field = '"' * lpeg.Cs(((lpeg.P(1) - '"') + lpeg.P'""' / '"')^0) * '"' +
1136
- lpeg.C((1 - lpeg.S',\n"')^0)
1137
-
1138
- local record = field * (',' * field)^0 * (lpeg.P'\n' + -1)
1139
-
1140
- function csv (s)
1141
- return lpeg.match(record, s)
1142
- end
1143
- </pre>
1144
- <p>
1145
- A field is either a quoted field
1146
- (which may contain any character except an individual quote,
1147
- which may be written as two quotes that are replaced by one)
1148
- or an unquoted field
1149
- (which cannot contain commas, newlines, or quotes).
1150
- A record is a list of fields separated by commas,
1151
- ending with a newline or the string end (-1).
1152
- </p>
1153
-
1154
- <p>
1155
- As it is,
1156
- the previous pattern returns each field as a separated result.
1157
- If we add a table capture in the definition of <code>record</code>,
1158
- the pattern will return instead a single table
1159
- containing all fields:
1160
- </p>
1161
- <pre>
1162
- local record = lpeg.Ct(field * (',' * field)^0) * (lpeg.P'\n' + -1)
1163
- </pre>
1164
-
1165
-
1166
- <h3>UTF-8 and Latin 1</h3>
1167
- <p>
1168
- It is not difficult to use LPeg to convert a string from
1169
- UTF-8 encoding to Latin 1 (ISO 8859-1):
1170
- </p>
1171
-
1172
- <pre class="example">
1173
- -- convert a two-byte UTF-8 sequence to a Latin 1 character
1174
- local function f2 (s)
1175
- local c1, c2 = string.byte(s, 1, 2)
1176
- return string.char(c1 * 64 + c2 - 12416)
1177
- end
1178
-
1179
- local utf8 = lpeg.R("\0\127")
1180
- + lpeg.R("\194\195") * lpeg.R("\128\191") / f2
1181
-
1182
- local decode_pattern = lpeg.Cs(utf8^0) * -1
1183
- </pre>
1184
- <p>
1185
- In this code,
1186
- the definition of UTF-8 is already restricted to the
1187
- Latin 1 range (from 0 to 255).
1188
- Any encoding outside this range (as well as any invalid encoding)
1189
- will not match that pattern.
1190
- </p>
1191
-
1192
- <p>
1193
- As the definition of <code>decode_pattern</code> demands that
1194
- the pattern matches the whole input (because of the -1 at its end),
1195
- any invalid string will simply fail to match,
1196
- without any useful information about the problem.
1197
- We can improve this situation redefining <code>decode_pattern</code>
1198
- as follows:
1199
- </p>
1200
- <pre class="example">
1201
- local function er (_, i) error("invalid encoding at position " .. i) end
1202
-
1203
- local decode_pattern = lpeg.Cs(utf8^0) * (-1 + lpeg.P(er))
1204
- </pre>
1205
- <p>
1206
- Now, if the pattern <code>utf8^0</code> stops
1207
- before the end of the string,
1208
- an appropriate error function is called.
1209
- </p>
1210
-
1211
-
1212
- <h3>UTF-8 and Unicode</h3>
1213
- <p>
1214
- We can extend the previous patterns to handle all Unicode code points.
1215
- Of course,
1216
- we cannot translate them to Latin 1 or any other one-byte encoding.
1217
- Instead, our translation results in a array with the code points
1218
- represented as numbers.
1219
- The full code is here:
1220
- </p>
1221
- <pre class="example">
1222
- -- decode a two-byte UTF-8 sequence
1223
- local function f2 (s)
1224
- local c1, c2 = string.byte(s, 1, 2)
1225
- return c1 * 64 + c2 - 12416
1226
- end
1227
-
1228
- -- decode a three-byte UTF-8 sequence
1229
- local function f3 (s)
1230
- local c1, c2, c3 = string.byte(s, 1, 3)
1231
- return (c1 * 64 + c2) * 64 + c3 - 925824
1232
- end
1233
-
1234
- -- decode a four-byte UTF-8 sequence
1235
- local function f4 (s)
1236
- local c1, c2, c3, c4 = string.byte(s, 1, 4)
1237
- return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168
1238
- end
1239
-
1240
- local cont = lpeg.R("\128\191") -- continuation byte
1241
-
1242
- local utf8 = lpeg.R("\0\127") / string.byte
1243
- + lpeg.R("\194\223") * cont / f2
1244
- + lpeg.R("\224\239") * cont * cont / f3
1245
- + lpeg.R("\240\244") * cont * cont * cont / f4
1246
-
1247
- local decode_pattern = lpeg.Ct(utf8^0) * -1
1248
- </pre>
1249
-
1250
-
1251
- <h3>Lua's long strings</h3>
1252
- <p>
1253
- A long string in Lua starts with the pattern <code>[=*[</code>
1254
- and ends at the first occurrence of <code>]=*]</code> with
1255
- exactly the same number of equal signs.
1256
- If the opening brackets are followed by a newline,
1257
- this newline is discarded
1258
- (that is, it is not part of the string).
1259
- </p>
1260
-
1261
- <p>
1262
- To match a long string in Lua,
1263
- the pattern must capture the first repetition of equal signs and then,
1264
- whenever it finds a candidate for closing the string,
1265
- check whether it has the same number of equal signs.
1266
- </p>
1267
-
1268
- <pre class="example">
1269
- equals = lpeg.P"="^0
1270
- open = "[" * lpeg.Cg(equals, "init") * "[" * lpeg.P"\n"^-1
1271
- close = "]" * lpeg.C(equals) * "]"
1272
- closeeq = lpeg.Cmt(close * lpeg.Cb("init"), function (s, i, a, b) return a == b end)
1273
- string = open * lpeg.C((lpeg.P(1) - closeeq)^0) * close / 1
1274
- </pre>
1275
-
1276
- <p>
1277
- The <code>open</code> pattern matches <code>[=*[</code>,
1278
- capturing the repetitions of equal signs in a group named <code>init</code>;
1279
- it also discharges an optional newline, if present.
1280
- The <code>close</code> pattern matches <code>]=*]</code>,
1281
- also capturing the repetitions of equal signs.
1282
- The <code>closeeq</code> pattern first matches <code>close</code>;
1283
- then it uses a back capture to recover the capture made
1284
- by the previous <code>open</code>,
1285
- which is named <code>init</code>;
1286
- finally it uses a match-time capture to check
1287
- whether both captures are equal.
1288
- The <code>string</code> pattern starts with an <code>open</code>,
1289
- then it goes as far as possible until matching <code>closeeq</code>,
1290
- and then matches the final <code>close</code>.
1291
- The final numbered capture simply discards
1292
- the capture made by <code>close</code>.
1293
- </p>
1294
-
1295
-
1296
- <h3>Arithmetic expressions</h3>
1297
- <p>
1298
- This example is a complete parser and evaluator for simple
1299
- arithmetic expressions.
1300
- We write it in two styles.
1301
- The first approach first builds a syntax tree and then
1302
- traverses this tree to compute the expression value:
1303
- </p>
1304
- <pre class="example">
1305
- -- Lexical Elements
1306
- local Space = lpeg.S(" \n\t")^0
1307
- local Number = lpeg.C(lpeg.P"-"^-1 * lpeg.R("09")^1) * Space
1308
- local TermOp = lpeg.C(lpeg.S("+-")) * Space
1309
- local FactorOp = lpeg.C(lpeg.S("*/")) * Space
1310
- local Open = "(" * Space
1311
- local Close = ")" * Space
1312
-
1313
- -- Grammar
1314
- local Exp, Term, Factor = lpeg.V"Exp", lpeg.V"Term", lpeg.V"Factor"
1315
- G = lpeg.P{ Exp,
1316
- Exp = lpeg.Ct(Term * (TermOp * Term)^0);
1317
- Term = lpeg.Ct(Factor * (FactorOp * Factor)^0);
1318
- Factor = Number + Open * Exp * Close;
1319
- }
1320
-
1321
- G = Space * G * -1
1322
-
1323
- -- Evaluator
1324
- function eval (x)
1325
- if type(x) == "string" then
1326
- return tonumber(x)
1327
- else
1328
- local op1 = eval(x[1])
1329
- for i = 2, #x, 2 do
1330
- local op = x[i]
1331
- local op2 = eval(x[i + 1])
1332
- if (op == "+") then op1 = op1 + op2
1333
- elseif (op == "-") then op1 = op1 - op2
1334
- elseif (op == "*") then op1 = op1 * op2
1335
- elseif (op == "/") then op1 = op1 / op2
1336
- end
1337
- end
1338
- return op1
1339
- end
1340
- end
1341
-
1342
- -- Parser/Evaluator
1343
- function evalExp (s)
1344
- local t = lpeg.match(G, s)
1345
- if not t then error("syntax error", 2) end
1346
- return eval(t)
1347
- end
1348
-
1349
- -- small example
1350
- print(evalExp"3 + 5*9 / (1+1) - 12") --> 13.5
1351
- </pre>
1352
-
1353
- <p>
1354
- The second style computes the expression value on the fly,
1355
- without building the syntax tree.
1356
- The following grammar takes this approach.
1357
- (It assumes the same lexical elements as before.)
1358
- </p>
1359
- <pre class="example">
1360
- -- Auxiliary function
1361
- function eval (v1, op, v2)
1362
- if (op == "+") then return v1 + v2
1363
- elseif (op == "-") then return v1 - v2
1364
- elseif (op == "*") then return v1 * v2
1365
- elseif (op == "/") then return v1 / v2
1366
- end
1367
- end
1368
-
1369
- -- Grammar
1370
- local V = lpeg.V
1371
- G = lpeg.P{ "Exp",
1372
- Exp = lpeg.Cf(V"Term" * lpeg.Cg(TermOp * V"Term")^0, eval);
1373
- Term = lpeg.Cf(V"Factor" * lpeg.Cg(FactorOp * V"Factor")^0, eval);
1374
- Factor = Number / tonumber + Open * V"Exp" * Close;
1375
- }
1376
-
1377
- -- small example
1378
- print(lpeg.match(G, "3 + 5*9 / (1+1) - 12")) --> 13.5
1379
- </pre>
1380
- <p>
1381
- Note the use of the fold (accumulator) capture.
1382
- To compute the value of an expression,
1383
- the accumulator starts with the value of the first term,
1384
- and then applies <code>eval</code> over
1385
- the accumulator, the operator,
1386
- and the new term for each repetition.
1387
- </p>
1388
-
1389
-
1390
-
1391
- <h2><a name="download"></a>Download</h2>
1392
-
1393
- <p>LPeg
1394
- <a href="http://www.inf.puc-rio.br/~roberto/lpeg/lpeg-1.0.1.tar.gz">source code</a>.</p>
1395
-
1396
-
1397
- <h2><a name="license">License</a></h2>
1398
-
1399
- <p>
1400
- Copyright &copy; 2007-2017 Lua.org, PUC-Rio.
1401
- </p>
1402
- <p>
1403
- Permission is hereby granted, free of charge,
1404
- to any person obtaining a copy of this software and
1405
- associated documentation files (the "Software"),
1406
- to deal in the Software without restriction,
1407
- including without limitation the rights to use,
1408
- copy, modify, merge, publish, distribute, sublicense,
1409
- and/or sell copies of the Software,
1410
- and to permit persons to whom the Software is
1411
- furnished to do so,
1412
- subject to the following conditions:
1413
- </p>
1414
-
1415
- <p>
1416
- The above copyright notice and this permission notice
1417
- shall be included in all copies or substantial portions of the Software.
1418
- </p>
1419
-
1420
- <p>
1421
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1422
- EXPRESS OR IMPLIED,
1423
- INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1424
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
1425
- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
1426
- DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
1427
- TORT OR OTHERWISE, ARISING FROM,
1428
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
1429
- THE SOFTWARE.
1430
- </p>
1431
-
1432
- </div> <!-- id="content" -->
1433
-
1434
- </div> <!-- id="main" -->
1435
-
1436
- <div id="about">
1437
- <p><small>
1438
- $Id: lpeg.html,v 1.77 2017/01/13 13:40:05 roberto Exp $
1439
- </small></p>
1440
- </div> <!-- id="about" -->
1441
-
1442
- </div> <!-- id="container" -->
1443
-
1444
- </body>
1445
- </html>