immunio 0.15.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +234 -0
- data/README.md +147 -0
- data/bin/immunio +5 -0
- data/lib/immunio.rb +29 -0
- data/lib/immunio/agent.rb +260 -0
- data/lib/immunio/authentication.rb +96 -0
- data/lib/immunio/blocked_app.rb +38 -0
- data/lib/immunio/channel.rb +432 -0
- data/lib/immunio/cli.rb +39 -0
- data/lib/immunio/context.rb +114 -0
- data/lib/immunio/errors.rb +43 -0
- data/lib/immunio/immunio_ca.crt +45 -0
- data/lib/immunio/logger.rb +87 -0
- data/lib/immunio/plugins/action_dispatch.rb +45 -0
- data/lib/immunio/plugins/action_view.rb +431 -0
- data/lib/immunio/plugins/active_record.rb +707 -0
- data/lib/immunio/plugins/active_record_relation.rb +370 -0
- data/lib/immunio/plugins/authlogic.rb +80 -0
- data/lib/immunio/plugins/csrf.rb +24 -0
- data/lib/immunio/plugins/devise.rb +40 -0
- data/lib/immunio/plugins/environment_reporter.rb +69 -0
- data/lib/immunio/plugins/eval.rb +51 -0
- data/lib/immunio/plugins/exception_handler.rb +55 -0
- data/lib/immunio/plugins/gems_tracker.rb +5 -0
- data/lib/immunio/plugins/haml.rb +36 -0
- data/lib/immunio/plugins/http_finisher.rb +50 -0
- data/lib/immunio/plugins/http_tracker.rb +203 -0
- data/lib/immunio/plugins/io.rb +96 -0
- data/lib/immunio/plugins/redirect.rb +42 -0
- data/lib/immunio/plugins/warden.rb +66 -0
- data/lib/immunio/processor.rb +234 -0
- data/lib/immunio/rails.rb +26 -0
- data/lib/immunio/request.rb +139 -0
- data/lib/immunio/rufus_lua_ext/ref.rb +27 -0
- data/lib/immunio/rufus_lua_ext/state.rb +157 -0
- data/lib/immunio/rufus_lua_ext/table.rb +137 -0
- data/lib/immunio/rufus_lua_ext/utils.rb +13 -0
- data/lib/immunio/version.rb +5 -0
- data/lib/immunio/vm.rb +291 -0
- data/lua-hooks/ext/all.c +78 -0
- data/lua-hooks/ext/bitop/README +22 -0
- data/lua-hooks/ext/bitop/bit.c +189 -0
- data/lua-hooks/ext/extconf.rb +38 -0
- data/lua-hooks/ext/libinjection/COPYING +37 -0
- data/lua-hooks/ext/libinjection/libinjection.h +65 -0
- data/lua-hooks/ext/libinjection/libinjection_html5.c +847 -0
- data/lua-hooks/ext/libinjection/libinjection_html5.h +54 -0
- data/lua-hooks/ext/libinjection/libinjection_sqli.c +2301 -0
- data/lua-hooks/ext/libinjection/libinjection_sqli.h +295 -0
- data/lua-hooks/ext/libinjection/libinjection_sqli_data.h +9349 -0
- data/lua-hooks/ext/libinjection/libinjection_xss.c +531 -0
- data/lua-hooks/ext/libinjection/libinjection_xss.h +21 -0
- data/lua-hooks/ext/libinjection/lualib.c +109 -0
- data/lua-hooks/ext/lpeg/HISTORY +90 -0
- data/lua-hooks/ext/lpeg/lpcap.c +537 -0
- data/lua-hooks/ext/lpeg/lpcap.h +43 -0
- data/lua-hooks/ext/lpeg/lpcode.c +986 -0
- data/lua-hooks/ext/lpeg/lpcode.h +34 -0
- data/lua-hooks/ext/lpeg/lpeg-128.gif +0 -0
- data/lua-hooks/ext/lpeg/lpeg.html +1429 -0
- data/lua-hooks/ext/lpeg/lpprint.c +244 -0
- data/lua-hooks/ext/lpeg/lpprint.h +35 -0
- data/lua-hooks/ext/lpeg/lptree.c +1238 -0
- data/lua-hooks/ext/lpeg/lptree.h +77 -0
- data/lua-hooks/ext/lpeg/lptypes.h +149 -0
- data/lua-hooks/ext/lpeg/lpvm.c +355 -0
- data/lua-hooks/ext/lpeg/lpvm.h +58 -0
- data/lua-hooks/ext/lpeg/makefile +55 -0
- data/lua-hooks/ext/lpeg/re.html +498 -0
- data/lua-hooks/ext/lpeg/test.lua +1409 -0
- data/lua-hooks/ext/lua-cmsgpack/CMakeLists.txt +45 -0
- data/lua-hooks/ext/lua-cmsgpack/README.md +115 -0
- data/lua-hooks/ext/lua-cmsgpack/lua_cmsgpack.c +957 -0
- data/lua-hooks/ext/lua-cmsgpack/test.lua +570 -0
- data/lua-hooks/ext/lua-snapshot/LICENSE +7 -0
- data/lua-hooks/ext/lua-snapshot/Makefile +12 -0
- data/lua-hooks/ext/lua-snapshot/README.md +18 -0
- data/lua-hooks/ext/lua-snapshot/dump.lua +15 -0
- data/lua-hooks/ext/lua-snapshot/snapshot.c +455 -0
- data/lua-hooks/ext/lua/COPYRIGHT +34 -0
- data/lua-hooks/ext/lua/lapi.c +1087 -0
- data/lua-hooks/ext/lua/lapi.h +16 -0
- data/lua-hooks/ext/lua/lauxlib.c +652 -0
- data/lua-hooks/ext/lua/lauxlib.h +174 -0
- data/lua-hooks/ext/lua/lbaselib.c +659 -0
- data/lua-hooks/ext/lua/lcode.c +831 -0
- data/lua-hooks/ext/lua/lcode.h +76 -0
- data/lua-hooks/ext/lua/ldblib.c +398 -0
- data/lua-hooks/ext/lua/ldebug.c +638 -0
- data/lua-hooks/ext/lua/ldebug.h +33 -0
- data/lua-hooks/ext/lua/ldo.c +519 -0
- data/lua-hooks/ext/lua/ldo.h +57 -0
- data/lua-hooks/ext/lua/ldump.c +164 -0
- data/lua-hooks/ext/lua/lfunc.c +174 -0
- data/lua-hooks/ext/lua/lfunc.h +34 -0
- data/lua-hooks/ext/lua/lgc.c +710 -0
- data/lua-hooks/ext/lua/lgc.h +110 -0
- data/lua-hooks/ext/lua/linit.c +38 -0
- data/lua-hooks/ext/lua/liolib.c +556 -0
- data/lua-hooks/ext/lua/llex.c +463 -0
- data/lua-hooks/ext/lua/llex.h +81 -0
- data/lua-hooks/ext/lua/llimits.h +128 -0
- data/lua-hooks/ext/lua/lmathlib.c +263 -0
- data/lua-hooks/ext/lua/lmem.c +86 -0
- data/lua-hooks/ext/lua/lmem.h +49 -0
- data/lua-hooks/ext/lua/loadlib.c +705 -0
- data/lua-hooks/ext/lua/loadlib_rel.c +760 -0
- data/lua-hooks/ext/lua/lobject.c +214 -0
- data/lua-hooks/ext/lua/lobject.h +381 -0
- data/lua-hooks/ext/lua/lopcodes.c +102 -0
- data/lua-hooks/ext/lua/lopcodes.h +268 -0
- data/lua-hooks/ext/lua/loslib.c +243 -0
- data/lua-hooks/ext/lua/lparser.c +1339 -0
- data/lua-hooks/ext/lua/lparser.h +82 -0
- data/lua-hooks/ext/lua/lstate.c +214 -0
- data/lua-hooks/ext/lua/lstate.h +169 -0
- data/lua-hooks/ext/lua/lstring.c +111 -0
- data/lua-hooks/ext/lua/lstring.h +31 -0
- data/lua-hooks/ext/lua/lstrlib.c +871 -0
- data/lua-hooks/ext/lua/ltable.c +588 -0
- data/lua-hooks/ext/lua/ltable.h +40 -0
- data/lua-hooks/ext/lua/ltablib.c +287 -0
- data/lua-hooks/ext/lua/ltm.c +75 -0
- data/lua-hooks/ext/lua/ltm.h +54 -0
- data/lua-hooks/ext/lua/lua.c +392 -0
- data/lua-hooks/ext/lua/lua.def +131 -0
- data/lua-hooks/ext/lua/lua.h +388 -0
- data/lua-hooks/ext/lua/lua.rc +28 -0
- data/lua-hooks/ext/lua/lua_dll.rc +26 -0
- data/lua-hooks/ext/lua/luac.c +200 -0
- data/lua-hooks/ext/lua/luac.rc +1 -0
- data/lua-hooks/ext/lua/luaconf.h +763 -0
- data/lua-hooks/ext/lua/luaconf.h.in +724 -0
- data/lua-hooks/ext/lua/luaconf.h.orig +763 -0
- data/lua-hooks/ext/lua/lualib.h +53 -0
- data/lua-hooks/ext/lua/lundump.c +227 -0
- data/lua-hooks/ext/lua/lundump.h +36 -0
- data/lua-hooks/ext/lua/lvm.c +767 -0
- data/lua-hooks/ext/lua/lvm.h +36 -0
- data/lua-hooks/ext/lua/lzio.c +82 -0
- data/lua-hooks/ext/lua/lzio.h +67 -0
- data/lua-hooks/ext/lua/print.c +227 -0
- data/lua-hooks/ext/luautf8/README.md +152 -0
- data/lua-hooks/ext/luautf8/lutf8lib.c +1274 -0
- data/lua-hooks/ext/luautf8/unidata.h +3064 -0
- data/lua-hooks/lib/boot.lua +254 -0
- data/lua-hooks/lib/encode.lua +4 -0
- data/lua-hooks/lib/lexers/LICENSE +21 -0
- data/lua-hooks/lib/lexers/bash.lua +134 -0
- data/lua-hooks/lib/lexers/bash_dqstr.lua +62 -0
- data/lua-hooks/lib/lexers/css.lua +216 -0
- data/lua-hooks/lib/lexers/html.lua +106 -0
- data/lua-hooks/lib/lexers/javascript.lua +68 -0
- data/lua-hooks/lib/lexers/lexer.lua +1575 -0
- data/lua-hooks/lib/lexers/markers.lua +33 -0
- metadata +308 -0
@@ -0,0 +1,58 @@
|
|
1
|
+
/*
|
2
|
+
** $Id: lpvm.h,v 1.3 2014/02/21 13:06:41 roberto Exp $
|
3
|
+
*/
|
4
|
+
|
5
|
+
#if !defined(lpvm_h)
|
6
|
+
#define lpvm_h
|
7
|
+
|
8
|
+
#include "lpcap.h"
|
9
|
+
|
10
|
+
|
11
|
+
/* Virtual Machine's LpegInstructions */
|
12
|
+
typedef enum Opcode {
|
13
|
+
IAny, /* if no char, fail */
|
14
|
+
IChar, /* if char != aux, fail */
|
15
|
+
ISet, /* if char not in buff, fail */
|
16
|
+
ITestAny, /* if no char, jump to 'offset' */
|
17
|
+
ITestChar, /* if char != aux, jump to 'offset' */
|
18
|
+
ITestSet, /* if char not in buff, jump to 'offset' */
|
19
|
+
ISpan, /* read a span of chars in buff */
|
20
|
+
IBehind, /* walk back 'aux' characters (fail if not possible) */
|
21
|
+
IRet, /* return from a rule */
|
22
|
+
IEnd, /* end of pattern */
|
23
|
+
IChoice, /* stack a choice; next fail will jump to 'offset' */
|
24
|
+
IJmp, /* jump to 'offset' */
|
25
|
+
ICall, /* call rule at 'offset' */
|
26
|
+
IOpenCall, /* call rule number 'key' (must be closed to an ICall) */
|
27
|
+
ICommit, /* pop choice and jump to 'offset' */
|
28
|
+
IPartialCommit, /* update top choice to current position and jump */
|
29
|
+
IBackCommit, /* "fails" but jump to its own 'offset' */
|
30
|
+
IFailTwice, /* pop one choice and then fail */
|
31
|
+
IFail, /* go back to saved state on choice and jump to saved offset */
|
32
|
+
IGiveup, /* internal use */
|
33
|
+
IFullCapture, /* complete capture of last 'off' chars */
|
34
|
+
IOpenCapture, /* start a capture */
|
35
|
+
ICloseCapture,
|
36
|
+
ICloseRunTime
|
37
|
+
} Opcode;
|
38
|
+
|
39
|
+
|
40
|
+
|
41
|
+
typedef union LpegInstruction {
|
42
|
+
struct Inst {
|
43
|
+
byte code;
|
44
|
+
byte aux;
|
45
|
+
short key;
|
46
|
+
} i;
|
47
|
+
int offset;
|
48
|
+
byte buff[1];
|
49
|
+
} LpegInstruction;
|
50
|
+
|
51
|
+
|
52
|
+
void printpatt (LpegInstruction *p, int n);
|
53
|
+
const char *lpeg_match (lua_State *L, const char *o, const char *s, const char *e,
|
54
|
+
LpegInstruction *op, Capture *capture, int ptop);
|
55
|
+
|
56
|
+
|
57
|
+
#endif
|
58
|
+
|
@@ -0,0 +1,55 @@
|
|
1
|
+
LIBNAME = lpeg
|
2
|
+
LUADIR = ../lua/
|
3
|
+
|
4
|
+
COPT = -O2
|
5
|
+
# COPT = -DLPEG_DEBUG -g
|
6
|
+
|
7
|
+
CWARNS = -Wall -Wextra -pedantic \
|
8
|
+
-Waggregate-return \
|
9
|
+
-Wcast-align \
|
10
|
+
-Wcast-qual \
|
11
|
+
-Wdisabled-optimization \
|
12
|
+
-Wpointer-arith \
|
13
|
+
-Wshadow \
|
14
|
+
-Wsign-compare \
|
15
|
+
-Wundef \
|
16
|
+
-Wwrite-strings \
|
17
|
+
-Wbad-function-cast \
|
18
|
+
-Wdeclaration-after-statement \
|
19
|
+
-Wmissing-prototypes \
|
20
|
+
-Wnested-externs \
|
21
|
+
-Wstrict-prototypes \
|
22
|
+
# -Wunreachable-code \
|
23
|
+
|
24
|
+
|
25
|
+
CFLAGS = $(CWARNS) $(COPT) -std=c99 -I$(LUADIR) -fPIC
|
26
|
+
CC = gcc
|
27
|
+
|
28
|
+
FILES = lpvm.o lpcap.o lptree.o lpcode.o lpprint.o
|
29
|
+
|
30
|
+
# For Linux
|
31
|
+
linux:
|
32
|
+
make lpeg.so "DLLFLAGS = -shared -fPIC"
|
33
|
+
|
34
|
+
# For Mac OS
|
35
|
+
macosx:
|
36
|
+
make lpeg.so "DLLFLAGS = -bundle -undefined dynamic_lookup"
|
37
|
+
|
38
|
+
lpeg.so: $(FILES)
|
39
|
+
env $(CC) $(DLLFLAGS) $(FILES) -o lpeg.so
|
40
|
+
|
41
|
+
$(FILES): makefile
|
42
|
+
|
43
|
+
test: test.lua re.lua lpeg.so
|
44
|
+
./test.lua
|
45
|
+
|
46
|
+
clean:
|
47
|
+
rm -f $(FILES) lpeg.so
|
48
|
+
|
49
|
+
|
50
|
+
lpcap.o: lpcap.c lpcap.h lptypes.h
|
51
|
+
lpcode.o: lpcode.c lptypes.h lpcode.h lptree.h lpvm.h lpcap.h
|
52
|
+
lpprint.o: lpprint.c lptypes.h lpprint.h lptree.h lpvm.h lpcap.h
|
53
|
+
lptree.o: lptree.c lptypes.h lpcap.h lpcode.h lptree.h lpvm.h lpprint.h
|
54
|
+
lpvm.o: lpvm.c lpcap.h lptypes.h lpvm.h lpprint.h lptree.h
|
55
|
+
|
@@ -0,0 +1,498 @@
|
|
1
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
2
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
3
|
+
<html>
|
4
|
+
<head>
|
5
|
+
<title>LPeg.re - Regex syntax for LPEG</title>
|
6
|
+
<link rel="stylesheet"
|
7
|
+
href="http://www.inf.puc-rio.br/~roberto/lpeg/doc.css"
|
8
|
+
type="text/css"/>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
|
10
|
+
</head>
|
11
|
+
<body>
|
12
|
+
|
13
|
+
<!-- $Id: re.html,v 1.21 2013/03/28 20:43:30 roberto Exp $ -->
|
14
|
+
|
15
|
+
<div id="container">
|
16
|
+
|
17
|
+
<div id="product">
|
18
|
+
<div id="product_logo">
|
19
|
+
<a href="http://www.inf.puc-rio.br/~roberto/lpeg/">
|
20
|
+
<img alt="LPeg logo" src="lpeg-128.gif"/>
|
21
|
+
</a>
|
22
|
+
</div>
|
23
|
+
<div id="product_name"><big><strong>LPeg.re</strong></big></div>
|
24
|
+
<div id="product_description">
|
25
|
+
Regex syntax for LPEG
|
26
|
+
</div>
|
27
|
+
</div> <!-- id="product" -->
|
28
|
+
|
29
|
+
<div id="main">
|
30
|
+
|
31
|
+
<div id="navigation">
|
32
|
+
<h1>re</h1>
|
33
|
+
|
34
|
+
<ul>
|
35
|
+
<li><a href="#basic">Basic Constructions</a></li>
|
36
|
+
<li><a href="#func">Functions</a></li>
|
37
|
+
<li><a href="#ex">Some Examples</a></li>
|
38
|
+
<li><a href="#license">License</a></li>
|
39
|
+
</ul>
|
40
|
+
</li>
|
41
|
+
</ul>
|
42
|
+
</div> <!-- id="navigation" -->
|
43
|
+
|
44
|
+
<div id="content">
|
45
|
+
|
46
|
+
<h2><a name="basic"></a>The <code>re</code> Module</h2>
|
47
|
+
|
48
|
+
<p>
|
49
|
+
The <code>re</code> module
|
50
|
+
(provided by file <code>re.lua</code> in the distribution)
|
51
|
+
supports a somewhat conventional regex syntax
|
52
|
+
for pattern usage within <a href="lpeg.html">LPeg</a>.
|
53
|
+
</p>
|
54
|
+
|
55
|
+
<p>
|
56
|
+
The next table summarizes <code>re</code>'s syntax.
|
57
|
+
A <code>p</code> represents an arbitrary pattern;
|
58
|
+
<code>num</code> represents a number (<code>[0-9]+</code>);
|
59
|
+
<code>name</code> represents an identifier
|
60
|
+
(<code>[a-zA-Z][a-zA-Z0-9_]*</code>).
|
61
|
+
Constructions are listed in order of decreasing precedence.</p>
|
62
|
+
<table border="1">
|
63
|
+
<tbody><tr><td><b>Syntax</b></td><td><b>Description</b></td></tr>
|
64
|
+
<tr><td><code>( p )</code></td> <td>grouping</td></tr>
|
65
|
+
<tr><td><code>'string'</code></td> <td>literal string</td></tr>
|
66
|
+
<tr><td><code>"string"</code></td> <td>literal string</td></tr>
|
67
|
+
<tr><td><code>[class]</code></td> <td>character class</td></tr>
|
68
|
+
<tr><td><code>.</code></td> <td>any character</td></tr>
|
69
|
+
<tr><td><code>%name</code></td>
|
70
|
+
<td>pattern <code>defs[name]</code> or a pre-defined pattern</td></tr>
|
71
|
+
<tr><td><code>name</code></td><td>non terminal</td></tr>
|
72
|
+
<tr><td><code>&lt;name&gt;</code></td><td>non terminal</td></tr>
|
73
|
+
<tr><td><code>{}</code></td> <td>position capture</td></tr>
|
74
|
+
<tr><td><code>{ p }</code></td> <td>simple capture</td></tr>
|
75
|
+
<tr><td><code>{: p :}</code></td> <td>anonymous group capture</td></tr>
|
76
|
+
<tr><td><code>{:name: p :}</code></td> <td>named group capture</td></tr>
|
77
|
+
<tr><td><code>{~ p ~}</code></td> <td>substitution capture</td></tr>
|
78
|
+
<tr><td><code>{| p |}</code></td> <td>table capture</td></tr>
|
79
|
+
<tr><td><code>=name</code></td> <td>back reference
|
80
|
+
</td></tr>
|
81
|
+
<tr><td><code>p ?</code></td> <td>optional match</td></tr>
|
82
|
+
<tr><td><code>p *</code></td> <td>zero or more repetitions</td></tr>
|
83
|
+
<tr><td><code>p +</code></td> <td>one or more repetitions</td></tr>
|
84
|
+
<tr><td><code>p^num</code></td> <td>exactly <code>n</code> repetitions</td></tr>
|
85
|
+
<tr><td><code>p^+num</code></td>
|
86
|
+
<td>at least <code>n</code> repetitions</td></tr>
|
87
|
+
<tr><td><code>p^-num</code></td>
|
88
|
+
<td>at most <code>n</code> repetitions</td></tr>
|
89
|
+
<tr><td><code>p -> 'string'</code></td> <td>string capture</td></tr>
|
90
|
+
<tr><td><code>p -> "string"</code></td> <td>string capture</td></tr>
|
91
|
+
<tr><td><code>p -> num</code></td> <td>numbered capture</td></tr>
|
92
|
+
<tr><td><code>p -> name</code></td> <td>function/query/string capture
|
93
|
+
equivalent to <code>p / defs[name]</code></td></tr>
|
94
|
+
<tr><td><code>p => name</code></td> <td>match-time capture
|
95
|
+
equivalent to <code>lpeg.Cmt(p, defs[name])</code></td></tr>
|
96
|
+
<tr><td><code>&amp; p</code></td> <td>and predicate</td></tr>
|
97
|
+
<tr><td><code>! p</code></td> <td>not predicate</td></tr>
|
98
|
+
<tr><td><code>p1 p2</code></td> <td>concatenation</td></tr>
|
99
|
+
<tr><td><code>p1 / p2</code></td> <td>ordered choice</td></tr>
|
100
|
+
<tr><td>(<code>name &lt;- p</code>)<sup>+</sup></td> <td>grammar</td></tr>
|
101
|
+
</tbody></table>
|
102
|
+
<p>
|
103
|
+
Any space appearing in a syntax description can be
|
104
|
+
replaced by zero or more space characters and Lua-style comments
|
105
|
+
(<code>--</code> until end of line).
|
106
|
+
</p>
|
107
|
+
|
108
|
+
<p>
|
109
|
+
Character classes define sets of characters.
|
110
|
+
An initial <code>^</code> complements the resulting set.
|
111
|
+
A range <em>x</em><code>-</code><em>y</em> includes in the set
|
112
|
+
all characters with codes between the codes of <em>x</em> and <em>y</em>.
|
113
|
+
A pre-defined class <code>%</code><em>name</em> includes all
|
114
|
+
characters of that class.
|
115
|
+
A simple character includes itself in the set.
|
116
|
+
The only special characters inside a class are <code>^</code>
|
117
|
+
(special only if it is the first character);
|
118
|
+
<code>]</code>
|
119
|
+
(can be included in the set as the first character,
|
120
|
+
after the optional <code>^</code>);
|
121
|
+
<code>%</code> (special only if followed by a letter);
|
122
|
+
and <code>-</code>
|
123
|
+
(can be included in the set as the first or the last character).
|
124
|
+
</p>
|
125
|
+
|
126
|
+
<p>
|
127
|
+
Currently the pre-defined classes are similar to those from the
|
128
|
+
Lua string library
|
129
|
+
(<code>%a</code> for letters,
|
130
|
+
<code>%A</code> for non letters, etc.).
|
131
|
+
There is also a class <code>%nl</code>
|
132
|
+
containing only the newline character,
|
133
|
+
which is particularly handy for grammars written inside long strings,
|
134
|
+
as long strings do not interpret escape sequences like <code>\n</code>.
|
135
|
+
</p>
|
136
|
+
|
137
|
+
|
138
|
+
<h2><a name="func">Functions</a></h2>
|
139
|
+
|
140
|
+
<h3><code>re.compile (string [, defs])</code></h3>
|
141
|
+
<p>
|
142
|
+
Compiles the given string and
|
143
|
+
returns an equivalent LPeg pattern.
|
144
|
+
The given string may define either an expression or a grammar.
|
145
|
+
The optional <code>defs</code> table provides extra Lua values
|
146
|
+
to be used by the pattern.
|
147
|
+
</p>
|
148
|
+
|
149
|
+
<h3><code>re.find (subject, pattern [, init])</code></h3>
|
150
|
+
<p>
|
151
|
+
Searches the given pattern in the given subject.
|
152
|
+
If it finds a match,
|
153
|
+
returns the index where this occurrence starts and
|
154
|
+
the index where it ends.
|
155
|
+
Otherwise, returns nil.
|
156
|
+
</p>
|
157
|
+
|
158
|
+
<p>
|
159
|
+
An optional numeric argument <code>init</code> makes the search
|
160
|
+
start at that position in the subject string.
|
161
|
+
As usual in Lua libraries,
|
162
|
+
a negative value counts from the end.
|
163
|
+
</p>
|
164
|
+
|
165
|
+
<h3><code>re.gsub (subject, pattern, replacement)</code></h3>
|
166
|
+
<p>
|
167
|
+
Does a <em>global substitution</em>,
|
168
|
+
replacing all occurrences of <code>pattern</code>
|
169
|
+
in the given <code>subject</code> by <code>replacement</code>.</p>
|
170
|
+
|
171
|
+
<h3><code>re.match (subject, pattern)</code></h3>
|
172
|
+
<p>
|
173
|
+
Matches the given pattern against the given subject,
|
174
|
+
returning all captures.
|
175
|
+
</p>
|
176
|
+
|
177
|
+
<h3><code>re.updatelocale ()</code></h3>
|
178
|
+
<p>
|
179
|
+
Updates the pre-defined character classes to the current locale.
|
180
|
+
</p>
|
181
|
+
|
182
|
+
|
183
|
+
<h2><a name="ex">Some Examples</a></h2>
|
184
|
+
|
185
|
+
<h3>A complete simple program</h3>
|
186
|
+
<p>
|
187
|
+
The next code shows a simple complete Lua program using
|
188
|
+
the <code>re</code> module:
|
189
|
+
</p>
|
190
|
+
<pre class="example">
|
191
|
+
local re = require"re"
|
192
|
+
|
193
|
+
-- find the position of the first numeral in a string
|
194
|
+
print(re.find("the number 423 is odd", "[0-9]+")) --> 12 14
|
195
|
+
|
196
|
+
-- returns all words in a string
|
197
|
+
print(re.match("the number 423 is odd", "({%a+} / .)*"))
|
198
|
+
--> the number is odd
|
199
|
+
|
200
|
+
-- returns the first numeral in a string
|
201
|
+
print(re.match("the number 423 is odd", "s <- {%d+} / . s"))
|
202
|
+
--> 423
|
203
|
+
|
204
|
+
print(re.gsub("hello World", "[aeiou]", "."))
|
205
|
+
--> h.ll. W.rld
|
206
|
+
</pre>
|
207
|
+
|
208
|
+
|
209
|
+
<h3>Balanced parentheses</h3>
|
210
|
+
<p>
|
211
|
+
The following call will produce the same pattern produced by the
|
212
|
+
Lua expression in the
|
213
|
+
<a href="lpeg.html#balanced">balanced parentheses</a> example:
|
214
|
+
</p>
|
215
|
+
<pre class="example">
|
216
|
+
b = re.compile[[ balanced <- "(" ([^()] / balanced)* ")" ]]
|
217
|
+
</pre>
|
218
|
+
|
219
|
+
<h3>String reversal</h3>
|
220
|
+
<p>
|
221
|
+
The next example reverses a string:
|
222
|
+
</p>
|
223
|
+
<pre class="example">
|
224
|
+
rev = re.compile[[ R <- (!.) -> '' / ({.} R) -> '%2%1']]
|
225
|
+
print(rev:match"0123456789") --> 9876543210
|
226
|
+
</pre>
|
227
|
+
|
228
|
+
<h3>CSV decoder</h3>
|
229
|
+
<p>
|
230
|
+
The next example replicates the <a href="lpeg.html#CSV">CSV decoder</a>:
|
231
|
+
</p>
|
232
|
+
<pre class="example">
|
233
|
+
record = re.compile[[
|
234
|
+
record <- {| field (',' field)* |} (%nl / !.)
|
235
|
+
field <- escaped / nonescaped
|
236
|
+
nonescaped <- { [^,"%nl]* }
|
237
|
+
escaped <- '"' {~ ([^"] / '""' -> '"')* ~} '"'
|
238
|
+
]]
|
239
|
+
</pre>
|
240
|
+
|
241
|
+
<h3>Lua's long strings</h3>
|
242
|
+
<p>
|
243
|
+
The next example matches Lua long strings:
|
244
|
+
</p>
|
245
|
+
<pre class="example">
|
246
|
+
c = re.compile([[
|
247
|
+
longstring <- ('[' {:eq: '='* :} '[' close)
|
248
|
+
close <- ']' =eq ']' / . close
|
249
|
+
]])
|
250
|
+
|
251
|
+
print(c:match'[==[]]===]]]]==]===[]') --> 17
|
252
|
+
</pre>
|
253
|
+
|
254
|
+
<h3>Abstract Syntax Trees</h3>
|
255
|
+
<p>
|
256
|
+
This example shows a simple way to build an
|
257
|
+
abstract syntax tree (AST) for a given grammar.
|
258
|
+
To keep our example simple,
|
259
|
+
let us consider the following grammar
|
260
|
+
for lists of names:
|
261
|
+
</p>
|
262
|
+
<pre class="example">
|
263
|
+
p = re.compile[[
|
264
|
+
listname <- (name s)*
|
265
|
+
name <- [a-z][a-z]*
|
266
|
+
s <- %s*
|
267
|
+
]]
|
268
|
+
</pre>
|
269
|
+
<p>
|
270
|
+
Now, we will add captures to build a corresponding AST.
|
271
|
+
As a first step, the pattern will build a table to
|
272
|
+
represent each non terminal;
|
273
|
+
terminals will be represented by their corresponding strings:
|
274
|
+
</p>
|
275
|
+
<pre class="example">
|
276
|
+
c = re.compile[[
|
277
|
+
listname <- {| (name s)* |}
|
278
|
+
name <- {| {[a-z][a-z]*} |}
|
279
|
+
s <- %s*
|
280
|
+
]]
|
281
|
+
</pre>
|
282
|
+
<p>
|
283
|
+
Now, a match against <code>"hi hello bye"</code>
|
284
|
+
results in the table
|
285
|
+
<code>{{"hi"}, {"hello"}, {"bye"}}</code>.
|
286
|
+
</p>
|
287
|
+
<p>
|
288
|
+
For such a simple grammar,
|
289
|
+
this AST is more than enough;
|
290
|
+
actually, the tables around each single name
|
291
|
+
are already overkill.
|
292
|
+
More complex grammars,
|
293
|
+
however, may need some more structure.
|
294
|
+
Specifically,
|
295
|
+
it would be useful if each table had
|
296
|
+
a <code>tag</code> field telling what non terminal
|
297
|
+
that table represents.
|
298
|
+
We can add such a tag using
|
299
|
+
<a href="lpeg.html#cap-g">named group captures</a>:
|
300
|
+
</p>
|
301
|
+
<pre class="example">
|
302
|
+
x = re.compile[[
|
303
|
+
listname <- {| {:tag: '' -> 'list':} (name s)* |}
|
304
|
+
name <- {| {:tag: '' -> 'id':} {[a-z][a-z]*} |}
|
305
|
+
s <- ' '*
|
306
|
+
]]
|
307
|
+
</pre>
|
308
|
+
<p>
|
309
|
+
With these group captures,
|
310
|
+
a match against <code>"hi hello bye"</code>
|
311
|
+
results in the following table:
|
312
|
+
</p>
|
313
|
+
<pre class="example">
|
314
|
+
{tag="list",
|
315
|
+
{tag="id", "hi"},
|
316
|
+
{tag="id", "hello"},
|
317
|
+
{tag="id", "bye"}
|
318
|
+
}
|
319
|
+
</pre>
|
320
|
+
|
321
|
+
|
322
|
+
<h3>Indented blocks</h3>
|
323
|
+
<p>
|
324
|
+
This example breaks indented blocks into tables,
|
325
|
+
respecting the indentation:
|
326
|
+
</p>
|
327
|
+
<pre class="example">
|
328
|
+
p = re.compile[[
|
329
|
+
block <- {| {:ident:' '*:} line
|
330
|
+
((=ident !' ' line) / &(=ident ' ') block)* |}
|
331
|
+
line <- {[^%nl]*} %nl
|
332
|
+
]]
|
333
|
+
</pre>
|
334
|
+
<p>
|
335
|
+
As an example,
|
336
|
+
consider the following text:
|
337
|
+
</p>
|
338
|
+
<pre class="example">
|
339
|
+
t = p:match[[
|
340
|
+
first line
|
341
|
+
subline 1
|
342
|
+
subline 2
|
343
|
+
second line
|
344
|
+
third line
|
345
|
+
subline 3.1
|
346
|
+
subline 3.1.1
|
347
|
+
subline 3.2
|
348
|
+
]]
|
349
|
+
</pre>
|
350
|
+
<p>
|
351
|
+
The resulting table <code>t</code> will be like this:
|
352
|
+
</p>
|
353
|
+
<pre class="example">
|
354
|
+
{'first line'; {'subline 1'; 'subline 2'; ident = ' '};
|
355
|
+
'second line';
|
356
|
+
'third line'; { 'subline 3.1'; {'subline 3.1.1'; ident = ' '};
|
357
|
+
'subline 3.2'; ident = ' '};
|
358
|
+
ident = ''}
|
359
|
+
</pre>
|
360
|
+
|
361
|
+
<h3>Macro expander</h3>
|
362
|
+
<p>
|
363
|
+
This example implements a simple macro expander.
|
364
|
+
Macros must be defined as part of the pattern,
|
365
|
+
following some simple rules:
|
366
|
+
</p>
|
367
|
+
<pre class="example">
|
368
|
+
p = re.compile[[
|
369
|
+
text <- {~ item* ~}
|
370
|
+
item <- macro / [^()] / '(' item* ')'
|
371
|
+
arg <- ' '* {~ (!',' item)* ~}
|
372
|
+
args <- '(' arg (',' arg)* ')'
|
373
|
+
-- now we define some macros
|
374
|
+
macro <- ('apply' args) -> '%1(%2)'
|
375
|
+
/ ('add' args) -> '%1 + %2'
|
376
|
+
/ ('mul' args) -> '%1 * %2'
|
377
|
+
]]
|
378
|
+
|
379
|
+
print(p:match"add(mul(a,b), apply(f,x))") --> a * b + f(x)
|
380
|
+
</pre>
|
381
|
+
<p>
|
382
|
+
A <code>text</code> is a sequence of items,
|
383
|
+
wherein we apply a substitution capture to expand any macros.
|
384
|
+
An <code>item</code> is either a macro,
|
385
|
+
any character different from parentheses,
|
386
|
+
or a parenthesized expression.
|
387
|
+
A macro argument (<code>arg</code>) is a sequence
|
388
|
+
of items different from a comma.
|
389
|
+
(Note that a comma may appear inside an item,
|
390
|
+
e.g., inside a parenthesized expression.)
|
391
|
+
Again we do a substitution capture to expand any macro
|
392
|
+
in the argument before expanding the outer macro.
|
393
|
+
<code>args</code> is a list of arguments separated by commas.
|
394
|
+
Finally we define the macros.
|
395
|
+
Each macro is a string substitution;
|
396
|
+
it replaces the macro name and its arguments by its corresponding string,
|
397
|
+
with each <code>%</code><em>n</em> replaced by the <em>n</em>-th argument.
|
398
|
+
</p>
|
399
|
+
|
400
|
+
<h3>Patterns</h3>
|
401
|
+
<p>
|
402
|
+
This example shows the complete syntax
|
403
|
+
of patterns accepted by <code>re</code>.
|
404
|
+
</p>
|
405
|
+
<pre class="example">
|
406
|
+
p = [=[
|
407
|
+
|
408
|
+
pattern <- exp !.
|
409
|
+
exp <- S (alternative / grammar)
|
410
|
+
|
411
|
+
alternative <- seq ('/' S seq)*
|
412
|
+
seq <- prefix*
|
413
|
+
prefix <- '&' S prefix / '!' S prefix / suffix
|
414
|
+
suffix <- primary S (([+*?]
|
415
|
+
/ '^' [+-]? num
|
416
|
+
/ '->' S (string / '{}' / name)
|
417
|
+
/ '=>' S name) S)*
|
418
|
+
|
419
|
+
primary <- '(' exp ')' / string / class / defined
|
420
|
+
/ '{:' (name ':')? exp ':}'
|
421
|
+
/ '=' name
|
422
|
+
/ '{}'
|
423
|
+
/ '{~' exp '~}'
|
424
|
+
/ '{' exp '}'
|
425
|
+
/ '.'
|
426
|
+
/ name S !arrow
|
427
|
+
/ '<' name '>' -- old-style non terminals
|
428
|
+
|
429
|
+
grammar <- definition+
|
430
|
+
definition <- name S arrow exp
|
431
|
+
|
432
|
+
class <- '[' '^'? item (!']' item)* ']'
|
433
|
+
item <- defined / range / .
|
434
|
+
range <- . '-' [^]]
|
435
|
+
|
436
|
+
S <- (%s / '--' [^%nl]*)* -- spaces and comments
|
437
|
+
name <- [A-Za-z][A-Za-z0-9_]*
|
438
|
+
arrow <- '<-'
|
439
|
+
num <- [0-9]+
|
440
|
+
string <- '"' [^"]* '"' / "'" [^']* "'"
|
441
|
+
defined <- '%' name
|
442
|
+
|
443
|
+
]=]
|
444
|
+
|
445
|
+
print(re.match(p, p)) -- a self description must match itself
|
446
|
+
</pre>
|
447
|
+
|
448
|
+
|
449
|
+
|
450
|
+
<h2><a name="license">License</a></h2>
|
451
|
+
|
452
|
+
<p>
|
453
|
+
Copyright © 2008-2010 Lua.org, PUC-Rio.
|
454
|
+
</p>
|
455
|
+
<p>
|
456
|
+
Permission is hereby granted, free of charge,
|
457
|
+
to any person obtaining a copy of this software and
|
458
|
+
associated documentation files (the "Software"),
|
459
|
+
to deal in the Software without restriction,
|
460
|
+
including without limitation the rights to use,
|
461
|
+
copy, modify, merge, publish, distribute, sublicense,
|
462
|
+
and/or sell copies of the Software,
|
463
|
+
and to permit persons to whom the Software is
|
464
|
+
furnished to do so,
|
465
|
+
subject to the following conditions:
|
466
|
+
</p>
|
467
|
+
|
468
|
+
<p>
|
469
|
+
The above copyright notice and this permission notice
|
470
|
+
shall be included in all copies or substantial portions of the Software.
|
471
|
+
</p>
|
472
|
+
|
473
|
+
<p>
|
474
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
475
|
+
EXPRESS OR IMPLIED,
|
476
|
+
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
477
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
478
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
479
|
+
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
480
|
+
TORT OR OTHERWISE, ARISING FROM,
|
481
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
482
|
+
THE SOFTWARE.
|
483
|
+
</p>
|
484
|
+
|
485
|
+
</div> <!-- id="content" -->
|
486
|
+
|
487
|
+
</div> <!-- id="main" -->
|
488
|
+
|
489
|
+
<div id="about">
|
490
|
+
<p><small>
|
491
|
+
$Id: re.html,v 1.21 2013/03/28 20:43:30 roberto Exp $
|
492
|
+
</small></p>
|
493
|
+
</div> <!-- id="about" -->
|
494
|
+
|
495
|
+
</div> <!-- id="container" -->
|
496
|
+
|
497
|
+
</body>
|
498
|
+
</html>
|