immunio 1.1.13 → 1.1.15

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,13 @@
1
1
  MODULES := lib/hooks/xss
2
2
 
3
- LUA_SRC += \
3
+
4
+ LUA_PROTECT_SRC += \
5
+ lib/hooks/file_io.lua \
6
+ lib/hooks/framework_redirect.lua \
7
+ lib/hooks/template_render_done.lua \
8
+ lib/hooks/sql_execute.lua \
9
+
10
+ LUA_BASE_SRC += \
4
11
  lib/hooks/authenticate.lua \
5
12
  lib/hooks/bad_cookie.lua \
6
13
  lib/hooks/custom_event.lua \
@@ -8,24 +15,22 @@ LUA_SRC += \
8
15
  lib/hooks/encode.lua \
9
16
  lib/hooks/eval.lua \
10
17
  lib/hooks/exception.lua \
11
- lib/hooks/file_io.lua \
12
18
  lib/hooks/framework_csrf_check.lua \
13
19
  lib/hooks/framework_login.lua \
14
20
  lib/hooks/framework_password_reset.lua \
15
21
  lib/hooks/framework_account_created.lua \
16
- lib/hooks/framework_redirect.lua \
17
22
  lib/hooks/framework_session.lua \
18
23
  lib/hooks/framework_user.lua \
19
24
  lib/hooks/framework_route.lua \
20
25
  lib/hooks/framework_bad_response_header.lua \
26
+ lib/hooks/framework_input_params.lua \
27
+ lib/hooks/get_telemetry_config.lua \
28
+ lib/hooks/headers/header_validation.lua \
29
+ lib/hooks/headers/useragent.lua \
21
30
  lib/hooks/http_request_finish.lua \
22
31
  lib/hooks/http_request_start.lua \
23
32
  lib/hooks/http_response_start.lua \
24
33
  lib/hooks/mongodb_execute.lua \
25
34
  lib/hooks/should_report.lua \
26
- lib/hooks/sql_execute.lua \
27
- lib/hooks/template_render_done.lua \
28
- lib/hooks/xss/escape.lua \
29
- lib/hooks/xss/escape_js.lua \
30
35
 
31
36
  include $(patsubst %, %/module.mk,$(MODULES))
@@ -1,4 +1,4 @@
1
- LUA_SRC += \
1
+ LUA_PROTECT_SRC += \
2
2
  lib/hooks/xss/escape.lua \
3
3
  lib/hooks/xss/escape_js.lua \
4
- lib/hooks/xss/html_const.lua
4
+ lib/hooks/xss/html_const.lua
@@ -1,5 +1,4 @@
1
- LUA_SRC += \
2
- lib/lexers/bash_dqstr.lua \
1
+ LUA_BASE_SRC += \
3
2
  lib/lexers/bash.lua \
4
3
  lib/lexers/css_attr.lua \
5
4
  lib/lexers/css.lua \
@@ -7,4 +6,4 @@ LUA_SRC += \
7
6
  lib/lexers/html_entities.lua \
8
7
  lib/lexers/html_entities_ws.lua \
9
8
  lib/lexers/javascript.lua \
10
- lib/lexers/markers.lua
9
+ lib/lexers/markers.lua
@@ -1,6 +1,6 @@
1
- MODULES := lib/hooks lib/lexers lib/schema
1
+ MODULES := lib/hooks lib/lexers
2
2
 
3
- LUA_SRC += \
3
+ LUA_BASE_SRC += \
4
4
  lib/base64.lua \
5
5
  lib/bit.lua \
6
6
  lib/cookie.lua \
@@ -9,12 +9,17 @@ LUA_SRC += \
9
9
  lib/date.lua \
10
10
  lib/defence.lua \
11
11
  lib/diag.lua \
12
+ lib/dkjson.lua \
12
13
  lib/extensions.lua \
13
14
  lib/globtopattern.lua \
15
+ lib/hkdf.lua \
14
16
  lib/hmac.lua \
15
17
  lib/hooks.lua \
16
18
  lib/idn.lua \
19
+ lib/immunio-schemas/immunio_schemas/schemas/request_schema.lua \
20
+ lib/immunio-schemas/immunio_schemas/schemas/validation.lua \
17
21
  lib/ip.lua \
22
+ lib/learn.lua \
18
23
  lib/lexgraph.lua \
19
24
  lib/lexer.lua \
20
25
  lib/lru.lua \
@@ -31,7 +36,6 @@ LUA_SRC += \
31
36
  lib/snap.lua \
32
37
  lib/term.lua \
33
38
  lib/tracking.lua \
34
- lib/useragent.lua \
35
39
  lib/utils.lua \
36
40
  lib/verb_tamper.lua
37
41
 
data/lua-hooks/options.mk CHANGED
@@ -13,6 +13,11 @@ else
13
13
  HOST_SYS= Windows
14
14
  HOST_MSYS= mingw
15
15
  endif
16
+ ifneq (,$(findstring MSYS,$(HOST_SYS)))
17
+ # MSYS is an alias for MINGW
18
+ HOST_SYS= Windows
19
+ HOST_MSYS= mingw
20
+ endif
16
21
  ifneq (,$(findstring CYGWIN,$(HOST_SYS)))
17
22
  HOST_SYS= Windows
18
23
  HOST_MSYS= cygwin
@@ -23,6 +28,7 @@ else
23
28
  endif
24
29
  endif
25
30
 
31
+ TARGET_SYS ?= $(HOST_SYS)
26
32
  CROSS =
27
33
  CC = $(CROSS)cc
28
34
  AR = $(CROSS)ar
@@ -47,13 +53,13 @@ endif
47
53
 
48
54
 
49
55
  XCFLAGS =
50
- CFLAGS = -DLUA_USE_APICHECK -DLUAJIT -Dlua_assert=assert -Wall -fPIC ${INCS} ${XCFLAGS}
56
+ CFLAGS = -DLUA_USE_APICHECK -DLUAJIT -Dlua_assert=assert -Wall -fPIC -fstack-protector ${INCS} ${XCFLAGS}
51
57
  CXXFLAGS = -std=c++11 ${CFLAGS}
52
- LDFLAGS =
58
+ LDFLAGS =
53
59
 
54
60
 
55
61
  LUAJIT_XCFLAGS = -fPIC
56
- ifeq (${HOST_SYS}, Darwin)
62
+ ifeq (${TARGET_SYS}, Darwin)
57
63
  # Disable the JIT on OS X
58
64
  LUAJIT_XCFLAGS += -DLUAJIT_ENABLE_GC64
59
65
  endif
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: immunio
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.13
4
+ version: 1.1.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Immunio
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-20 00:00:00.000000000 Z
11
+ date: 2017-05-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -58,14 +58,14 @@ dependencies:
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '0.5'
61
+ version: 1.1.0
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '0.5'
68
+ version: 1.1.0
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: faraday
71
71
  requirement: !ruby/object:Gem::Requirement
@@ -196,7 +196,6 @@ files:
196
196
  - lua-hooks/ext/lpeg/makefile
197
197
  - lua-hooks/ext/lpeg/module.mk
198
198
  - lua-hooks/ext/lpeg/re.html
199
- - lua-hooks/ext/lpeg/test.lua
200
199
  - lua-hooks/ext/lua-cmsgpack/.gitignore
201
200
  - lua-hooks/ext/lua-cmsgpack/CMakeLists.txt
202
201
  - lua-hooks/ext/lua-cmsgpack/README.md
@@ -443,7 +442,6 @@ files:
443
442
  - lua-hooks/lib/hooks/xss/module.mk
444
443
  - lua-hooks/lib/lexers/module.mk
445
444
  - lua-hooks/lib/module.mk
446
- - lua-hooks/lib/schema/module.mk
447
445
  - lua-hooks/options.mk
448
446
  homepage: http://immun.io/
449
447
  licenses:
@@ -466,8 +464,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
466
464
  version: '0'
467
465
  requirements: []
468
466
  rubyforge_project:
469
- rubygems_version: 2.6.10
467
+ rubygems_version: 2.6.11
470
468
  signing_key:
471
469
  specification_version: 4
472
470
  summary: Immunio Ruby agent
473
471
  test_files: []
472
+ has_rdoc:
@@ -1,1409 +0,0 @@
1
- #!/usr/bin/env lua5.1
2
-
3
- -- $Id: test.lua,v 1.105 2014/12/12 17:00:39 roberto Exp $
4
-
5
- -- require"strict" -- just to be pedantic
6
-
7
- local m = require"lpeg"
8
-
9
-
10
- -- for general use
11
- local a, b, c, d, e, f, g, p, t
12
-
13
-
14
- -- compatibility with Lua 5.2
15
- local unpack = rawget(table, "unpack") or unpack
16
- local loadstring = rawget(_G, "loadstring") or load
17
-
18
-
19
- -- most tests here do not need much stack space
20
- m.setmaxstack(5)
21
-
22
- local any = m.P(1)
23
- local space = m.S" \t\n"^0
24
-
25
- local function checkeq (x, y, p)
26
- if p then print(x,y) end
27
- if type(x) ~= "table" then assert(x == y)
28
- else
29
- for k,v in pairs(x) do checkeq(v, y[k], p) end
30
- for k,v in pairs(y) do checkeq(v, x[k], p) end
31
- end
32
- end
33
-
34
-
35
- local mt = getmetatable(m.P(1))
36
-
37
-
38
- local allchar = {}
39
- for i=0,255 do allchar[i + 1] = i end
40
- allchar = string.char(unpack(allchar))
41
- assert(#allchar == 256)
42
-
43
- local function cs2str (c)
44
- return m.match(m.Cs((c + m.P(1)/"")^0), allchar)
45
- end
46
-
47
- local function eqcharset (c1, c2)
48
- assert(cs2str(c1) == cs2str(c2))
49
- end
50
-
51
-
52
- print"General tests for LPeg library"
53
-
54
- assert(type(m.version()) == "string")
55
- print("version " .. m.version())
56
- assert(m.type("alo") ~= "pattern")
57
- assert(m.type(io.input) ~= "pattern")
58
- assert(m.type(m.P"alo") == "pattern")
59
-
60
- -- tests for some basic optimizations
61
- assert(m.match(m.P(false) + "a", "a") == 2)
62
- assert(m.match(m.P(true) + "a", "a") == 1)
63
- assert(m.match("a" + m.P(false), "b") == nil)
64
- assert(m.match("a" + m.P(true), "b") == 1)
65
-
66
- assert(m.match(m.P(false) * "a", "a") == nil)
67
- assert(m.match(m.P(true) * "a", "a") == 2)
68
- assert(m.match("a" * m.P(false), "a") == nil)
69
- assert(m.match("a" * m.P(true), "a") == 2)
70
-
71
- assert(m.match(#m.P(false) * "a", "a") == nil)
72
- assert(m.match(#m.P(true) * "a", "a") == 2)
73
- assert(m.match("a" * #m.P(false), "a") == nil)
74
- assert(m.match("a" * #m.P(true), "a") == 2)
75
-
76
-
77
- -- tests for locale
78
- do
79
- assert(m.locale(m) == m)
80
- local t = {}
81
- assert(m.locale(t, m) == t)
82
- local x = m.locale()
83
- for n,v in pairs(x) do
84
- assert(type(n) == "string")
85
- eqcharset(v, m[n])
86
- end
87
- end
88
-
89
-
90
- assert(m.match(3, "aaaa"))
91
- assert(m.match(4, "aaaa"))
92
- assert(not m.match(5, "aaaa"))
93
- assert(m.match(-3, "aa"))
94
- assert(not m.match(-3, "aaa"))
95
- assert(not m.match(-3, "aaaa"))
96
- assert(not m.match(-4, "aaaa"))
97
- assert(m.P(-5):match"aaaa")
98
-
99
- assert(m.match("a", "alo") == 2)
100
- assert(m.match("al", "alo") == 3)
101
- assert(not m.match("alu", "alo"))
102
- assert(m.match(true, "") == 1)
103
-
104
- local digit = m.S"0123456789"
105
- local upper = m.S"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
106
- local lower = m.S"abcdefghijklmnopqrstuvwxyz"
107
- local letter = m.S"" + upper + lower
108
- local alpha = letter + digit + m.R()
109
-
110
- eqcharset(m.S"", m.P(false))
111
- eqcharset(upper, m.R("AZ"))
112
- eqcharset(lower, m.R("az"))
113
- eqcharset(upper + lower, m.R("AZ", "az"))
114
- eqcharset(upper + lower, m.R("AZ", "cz", "aa", "bb", "90"))
115
- eqcharset(digit, m.S"01234567" + "8" + "9")
116
- eqcharset(upper, letter - lower)
117
- eqcharset(m.S(""), m.R())
118
- assert(cs2str(m.S("")) == "")
119
-
120
- eqcharset(m.S"\0", "\0")
121
- eqcharset(m.S"\1\0\2", m.R"\0\2")
122
- eqcharset(m.S"\1\0\2", m.R"\1\2" + "\0")
123
- eqcharset(m.S"\1\0\2" - "\0", m.R"\1\2")
124
-
125
- local word = alpha^1 * (1 - alpha)^0
126
-
127
- assert((word^0 * -1):match"alo alo")
128
- assert(m.match(word^1 * -1, "alo alo"))
129
- assert(m.match(word^2 * -1, "alo alo"))
130
- assert(not m.match(word^3 * -1, "alo alo"))
131
-
132
- assert(not m.match(word^-1 * -1, "alo alo"))
133
- assert(m.match(word^-2 * -1, "alo alo"))
134
- assert(m.match(word^-3 * -1, "alo alo"))
135
-
136
- local eos = m.P(-1)
137
-
138
- assert(m.match(digit^0 * letter * digit * eos, "1298a1"))
139
- assert(not m.match(digit^0 * letter * eos, "1257a1"))
140
-
141
- b = {
142
- [1] = "(" * (((1 - m.S"()") + #m.P"(" * m.V(1))^0) * ")"
143
- }
144
-
145
- assert(m.match(b, "(al())()"))
146
- assert(not m.match(b * eos, "(al())()"))
147
- assert(m.match(b * eos, "((al())()(é))"))
148
- assert(not m.match(b, "(al()()"))
149
-
150
- assert(not m.match(letter^1 - "for", "foreach"))
151
- assert(m.match(letter^1 - ("for" * eos), "foreach"))
152
- assert(not m.match(letter^1 - ("for" * eos), "for"))
153
-
154
- function basiclookfor (p)
155
- return m.P {
156
- [1] = p + (1 * m.V(1))
157
- }
158
- end
159
-
160
- function caplookfor (p)
161
- return basiclookfor(p:C())
162
- end
163
-
164
- assert(m.match(caplookfor(letter^1), " 4achou123...") == "achou")
165
- a = {m.match(caplookfor(letter^1)^0, " two words, one more ")}
166
- checkeq(a, {"two", "words", "one", "more"})
167
-
168
- assert(m.match( basiclookfor((#m.P(b) * 1) * m.Cp()), " ( (a)") == 7)
169
-
170
- a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "123")}
171
- checkeq(a, {"123", "d"})
172
-
173
- -- bug in LPeg 0.12 (nil value does not create a 'ktable')
174
- assert(m.match(m.Cc(nil), "") == nil)
175
-
176
- a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "abcd")}
177
- checkeq(a, {"abcd", "l"})
178
-
179
- a = {m.match(m.Cc(10,20,30) * 'a' * m.Cp(), 'aaa')}
180
- checkeq(a, {10,20,30,2})
181
- a = {m.match(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp(), 'aaa')}
182
- checkeq(a, {1,10,20,30,2})
183
- a = m.match(m.Ct(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp()), 'aaa')
184
- checkeq(a, {1,10,20,30,2})
185
- a = m.match(m.Ct(m.Cp() * m.Cc(7,8) * m.Cc(10,20,30) * 'a' * m.Cp()), 'aaa')
186
- checkeq(a, {1,7,8,10,20,30,2})
187
- a = {m.match(m.Cc() * m.Cc() * m.Cc(1) * m.Cc(2,3,4) * m.Cc() * 'a', 'aaa')}
188
- checkeq(a, {1,2,3,4})
189
-
190
- a = {m.match(m.Cp() * letter^1 * m.Cp(), "abcd")}
191
- checkeq(a, {1, 5})
192
-
193
-
194
- t = {m.match({[1] = m.C(m.C(1) * m.V(1) + -1)}, "abc")}
195
- checkeq(t, {"abc", "a", "bc", "b", "c", "c", ""})
196
-
197
- -- bug in 0.12 ('hascapture' did not check for captures inside a rule)
198
- do
199
- local pat = m.P{
200
- 'S';
201
- S1 = m.C('abc') + 3,
202
- S = #m.V('S1') -- rule has capture, but '#' must ignore it
203
- }
204
- assert(pat:match'abc' == 1)
205
- end
206
-
207
-
208
- -- test for small capture boundary
209
- for i = 250,260 do
210
- assert(#m.match(m.C(i), string.rep('a', i)) == i)
211
- assert(#m.match(m.C(m.C(i)), string.rep('a', i)) == i)
212
- end
213
-
214
- -- tests for any*n and any*-n
215
- for n = 1, 550, 13 do
216
- local x_1 = string.rep('x', n - 1)
217
- local x = x_1 .. 'a'
218
- assert(not m.P(n):match(x_1))
219
- assert(m.P(n):match(x) == n + 1)
220
- assert(n < 4 or m.match(m.P(n) + "xxx", x_1) == 4)
221
- assert(m.C(n):match(x) == x)
222
- assert(m.C(m.C(n)):match(x) == x)
223
- assert(m.P(-n):match(x_1) == 1)
224
- assert(not m.P(-n):match(x))
225
- assert(n < 13 or m.match(m.Cc(20) * ((n - 13) * m.P(10)) * 3, x) == 20)
226
- local n3 = math.floor(n/3)
227
- assert(m.match(n3 * m.Cp() * n3 * n3, x) == n3 + 1)
228
- end
229
-
230
- -- true values
231
- assert(m.P(0):match("x") == 1)
232
- assert(m.P(0):match("") == 1)
233
- assert(m.C(0):match("x") == "")
234
-
235
- assert(m.match(m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu", "xuxu") == 1)
236
- assert(m.match(m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu", "xuxuxuxuxu") == 0)
237
- assert(m.match(m.C(m.P(2)^1), "abcde") == "abcd")
238
- p = m.Cc(0) * 1 + m.Cc(1) * 2 + m.Cc(2) * 3 + m.Cc(3) * 4
239
-
240
-
241
- -- test for alternation optimization
242
- assert(m.match(m.P"a"^1 + "ab" + m.P"x"^0, "ab") == 2)
243
- assert(m.match((m.P"a"^1 + "ab" + m.P"x"^0 * 1)^0, "ab") == 3)
244
- assert(m.match(m.P"ab" + "cd" + "" + "cy" + "ak", "98") == 1)
245
- assert(m.match(m.P"ab" + "cd" + "ax" + "cy", "ax") == 3)
246
- assert(m.match("a" * m.P"b"^0 * "c" + "cd" + "ax" + "cy", "ax") == 3)
247
- assert(m.match((m.P"ab" + "cd" + "ax" + "cy")^0, "ax") == 3)
248
- assert(m.match(m.P(1) * "x" + m.S"" * "xu" + "ay", "ay") == 3)
249
- assert(m.match(m.P"abc" + "cde" + "aka", "aka") == 4)
250
- assert(m.match(m.S"abc" * "x" + "cde" + "aka", "ax") == 3)
251
- assert(m.match(m.S"abc" * "x" + "cde" + "aka", "aka") == 4)
252
- assert(m.match(m.S"abc" * "x" + "cde" + "aka", "cde") == 4)
253
- assert(m.match(m.S"abc" * "x" + "ide" + m.S"ab" * "ka", "aka") == 4)
254
- assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "ax") == 3)
255
- assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "aka") == 4)
256
- assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "cde") == 4)
257
- assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "aka") == 4)
258
- assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "ax") == 3)
259
- assert(m.match(m.P(1) * "x" + "cde" + m.S"ab" * "ka", "aka") == 4)
260
- assert(m.match(m.P(1) * "x" + "cde" + m.P(1) * "ka", "aka") == 4)
261
- assert(m.match(m.P(1) * "x" + "cde" + m.P(1) * "ka", "cde") == 4)
262
- assert(m.match(m.P"eb" + "cd" + m.P"e"^0 + "x", "ee") == 3)
263
- assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "abcd") == 3)
264
- assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "eeex") == 4)
265
- assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "cd") == 3)
266
- assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "x") == 1)
267
- assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x" + "", "zee") == 1)
268
- assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "abcd") == 3)
269
- assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "eeex") == 4)
270
- assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "cd") == 3)
271
- assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "x") == 2)
272
- assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x" + "", "zee") == 1)
273
- assert(not m.match(("aa" * m.P"bc"^-1 + "aab") * "e", "aabe"))
274
-
275
- assert(m.match("alo" * (m.P"\n" + -1), "alo") == 4)
276
-
277
-
278
- -- bug in 0.12 (rc1)
279
- assert(m.match((m.P"\128\187\191" + m.S"abc")^0, "\128\187\191") == 4)
280
-
281
- assert(m.match(m.S"\0\128\255\127"^0, string.rep("\0\128\255\127", 10)) ==
282
- 4*10 + 1)
283
-
284
- -- optimizations with optional parts
285
- assert(m.match(("ab" * -m.P"c")^-1, "abc") == 1)
286
- assert(m.match(("ab" * #m.P"c")^-1, "abd") == 1)
287
- assert(m.match(("ab" * m.B"c")^-1, "ab") == 1)
288
- assert(m.match(("ab" * m.P"cd"^0)^-1, "abcdcdc") == 7)
289
-
290
- assert(m.match(m.P"ab"^-1 - "c", "abcd") == 3)
291
-
292
- p = ('Aa' * ('Bb' * ('Cc' * m.P'Dd'^0)^0)^0)^-1
293
- assert(p:match("AaBbCcDdBbCcDdDdDdBb") == 21)
294
-
295
-
296
- pi = "3.14159 26535 89793 23846 26433 83279 50288 41971 69399 37510"
297
- assert(m.match(m.Cs((m.P"1" / "a" + m.P"5" / "b" + m.P"9" / "c" + 1)^0), pi) ==
298
- m.match(m.Cs((m.P(1) / {["1"] = "a", ["5"] = "b", ["9"] = "c"})^0), pi))
299
- print"+"
300
-
301
-
302
- -- tests for capture optimizations
303
- assert(m.match((m.P(3) + 4 * m.Cp()) * "a", "abca") == 5)
304
- t = {m.match(((m.P"a" + m.Cp()) * m.P"x")^0, "axxaxx")}
305
- checkeq(t, {3, 6})
306
-
307
-
308
- -- tests for numbered captures
309
- p = m.C(1)
310
- assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 3, "abcdefgh") == "a")
311
- assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 1, "abcdefgh") == "abcdef")
312
- assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 4, "abcdefgh") == "bc")
313
- assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 0, "abcdefgh") == 7)
314
-
315
- a, b, c = m.match(p * (m.C(p * m.C(2)) * m.C(3) / 4) * p, "abcdefgh")
316
- assert(a == "a" and b == "efg" and c == "h")
317
-
318
- -- test for table captures
319
- t = m.match(m.Ct(letter^1), "alo")
320
- checkeq(t, {})
321
-
322
- t, n = m.match(m.Ct(m.C(letter)^1) * m.Cc"t", "alo")
323
- assert(n == "t" and table.concat(t) == "alo")
324
-
325
- t = m.match(m.Ct(m.C(m.C(letter)^1)), "alo")
326
- assert(table.concat(t, ";") == "alo;a;l;o")
327
-
328
- t = m.match(m.Ct(m.C(m.C(letter)^1)), "alo")
329
- assert(table.concat(t, ";") == "alo;a;l;o")
330
-
331
- t = m.match(m.Ct(m.Ct((m.Cp() * letter * m.Cp())^1)), "alo")
332
- assert(table.concat(t[1], ";") == "1;2;2;3;3;4")
333
-
334
- t = m.match(m.Ct(m.C(m.C(1) * 1 * m.C(1))), "alo")
335
- checkeq(t, {"alo", "a", "o"})
336
-
337
-
338
- -- tests for groups
339
- p = m.Cg(1) -- no capture
340
- assert(p:match('x') == 'x')
341
- p = m.Cg(m.P(true)/function () end * 1) -- no value
342
- assert(p:match('x') == 'x')
343
- p = m.Cg(m.Cg(m.Cg(m.C(1))))
344
- assert(p:match('x') == 'x')
345
- p = m.Cg(m.Cg(m.Cg(m.C(1))^0) * m.Cg(m.Cc(1) * m.Cc(2)))
346
- t = {p:match'abc'}
347
- checkeq(t, {'a', 'b', 'c', 1, 2})
348
-
349
- p = m.Ct(m.Cg(m.Cc(10), "hi") * m.C(1)^0 * m.Cg(m.Cc(20), "ho"))
350
- t = p:match''
351
- checkeq(t, {hi = 10, ho = 20})
352
- t = p:match'abc'
353
- checkeq(t, {hi = 10, ho = 20, 'a', 'b', 'c'})
354
-
355
-
356
- -- test for error messages
357
- local function checkerr (msg, f, ...)
358
- local st, err = pcall(f, ...)
359
- assert(not st and m.match({ m.P(msg) + 1 * m.V(1) }, err))
360
- end
361
-
362
- checkerr("rule '1' may be left recursive", m.match, { m.V(1) * 'a' }, "a")
363
- checkerr("rule '1' used outside a grammar", m.match, m.V(1), "")
364
- checkerr("rule 'hiii' used outside a grammar", m.match, m.V('hiii'), "")
365
- checkerr("rule 'hiii' undefined in given grammar", m.match, { m.V('hiii') }, "")
366
- checkerr("undefined in given grammar", m.match, { m.V{} }, "")
367
-
368
- checkerr("rule 'A' is not a pattern", m.P, { m.P(1), A = {} })
369
- checkerr("grammar has no initial rule", m.P, { [print] = {} })
370
-
371
- -- grammar with a long call chain before left recursion
372
- p = {'a',
373
- a = m.V'b' * m.V'c' * m.V'd' * m.V'a',
374
- b = m.V'c',
375
- c = m.V'd',
376
- d = m.V'e',
377
- e = m.V'f',
378
- f = m.V'g',
379
- g = m.P''
380
- }
381
- checkerr("rule 'a' may be left recursive", m.match, p, "a")
382
-
383
- -- Bug in peephole optimization of LPeg 0.12 (IJmp -> ICommit)
384
- -- the next grammar has an original sequence IJmp -> ICommit -> IJmp L1
385
- -- that is optimized to ICommit L1
386
-
387
- p = m.P { (m.P {m.P'abc'} + 'ayz') * m.V'y'; y = m.P'x' }
388
- assert(p:match('abcx') == 5 and p:match('ayzx') == 5 and not p:match'abc')
389
-
390
-
391
- -- tests for non-pattern as arguments to pattern functions
392
-
393
- p = { ('a' * m.V(1))^-1 } * m.P'b' * { 'a' * m.V(2); m.V(1)^-1 }
394
- assert(m.match(p, "aaabaac") == 7)
395
-
396
- p = m.P'abc' * 2 * -5 * true * 'de' -- mix of numbers and strings and booleans
397
-
398
- assert(p:match("abc01de") == 8)
399
- assert(p:match("abc01de3456") == nil)
400
-
401
- p = 'abc' * (2 * (-5 * (true * m.P'de')))
402
-
403
- assert(p:match("abc01de") == 8)
404
- assert(p:match("abc01de3456") == nil)
405
-
406
- p = { m.V(2), m.P"abc" } *
407
- (m.P{ "xx", xx = m.P"xx" } + { "x", x = m.P"a" * m.V"x" + "" })
408
- assert(p:match("abcaaaxx") == 7)
409
- assert(p:match("abcxx") == 6)
410
-
411
-
412
- -- a large table capture
413
- t = m.match(m.Ct(m.C('a')^0), string.rep("a", 10000))
414
- assert(#t == 10000 and t[1] == 'a' and t[#t] == 'a')
415
-
416
- print('+')
417
-
418
-
419
- -- bug in 0.10 (rechecking a grammar, after tail-call optimization)
420
- m.P{ m.P { (m.P(3) + "xuxu")^0 * m.V"xuxu", xuxu = m.P(1) } }
421
-
422
- local V = m.V
423
-
424
- local Space = m.S(" \n\t")^0
425
- local Number = m.C(m.R("09")^1) * Space
426
- local FactorOp = m.C(m.S("+-")) * Space
427
- local TermOp = m.C(m.S("*/")) * Space
428
- local Open = "(" * Space
429
- local Close = ")" * Space
430
-
431
-
432
- local function f_factor (v1, op, v2, d)
433
- assert(d == nil)
434
- if op == "+" then return v1 + v2
435
- else return v1 - v2
436
- end
437
- end
438
-
439
-
440
- local function f_term (v1, op, v2, d)
441
- assert(d == nil)
442
- if op == "*" then return v1 * v2
443
- else return v1 / v2
444
- end
445
- end
446
-
447
- G = m.P{ "Exp",
448
- Exp = m.Cf(V"Factor" * m.Cg(FactorOp * V"Factor")^0, f_factor);
449
- Factor = m.Cf(V"Term" * m.Cg(TermOp * V"Term")^0, f_term);
450
- Term = Number / tonumber + Open * V"Exp" * Close;
451
- }
452
-
453
- G = Space * G * -1
454
-
455
- for _, s in ipairs{" 3 + 5*9 / (1+1) ", "3+4/2", "3+3-3- 9*2+3*9/1- 8"} do
456
- assert(m.match(G, s) == loadstring("return "..s)())
457
- end
458
-
459
-
460
- -- test for grammars (errors deep in calling non-terminals)
461
- g = m.P{
462
- [1] = m.V(2) + "a",
463
- [2] = "a" * m.V(3) * "x",
464
- [3] = "b" * m.V(3) + "c"
465
- }
466
-
467
- assert(m.match(g, "abbbcx") == 7)
468
- assert(m.match(g, "abbbbx") == 2)
469
-
470
-
471
- -- tests for \0
472
- assert(m.match(m.R("\0\1")^1, "\0\1\0") == 4)
473
- assert(m.match(m.S("\0\1ab")^1, "\0\1\0a") == 5)
474
- assert(m.match(m.P(1)^3, "\0\1\0a") == 5)
475
- assert(not m.match(-4, "\0\1\0a"))
476
- assert(m.match("\0\1\0a", "\0\1\0a") == 5)
477
- assert(m.match("\0\0\0", "\0\0\0") == 4)
478
- assert(not m.match("\0\0\0", "\0\0"))
479
-
480
-
481
- -- tests for predicates
482
- assert(not m.match(-m.P("a") * 2, "alo"))
483
- assert(m.match(- -m.P("a") * 2, "alo") == 3)
484
- assert(m.match(#m.P("a") * 2, "alo") == 3)
485
- assert(m.match(##m.P("a") * 2, "alo") == 3)
486
- assert(not m.match(##m.P("c") * 2, "alo"))
487
- assert(m.match(m.Cs((##m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
488
- assert(m.match(m.Cs((#((#m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
489
- assert(m.match(m.Cs((- -m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
490
- assert(m.match(m.Cs((-((-m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
491
-
492
- p = -m.P'a' * m.Cc(1) + -m.P'b' * m.Cc(2) + -m.P'c' * m.Cc(3)
493
- assert(p:match('a') == 2 and p:match('') == 1 and p:match('b') == 1)
494
-
495
- p = -m.P'a' * m.Cc(10) + #m.P'a' * m.Cc(20)
496
- assert(p:match('a') == 20 and p:match('') == 10 and p:match('b') == 10)
497
-
498
-
499
-
500
- -- look-behind predicate
501
- assert(not m.match(m.B'a', 'a'))
502
- assert(m.match(1 * m.B'a', 'a') == 2)
503
- assert(not m.match(m.B(1), 'a'))
504
- assert(m.match(1 * m.B(1), 'a') == 2)
505
- assert(m.match(-m.B(1), 'a') == 1)
506
- assert(m.match(m.B(250), string.rep('a', 250)) == nil)
507
- assert(m.match(250 * m.B(250), string.rep('a', 250)) == 251)
508
-
509
- -- look-behind with an open call
510
- checkerr("pattern may not have fixed length", m.B, m.V'S1')
511
- checkerr("too long to look behind", m.B, 260)
512
-
513
- B = #letter * -m.B(letter) + -letter * m.B(letter)
514
- x = m.Ct({ (B * m.Cp())^-1 * (1 * m.V(1) + m.P(true)) })
515
- checkeq(m.match(x, 'ar cal c'), {1,3,4,7,9,10})
516
- checkeq(m.match(x, ' ar cal '), {2,4,5,8})
517
- checkeq(m.match(x, ' '), {})
518
- checkeq(m.match(x, 'aloalo'), {1,7})
519
-
520
- assert(m.match(B, "a") == 1)
521
- assert(m.match(1 * B, "a") == 2)
522
- assert(not m.B(1 - letter):match(""))
523
- assert((-m.B(letter)):match("") == 1)
524
-
525
- assert((4 * m.B(letter, 4)):match("aaaaaaaa") == 5)
526
- assert(not (4 * m.B(#letter * 5)):match("aaaaaaaa"))
527
- assert((4 * -m.B(#letter * 5)):match("aaaaaaaa") == 5)
528
-
529
- -- look-behind with grammars
530
- assert(m.match('a' * m.B{'x', x = m.P(3)}, 'aaa') == nil)
531
- assert(m.match('aa' * m.B{'x', x = m.P('aaa')}, 'aaaa') == nil)
532
- assert(m.match('aaa' * m.B{'x', x = m.P('aaa')}, 'aaaaa') == 4)
533
-
534
-
535
-
536
- -- bug in 0.9
537
- assert(m.match(('a' * #m.P'b'), "ab") == 2)
538
- assert(not m.match(('a' * #m.P'b'), "a"))
539
-
540
- assert(not m.match(#m.S'567', ""))
541
- assert(m.match(#m.S'567' * 1, "6") == 2)
542
-
543
-
544
- -- tests for Tail Calls
545
-
546
- p = m.P{ 'a' * m.V(1) + '' }
547
- assert(p:match(string.rep('a', 1000)) == 1001)
548
-
549
- -- create a grammar for a simple DFA for even number of 0s and 1s
550
- --
551
- -- ->1 <---0---> 2
552
- -- ^ ^
553
- -- | |
554
- -- 1 1
555
- -- | |
556
- -- V V
557
- -- 3 <---0---> 4
558
- --
559
- -- this grammar should keep no backtracking information
560
-
561
- p = m.P{
562
- [1] = '0' * m.V(2) + '1' * m.V(3) + -1,
563
- [2] = '0' * m.V(1) + '1' * m.V(4),
564
- [3] = '0' * m.V(4) + '1' * m.V(1),
565
- [4] = '0' * m.V(3) + '1' * m.V(2),
566
- }
567
-
568
- assert(p:match(string.rep("00", 10000)))
569
- assert(p:match(string.rep("01", 10000)))
570
- assert(p:match(string.rep("011", 10000)))
571
- assert(not p:match(string.rep("011", 10000) .. "1"))
572
- assert(not p:match(string.rep("011", 10001)))
573
-
574
-
575
- -- this grammar does need backtracking info.
576
- local lim = 10000
577
- p = m.P{ '0' * m.V(1) + '0' }
578
- checkerr("too many pending", m.match, p, string.rep("0", lim))
579
- m.setmaxstack(2*lim)
580
- checkerr("too many pending", m.match, p, string.rep("0", lim))
581
- m.setmaxstack(2*lim + 4)
582
- assert(m.match(p, string.rep("0", lim)) == lim + 1)
583
-
584
- -- this repetition should not need stack space (only the call does)
585
- p = m.P{ ('a' * m.V(1))^0 * 'b' + 'c' }
586
- m.setmaxstack(200)
587
- assert(p:match(string.rep('a', 180) .. 'c' .. string.rep('b', 180)) == 362)
588
-
589
- m.setmaxstack(5) -- restore original limit
590
-
591
- -- tests for optional start position
592
- assert(m.match("a", "abc", 1))
593
- assert(m.match("b", "abc", 2))
594
- assert(m.match("c", "abc", 3))
595
- assert(not m.match(1, "abc", 4))
596
- assert(m.match("a", "abc", -3))
597
- assert(m.match("b", "abc", -2))
598
- assert(m.match("c", "abc", -1))
599
- assert(m.match("abc", "abc", -4)) -- truncate to position 1
600
-
601
- assert(m.match("", "abc", 10)) -- empty string is everywhere!
602
- assert(m.match("", "", 10))
603
- assert(not m.match(1, "", 1))
604
- assert(not m.match(1, "", -1))
605
- assert(not m.match(1, "", 0))
606
-
607
- print("+")
608
-
609
-
610
- -- tests for argument captures
611
- checkerr("invalid argument", m.Carg, 0)
612
- checkerr("invalid argument", m.Carg, -1)
613
- checkerr("invalid argument", m.Carg, 2^18)
614
- checkerr("absent extra argument #1", m.match, m.Carg(1), 'a', 1)
615
- assert(m.match(m.Carg(1), 'a', 1, print) == print)
616
- x = {m.match(m.Carg(1) * m.Carg(2), '', 1, 10, 20)}
617
- checkeq(x, {10, 20})
618
-
619
- assert(m.match(m.Cmt(m.Cg(m.Carg(3), "a") *
620
- m.Cmt(m.Cb("a"), function (s,i,x)
621
- assert(s == "a" and i == 1);
622
- return i, x+1
623
- end) *
624
- m.Carg(2), function (s,i,a,b,c)
625
- assert(s == "a" and i == 1 and c == nil);
626
- return i, 2*a + 3*b
627
- end) * "a",
628
- "a", 1, false, 100, 1000) == 2*1001 + 3*100)
629
-
630
-
631
- -- tests for Lua functions
632
-
633
- t = {}
634
- s = ""
635
- p = m.P(function (s1, i) assert(s == s1); t[#t + 1] = i; return nil end) * false
636
- s = "hi, this is a test"
637
- assert(m.match(((p - m.P(-1)) + 2)^0, s) == string.len(s) + 1)
638
- assert(#t == string.len(s)/2 and t[1] == 1 and t[2] == 3)
639
-
640
- assert(not m.match(p, s))
641
-
642
- p = mt.__add(function (s, i) return i end, function (s, i) return nil end)
643
- assert(m.match(p, "alo"))
644
-
645
- p = mt.__mul(function (s, i) return i end, function (s, i) return nil end)
646
- assert(not m.match(p, "alo"))
647
-
648
-
649
- t = {}
650
- p = function (s1, i) assert(s == s1); t[#t + 1] = i; return i end
651
- s = "hi, this is a test"
652
- assert(m.match((m.P(1) * p)^0, s) == string.len(s) + 1)
653
- assert(#t == string.len(s) and t[1] == 2 and t[2] == 3)
654
-
655
- t = {}
656
- p = m.P(function (s1, i) assert(s == s1); t[#t + 1] = i;
657
- return i <= s1:len() and i end) * 1
658
- s = "hi, this is a test"
659
- assert(m.match(p^0, s) == string.len(s) + 1)
660
- assert(#t == string.len(s) + 1 and t[1] == 1 and t[2] == 2)
661
-
662
- p = function (s1, i) return m.match(m.P"a"^1, s1, i) end
663
- assert(m.match(p, "aaaa") == 5)
664
- assert(m.match(p, "abaa") == 2)
665
- assert(not m.match(p, "baaa"))
666
-
667
- checkerr("invalid position", m.match, function () return 2^20 end, s)
668
- checkerr("invalid position", m.match, function () return 0 end, s)
669
- checkerr("invalid position", m.match, function (s, i) return i - 1 end, s)
670
- checkerr("invalid position", m.match,
671
- m.P(1)^0 * function (_, i) return i - 1 end, s)
672
- assert(m.match(m.P(1)^0 * function (_, i) return i end * -1, s))
673
- checkerr("invalid position", m.match,
674
- m.P(1)^0 * function (_, i) return i + 1 end, s)
675
- assert(m.match(m.P(function (s, i) return s:len() + 1 end) * -1, s))
676
- checkerr("invalid position", m.match, m.P(function (s, i) return s:len() + 2 end) * -1, s)
677
- assert(not m.match(m.P(function (s, i) return s:len() end) * -1, s))
678
- assert(m.match(m.P(1)^0 * function (_, i) return true end, s) ==
679
- string.len(s) + 1)
680
- for i = 1, string.len(s) + 1 do
681
- assert(m.match(function (_, _) return i end, s) == i)
682
- end
683
-
684
- p = (m.P(function (s, i) return i%2 == 0 and i end) * 1
685
- + m.P(function (s, i) return i%2 ~= 0 and i + 2 <= s:len() and i end) * 3)^0
686
- * -1
687
- assert(p:match(string.rep('a', 14000)))
688
-
689
- -- tests for Function Replacements
690
- f = function (a, ...) if a ~= "x" then return {a, ...} end end
691
-
692
- t = m.match(m.C(1)^0/f, "abc")
693
- checkeq(t, {"a", "b", "c"})
694
-
695
- t = m.match(m.C(1)^0/f/f, "abc")
696
- checkeq(t, {{"a", "b", "c"}})
697
-
698
- t = m.match(m.P(1)^0/f/f, "abc") -- no capture
699
- checkeq(t, {{"abc"}})
700
-
701
- t = m.match((m.P(1)^0/f * m.Cp())/f, "abc")
702
- checkeq(t, {{"abc"}, 4})
703
-
704
- t = m.match((m.C(1)^0/f * m.Cp())/f, "abc")
705
- checkeq(t, {{"a", "b", "c"}, 4})
706
-
707
- t = m.match((m.C(1)^0/f * m.Cp())/f, "xbc")
708
- checkeq(t, {4})
709
-
710
- t = m.match(m.C(m.C(1)^0)/f, "abc")
711
- checkeq(t, {"abc", "a", "b", "c"})
712
-
713
- g = function (...) return 1, ... end
714
- t = {m.match(m.C(1)^0/g/g, "abc")}
715
- checkeq(t, {1, 1, "a", "b", "c"})
716
-
717
- t = {m.match(m.Cc(nil,nil,4) * m.Cc(nil,3) * m.Cc(nil, nil) / g / g, "")}
718
- t1 = {1,1,nil,nil,4,nil,3,nil,nil}
719
- for i=1,10 do assert(t[i] == t1[i]) end
720
-
721
- t = {m.match((m.C(1) / function (x) return x, x.."x" end)^0, "abc")}
722
- checkeq(t, {"a", "ax", "b", "bx", "c", "cx"})
723
-
724
- t = m.match(m.Ct((m.C(1) / function (x,y) return y, x end * m.Cc(1))^0), "abc")
725
- checkeq(t, {nil, "a", 1, nil, "b", 1, nil, "c", 1})
726
-
727
- -- tests for Query Replacements
728
-
729
- assert(m.match(m.C(m.C(1)^0)/{abc = 10}, "abc") == 10)
730
- assert(m.match(m.C(1)^0/{a = 10}, "abc") == 10)
731
- assert(m.match(m.S("ba")^0/{ab = 40}, "abc") == 40)
732
- t = m.match(m.Ct((m.S("ba")/{a = 40})^0), "abc")
733
- checkeq(t, {40})
734
-
735
- assert(m.match(m.Cs((m.C(1)/{a=".", d=".."})^0), "abcdde") == ".bc....e")
736
- assert(m.match(m.Cs((m.C(1)/{f="."})^0), "abcdde") == "abcdde")
737
- assert(m.match(m.Cs((m.C(1)/{d="."})^0), "abcdde") == "abc..e")
738
- assert(m.match(m.Cs((m.C(1)/{e="."})^0), "abcdde") == "abcdd.")
739
- assert(m.match(m.Cs((m.C(1)/{e=".", f="+"})^0), "eefef") == "..+.+")
740
- assert(m.match(m.Cs((m.C(1))^0), "abcdde") == "abcdde")
741
- assert(m.match(m.Cs(m.C(m.C(1)^0)), "abcdde") == "abcdde")
742
- assert(m.match(1 * m.Cs(m.P(1)^0), "abcdde") == "bcdde")
743
- assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "abcdde") == "abcdde")
744
- assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "0ab0b0") == "xabxbx")
745
- assert(m.match(m.Cs((m.C('0')/'x' + m.P(1)/{b=3})^0), "b0a0b") == "3xax3")
746
- assert(m.match(m.P(1)/'%0%0'/{aa = -3} * 'x', 'ax') == -3)
747
- assert(m.match(m.C(1)/'%0%1'/{aa = 'z'}/{z = -3} * 'x', 'ax') == -3)
748
-
749
- assert(m.match(m.Cs(m.Cc(0) * (m.P(1)/"")), "4321") == "0")
750
-
751
- assert(m.match(m.Cs((m.P(1) / "%0")^0), "abcd") == "abcd")
752
- assert(m.match(m.Cs((m.P(1) / "%0.%0")^0), "abcd") == "a.ab.bc.cd.d")
753
- assert(m.match(m.Cs((m.P("a") / "%0.%0" + 1)^0), "abcad") == "a.abca.ad")
754
- assert(m.match(m.C("a") / "%1%%%0", "a") == "a%a")
755
- assert(m.match(m.Cs((m.P(1) / ".xx")^0), "abcd") == ".xx.xx.xx.xx")
756
- assert(m.match(m.Cp() * m.P(3) * m.Cp()/"%2%1%1 - %0 ", "abcde") ==
757
- "411 - abc ")
758
-
759
- assert(m.match(m.P(1)/"%0", "abc") == "a")
760
- checkerr("invalid capture index", m.match, m.P(1)/"%1", "abc")
761
- checkerr("invalid capture index", m.match, m.P(1)/"%9", "abc")
762
-
763
- p = m.C(1)
764
- p = p * p; p = p * p; p = p * p * m.C(1) / "%9 - %1"
765
- assert(p:match("1234567890") == "9 - 1")
766
-
767
- assert(m.match(m.Cc(print), "") == print)
768
-
769
- -- too many captures (just ignore extra ones)
770
- p = m.C(1)^0 / "%2-%9-%0-%9"
771
- assert(p:match"01234567890123456789" == "1-8-01234567890123456789-8")
772
- s = string.rep("12345678901234567890", 20)
773
- assert(m.match(m.C(1)^0 / "%9-%1-%0-%3", s) == "9-1-" .. s .. "-3")
774
-
775
- -- string captures with non-string subcaptures
776
- p = m.Cc('alo') * m.C(1) / "%1 - %2 - %1"
777
- assert(p:match'x' == 'alo - x - alo')
778
-
779
- checkerr("invalid capture value (a boolean)", m.match, m.Cc(true) / "%1", "a")
780
-
781
- -- long strings for string capture
782
- l = 10000
783
- s = string.rep('a', l) .. string.rep('b', l) .. string.rep('c', l)
784
-
785
- p = (m.C(m.P'a'^1) * m.C(m.P'b'^1) * m.C(m.P'c'^1)) / '%3%2%1'
786
-
787
- assert(p:match(s) == string.rep('c', l) ..
788
- string.rep('b', l) ..
789
- string.rep('a', l))
790
-
791
- print"+"
792
-
793
- -- accumulator capture
794
- function f (x) return x + 1 end
795
- assert(m.match(m.Cf(m.Cc(0) * m.C(1)^0, f), "alo alo") == 7)
796
-
797
- t = {m.match(m.Cf(m.Cc(1,2,3), error), "")}
798
- checkeq(t, {1})
799
- p = m.Cf(m.Ct(true) * m.Cg(m.C(m.R"az"^1) * "=" * m.C(m.R"az"^1) * ";")^0,
800
- rawset)
801
- t = p:match("a=b;c=du;xux=yuy;")
802
- checkeq(t, {a="b", c="du", xux="yuy"})
803
-
804
-
805
- -- errors in accumulator capture
806
-
807
- -- no initial capture
808
- checkerr("no initial value", m.match, m.Cf(m.P(5), print), 'aaaaaa')
809
- -- no initial capture (very long match forces fold to be a pair open-close)
810
- checkerr("no initial value", m.match, m.Cf(m.P(500), print),
811
- string.rep('a', 600))
812
-
813
- -- nested capture produces no initial value
814
- checkerr("no initial value", m.match, m.Cf(m.P(1) / {}, print), "alo")
815
-
816
-
817
- -- tests for loop checker
818
-
819
- local function isnullable (p)
820
- checkerr("may accept empty string", function (p) return p^0 end, m.P(p))
821
- end
822
-
823
- isnullable(m.P("x")^-4)
824
- assert(m.match(((m.P(0) + 1) * m.S"al")^0, "alo") == 3)
825
- assert(m.match((("x" + #m.P(1))^-4 * m.S"al")^0, "alo") == 3)
826
- isnullable("")
827
- isnullable(m.P("x")^0)
828
- isnullable(m.P("x")^-1)
829
- isnullable(m.P("x") + 1 + 2 + m.P("a")^-1)
830
- isnullable(-m.P("ab"))
831
- isnullable(- -m.P("ab"))
832
- isnullable(# #(m.P("ab") + "xy"))
833
- isnullable(- #m.P("ab")^0)
834
- isnullable(# -m.P("ab")^1)
835
- isnullable(#m.V(3))
836
- isnullable(m.V(3) + m.V(1) + m.P('a')^-1)
837
- isnullable({[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)})
838
- assert(m.match(m.P{[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(1)}^0, "abc")
839
- == 3)
840
- assert(m.match(m.P""^-3, "a") == 1)
841
-
842
- local function find (p, s)
843
- return m.match(basiclookfor(p), s)
844
- end
845
-
846
-
847
- local function badgrammar (g, expected)
848
- local stat, msg = pcall(m.P, g)
849
- assert(not stat)
850
- if expected then assert(find(expected, msg)) end
851
- end
852
-
853
- badgrammar({[1] = m.V(1)}, "rule '1'")
854
- badgrammar({[1] = m.V(2)}, "rule '2'") -- invalid non-terminal
855
- badgrammar({[1] = m.V"x"}, "rule 'x'") -- invalid non-terminal
856
- badgrammar({[1] = m.V{}}, "rule '(a table)'") -- invalid non-terminal
857
- badgrammar({[1] = #m.P("a") * m.V(1)}, "rule '1'") -- left-recursive
858
- badgrammar({[1] = -m.P("a") * m.V(1)}, "rule '1'") -- left-recursive
859
- badgrammar({[1] = -1 * m.V(1)}, "rule '1'") -- left-recursive
860
- badgrammar({[1] = -1 + m.V(1)}, "rule '1'") -- left-recursive
861
- badgrammar({[1] = 1 * m.V(2), [2] = m.V(2)}, "rule '2'") -- left-recursive
862
- badgrammar({[1] = 1 * m.V(2)^0, [2] = m.P(0)}, "rule '1'") -- inf. loop
863
- badgrammar({ m.V(2), m.V(3)^0, m.P"" }, "rule '2'") -- inf. loop
864
- badgrammar({ m.V(2) * m.V(3)^0, m.V(3)^0, m.P"" }, "rule '1'") -- inf. loop
865
- badgrammar({"x", x = #(m.V(1) * 'a') }, "rule '1'") -- inf. loop
866
- badgrammar({ -(m.V(1) * 'a') }, "rule '1'") -- inf. loop
867
- badgrammar({"x", x = m.P'a'^-1 * m.V"x"}, "rule 'x'") -- left recursive
868
- badgrammar({"x", x = m.P'a' * m.V"y"^1, y = #m.P(1)}, "rule 'x'")
869
-
870
- assert(m.match({'a' * -m.V(1)}, "aaa") == 2)
871
- assert(m.match({'a' * -m.V(1)}, "aaaa") == nil)
872
-
873
-
874
- -- good x bad grammars
875
- m.P{ ('a' * m.V(1))^-1 }
876
- m.P{ -('a' * m.V(1)) }
877
- m.P{ ('abc' * m.V(1))^-1 }
878
- m.P{ -('abc' * m.V(1)) }
879
- badgrammar{ #m.P('abc') * m.V(1) }
880
- badgrammar{ -('a' + m.V(1)) }
881
- m.P{ #('a' * m.V(1)) }
882
- badgrammar{ #('a' + m.V(1)) }
883
- m.P{ m.B{ m.P'abc' } * 'a' * m.V(1) }
884
- badgrammar{ m.B{ m.P'abc' } * m.V(1) }
885
- badgrammar{ ('a' + m.P'bcd')^-1 * m.V(1) }
886
-
887
-
888
- -- simple tests for maximum sizes:
889
- local p = m.P"a"
890
- for i=1,14 do p = p * p end
891
-
892
- p = {}
893
- for i=1,100 do p[i] = m.P"a" end
894
- p = m.P(p)
895
-
896
-
897
- -- strange values for rule labels
898
-
899
- p = m.P{ "print",
900
- print = m.V(print),
901
- [print] = m.V(_G),
902
- [_G] = m.P"a",
903
- }
904
-
905
- assert(p:match("a"))
906
-
907
- -- initial rule
908
- g = {}
909
- for i = 1, 10 do g["i"..i] = "a" * m.V("i"..i+1) end
910
- g.i11 = m.P""
911
- for i = 1, 10 do
912
- g[1] = "i"..i
913
- local p = m.P(g)
914
- assert(p:match("aaaaaaaaaaa") == 11 - i + 1)
915
- end
916
-
917
- print"+"
918
-
919
-
920
- -- tests for back references
921
- checkerr("back reference 'x' not found", m.match, m.Cb('x'), '')
922
- checkerr("back reference 'b' not found", m.match, m.Cg(1, 'a') * m.Cb('b'), 'a')
923
-
924
- p = m.Cg(m.C(1) * m.C(1), "k") * m.Ct(m.Cb("k"))
925
- t = p:match("ab")
926
- checkeq(t, {"a", "b"})
927
-
928
-
929
- t = {}
930
- function foo (p) t[#t + 1] = p; return p .. "x" end
931
-
932
- p = m.Cg(m.C(2) / foo, "x") * m.Cb"x" *
933
- m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" *
934
- m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" *
935
- m.Cg(m.Cb('x') / foo, "x") * m.Cb"x"
936
- x = {p:match'ab'}
937
- checkeq(x, {'abx', 'abxx', 'abxxx', 'abxxxx'})
938
- checkeq(t, {'ab',
939
- 'ab', 'abx',
940
- 'ab', 'abx', 'abxx',
941
- 'ab', 'abx', 'abxx', 'abxxx'})
942
-
943
-
944
-
945
- -- tests for match-time captures
946
-
947
- p = m.P'a' * (function (s, i) return (s:sub(i, i) == 'b') and i + 1 end)
948
- + 'acd'
949
-
950
- assert(p:match('abc') == 3)
951
- assert(p:match('acd') == 4)
952
-
953
- local function id (s, i, ...)
954
- return true, ...
955
- end
956
-
957
- assert(m.Cmt(m.Cs((m.Cmt(m.S'abc' / { a = 'x', c = 'y' }, id) +
958
- m.R'09'^1 / string.char +
959
- m.P(1))^0), id):match"acb98+68c" == "xyb\98+\68y")
960
-
961
- p = m.P{'S',
962
- S = m.V'atom' * space
963
- + m.Cmt(m.Ct("(" * space * (m.Cmt(m.V'S'^1, id) + m.P(true)) * ")" * space), id),
964
- atom = m.Cmt(m.C(m.R("AZ", "az", "09")^1), id)
965
- }
966
- x = p:match"(a g () ((b) c) (d (e)))"
967
- checkeq(x, {'a', 'g', {}, {{'b'}, 'c'}, {'d', {'e'}}});
968
-
969
- x = {(m.Cmt(1, id)^0):match(string.rep('a', 500))}
970
- assert(#x == 500)
971
-
972
- local function id(s, i, x)
973
- if x == 'a' then return i, 1, 3, 7
974
- else return nil, 2, 4, 6, 8
975
- end
976
- end
977
-
978
- p = ((m.P(id) * 1 + m.Cmt(2, id) * 1 + m.Cmt(1, id) * 1))^0
979
- assert(table.concat{p:match('abababab')} == string.rep('137', 4))
980
-
981
- local function ref (s, i, x)
982
- return m.match(x, s, i - x:len())
983
- end
984
-
985
- assert(m.Cmt(m.P(1)^0, ref):match('alo') == 4)
986
- assert((m.P(1) * m.Cmt(m.P(1)^0, ref)):match('alo') == 4)
987
- assert(not (m.P(1) * m.Cmt(m.C(1)^0, ref)):match('alo'))
988
-
989
- ref = function (s,i,x) return i == tonumber(x) and i, 'xuxu' end
990
-
991
- assert(m.Cmt(1, ref):match'2')
992
- assert(not m.Cmt(1, ref):match'1')
993
- assert(m.Cmt(m.P(1)^0, ref):match'03')
994
-
995
- function ref (s, i, a, b)
996
- if a == b then return i, a:upper() end
997
- end
998
-
999
- p = m.Cmt(m.C(m.R"az"^1) * "-" * m.C(m.R"az"^1), ref)
1000
- p = (any - p)^0 * p * any^0 * -1
1001
-
1002
- assert(p:match'abbbc-bc ddaa' == 'BC')
1003
-
1004
- do -- match-time captures cannot be optimized away
1005
- local touch = 0
1006
- f = m.P(function () touch = touch + 1; return true end)
1007
-
1008
- local function check(n) n = n or 1; assert(touch == n); touch = 0 end
1009
-
1010
- assert(m.match(f * false + 'b', 'a') == nil); check()
1011
- assert(m.match(f * false + 'b', '') == nil); check()
1012
- assert(m.match( (f * 'a')^0 * 'b', 'b') == 2); check()
1013
- assert(m.match( (f * 'a')^0 * 'b', '') == nil); check()
1014
- assert(m.match( (f * 'a')^-1 * 'b', 'b') == 2); check()
1015
- assert(m.match( (f * 'a')^-1 * 'b', '') == nil); check()
1016
- assert(m.match( ('b' + f * 'a')^-1 * 'b', '') == nil); check()
1017
- assert(m.match( (m.P'b'^-1 * f * 'a')^-1 * 'b', '') == nil); check()
1018
- assert(m.match( (-m.P(1) * m.P'b'^-1 * f * 'a')^-1 * 'b', '') == nil);
1019
- check()
1020
- assert(m.match( (f * 'a' + 'b')^-1 * 'b', '') == nil); check()
1021
- assert(m.match(f * 'a' + f * 'b', 'b') == 2); check(2)
1022
- assert(m.match(f * 'a' + f * 'b', 'a') == 2); check(1)
1023
- assert(m.match(-f * 'a' + 'b', 'b') == 2); check(1)
1024
- assert(m.match(-f * 'a' + 'b', '') == nil); check(1)
1025
- end
1026
-
1027
- c = '[' * m.Cg(m.P'='^0, "init") * '[' *
1028
- { m.Cmt(']' * m.C(m.P'='^0) * ']' * m.Cb("init"), function (_, _, s1, s2)
1029
- return s1 == s2 end)
1030
- + 1 * m.V(1) } / 0
1031
-
1032
- assert(c:match'[==[]]====]]]]==]===[]' == 18)
1033
- assert(c:match'[[]=]====]=]]]==]===[]' == 14)
1034
- assert(not c:match'[[]=]====]=]=]==]===[]')
1035
-
1036
-
1037
- -- old bug: optimization of concat with fail removed match-time capture
1038
- p = m.Cmt(0, function (s) p = s end) * m.P(false)
1039
- assert(not p:match('alo'))
1040
- assert(p == 'alo')
1041
-
1042
-
1043
- -- ensure that failed match-time captures are not kept on Lua stack
1044
- do
1045
- local t = {__mode = "kv"}; setmetatable(t,t)
1046
- local c = 0
1047
-
1048
- local function foo (s,i)
1049
- collectgarbage();
1050
- assert(next(t) == "__mode" and next(t, "__mode") == nil)
1051
- local x = {}
1052
- t[x] = true
1053
- c = c + 1
1054
- return i, x
1055
- end
1056
-
1057
- local p = m.P{ m.Cmt(0, foo) * m.P(false) + m.P(1) * m.V(1) + m.P"" }
1058
- p:match(string.rep('1', 10))
1059
- assert(c == 11)
1060
- end
1061
-
1062
- p = (m.P(function () return true, "a" end) * 'a'
1063
- + m.P(function (s, i) return i, "aa", 20 end) * 'b'
1064
- + m.P(function (s,i) if i <= #s then return i, "aaa" end end) * 1)^0
1065
-
1066
- t = {p:match('abacc')}
1067
- checkeq(t, {'a', 'aa', 20, 'a', 'aaa', 'aaa'})
1068
-
1069
-
1070
- -------------------------------------------------------------------
1071
- -- Tests for 're' module
1072
- -------------------------------------------------------------------
1073
-
1074
- local re = require "lib/re"
1075
-
1076
- local match, compile = re.match, re.compile
1077
-
1078
-
1079
-
1080
- assert(match("a", ".") == 2)
1081
- assert(match("a", "''") == 1)
1082
- assert(match("", " ! . ") == 1)
1083
- assert(not match("a", " ! . "))
1084
- assert(match("abcde", " ( . . ) * ") == 5)
1085
- assert(match("abbcde", " [a-c] +") == 5)
1086
- assert(match("0abbc1de", "'0' [a-c]+ '1'") == 7)
1087
- assert(match("0zz1dda", "'0' [^a-c]+ 'a'") == 8)
1088
- assert(match("abbc--", " [a-c] + +") == 5)
1089
- assert(match("abbc--", " [ac-] +") == 2)
1090
- assert(match("abbc--", " [-acb] + ") == 7)
1091
- assert(not match("abbcde", " [b-z] + "))
1092
- assert(match("abb\"de", '"abb"["]"de"') == 7)
1093
- assert(match("abceeef", "'ac' ? 'ab' * 'c' { 'e' * } / 'abceeef' ") == "eee")
1094
- assert(match("abceeef", "'ac'? 'ab'* 'c' { 'f'+ } / 'abceeef' ") == 8)
1095
- local t = {match("abceefe", "( ( & 'e' {} ) ? . ) * ")}
1096
- checkeq(t, {4, 5, 7})
1097
- local t = {match("abceefe", "((&&'e' {})? .)*")}
1098
- checkeq(t, {4, 5, 7})
1099
- local t = {match("abceefe", "( ( ! ! 'e' {} ) ? . ) *")}
1100
- checkeq(t, {4, 5, 7})
1101
- local t = {match("abceefe", "(( & ! & ! 'e' {})? .)*")}
1102
- checkeq(t, {4, 5, 7})
1103
-
1104
- assert(match("cccx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 5)
1105
- assert(match("cdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 4)
1106
- assert(match("abcdcdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 8)
1107
-
1108
- assert(match("abc", "a <- (. a)?") == 4)
1109
- b = "balanced <- '(' ([^()] / balanced)* ')'"
1110
- assert(match("(abc)", b))
1111
- assert(match("(a(b)((c) (d)))", b))
1112
- assert(not match("(a(b ((c) (d)))", b))
1113
-
1114
- b = compile[[ balanced <- "(" ([^()] / balanced)* ")" ]]
1115
- assert(b == m.P(b))
1116
- assert(b:match"((((a))(b)))")
1117
-
1118
- local g = [[
1119
- S <- "0" B / "1" A / "" -- balanced strings
1120
- A <- "0" S / "1" A A -- one more 0
1121
- B <- "1" S / "0" B B -- one more 1
1122
- ]]
1123
- assert(match("00011011", g) == 9)
1124
-
1125
- local g = [[
1126
- S <- ("0" B / "1" A)*
1127
- A <- "0" / "1" A A
1128
- B <- "1" / "0" B B
1129
- ]]
1130
- assert(match("00011011", g) == 9)
1131
- assert(match("000110110", g) == 9)
1132
- assert(match("011110110", g) == 3)
1133
- assert(match("000110010", g) == 1)
1134
-
1135
- s = "aaaaaaaaaaaaaaaaaaaaaaaa"
1136
- assert(match(s, "'a'^3") == 4)
1137
- assert(match(s, "'a'^0") == 1)
1138
- assert(match(s, "'a'^+3") == s:len() + 1)
1139
- assert(not match(s, "'a'^+30"))
1140
- assert(match(s, "'a'^-30") == s:len() + 1)
1141
- assert(match(s, "'a'^-5") == 6)
1142
- for i = 1, s:len() do
1143
- assert(match(s, string.format("'a'^+%d", i)) >= i + 1)
1144
- assert(match(s, string.format("'a'^-%d", i)) <= i + 1)
1145
- assert(match(s, string.format("'a'^%d", i)) == i + 1)
1146
- end
1147
- assert(match("01234567890123456789", "[0-9]^3+") == 19)
1148
-
1149
-
1150
- assert(match("01234567890123456789", "({....}{...}) -> '%2%1'") == "4560123")
1151
- t = match("0123456789", "{| {.}* |}")
1152
- checkeq(t, {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"})
1153
- assert(match("012345", "{| (..) -> '%0%0' |}")[1] == "0101")
1154
-
1155
- assert(match("abcdef", "( {.} {.} {.} {.} {.} ) -> 3") == "c")
1156
- assert(match("abcdef", "( {:x: . :} {.} {.} {.} {.} ) -> 3") == "d")
1157
- assert(match("abcdef", "( {:x: . :} {.} {.} {.} {.} ) -> 0") == 6)
1158
-
1159
- assert(not match("abcdef", "{:x: ({.} {.} {.}) -> 2 :} =x"))
1160
- assert(match("abcbef", "{:x: ({.} {.} {.}) -> 2 :} =x"))
1161
-
1162
- eqcharset(compile"[]]", "]")
1163
- eqcharset(compile"[][]", m.S"[]")
1164
- eqcharset(compile"[]-]", m.S"-]")
1165
- eqcharset(compile"[-]", m.S"-")
1166
- eqcharset(compile"[az-]", m.S"a-z")
1167
- eqcharset(compile"[-az]", m.S"a-z")
1168
- eqcharset(compile"[a-z]", m.R"az")
1169
- eqcharset(compile"[]['\"]", m.S[[]['"]])
1170
-
1171
- eqcharset(compile"[^]]", any - "]")
1172
- eqcharset(compile"[^][]", any - m.S"[]")
1173
- eqcharset(compile"[^]-]", any - m.S"-]")
1174
- eqcharset(compile"[^]-]", any - m.S"-]")
1175
- eqcharset(compile"[^-]", any - m.S"-")
1176
- eqcharset(compile"[^az-]", any - m.S"a-z")
1177
- eqcharset(compile"[^-az]", any - m.S"a-z")
1178
- eqcharset(compile"[^a-z]", any - m.R"az")
1179
- eqcharset(compile"[^]['\"]", any - m.S[[]['"]])
1180
-
1181
- -- tests for comments in 're'
1182
- e = compile[[
1183
- A <- _B -- \t \n %nl .<> <- -> --
1184
- _B <- 'x' --]]
1185
- assert(e:match'xy' == 2)
1186
-
1187
- -- tests for 're' with pre-definitions
1188
- defs = {digits = m.R"09", letters = m.R"az", _=m.P"__"}
1189
- e = compile("%letters (%letters / %digits)*", defs)
1190
- assert(e:match"x123" == 5)
1191
- e = compile("%_", defs)
1192
- assert(e:match"__" == 3)
1193
-
1194
- e = compile([[
1195
- S <- A+
1196
- A <- %letters+ B
1197
- B <- %digits+
1198
- ]], defs)
1199
-
1200
- e = compile("{[0-9]+'.'?[0-9]*} -> sin", math)
1201
- assert(e:match("2.34") == math.sin(2.34))
1202
-
1203
-
1204
- function eq (_, _, a, b) return a == b end
1205
-
1206
- c = re.compile([[
1207
- longstring <- '[' {:init: '='* :} '[' close
1208
- close <- ']' =init ']' / . close
1209
- ]])
1210
-
1211
- assert(c:match'[==[]]===]]]]==]===[]' == 17)
1212
- assert(c:match'[[]=]====]=]]]==]===[]' == 14)
1213
- assert(not c:match'[[]=]====]=]=]==]===[]')
1214
-
1215
- c = re.compile" '[' {:init: '='* :} '[' (!(']' =init ']') .)* ']' =init ']' !. "
1216
-
1217
- assert(c:match'[==[]]===]]]]==]')
1218
- assert(c:match'[[]=]====]=][]==]===[]]')
1219
- assert(not c:match'[[]=]====]=]=]==]===[]')
1220
-
1221
- assert(re.find("hi alalo", "{:x:..:} =x") == 4)
1222
- assert(re.find("hi alalo", "{:x:..:} =x", 4) == 4)
1223
- assert(not re.find("hi alalo", "{:x:..:} =x", 5))
1224
- assert(re.find("hi alalo", "{'al'}", 5) == 6)
1225
- assert(re.find("hi aloalolo", "{:x:..:} =x") == 8)
1226
- assert(re.find("alo alohi x x", "{:word:%w+:}%W*(=word)!%w") == 11)
1227
-
1228
- -- re.find discards any captures
1229
- local a,b,c = re.find("alo", "{.}{'o'}")
1230
- assert(a == 2 and b == 3 and c == nil)
1231
-
1232
- local function match (s,p)
1233
- local i,e = re.find(s,p)
1234
- if i then return s:sub(i, e) end
1235
- end
1236
- assert(match("alo alo", '[a-z]+') == "alo")
1237
- assert(match("alo alo", '{:x: [a-z]+ :} =x') == nil)
1238
- assert(match("alo alo", "{:x: [a-z]+ :} ' ' =x") == "alo alo")
1239
-
1240
- assert(re.gsub("alo alo", "[abc]", "x") == "xlo xlo")
1241
- assert(re.gsub("alo alo", "%w+", ".") == ". .")
1242
- assert(re.gsub("hi, how are you", "[aeiou]", string.upper) ==
1243
- "hI, hOw ArE yOU")
1244
-
1245
- s = 'hi [[a comment[=]=] ending here]] and [=[another]]=]]'
1246
- c = re.compile" '[' {:i: '='* :} '[' (!(']' =i ']') .)* ']' { =i } ']' "
1247
- assert(re.gsub(s, c, "%2") == 'hi and =]')
1248
- assert(re.gsub(s, c, "%0") == s)
1249
- assert(re.gsub('[=[hi]=]', c, "%2") == '=')
1250
-
1251
- assert(re.find("", "!.") == 1)
1252
- assert(re.find("alo", "!.") == 4)
1253
-
1254
- function addtag (s, i, t, tag) t.tag = tag; return i, t end
1255
-
1256
- c = re.compile([[
1257
- doc <- block !.
1258
- block <- (start {| (block / { [^<]+ })* |} end?) => addtag
1259
- start <- '<' {:tag: [a-z]+ :} '>'
1260
- end <- '</' { =tag } '>'
1261
- ]], {addtag = addtag})
1262
-
1263
- x = c:match[[
1264
- <x>hi<b>hello</b>but<b>totheend</x>]]
1265
- checkeq(x, {tag='x', 'hi', {tag = 'b', 'hello'}, 'but',
1266
- {'totheend'}})
1267
-
1268
-
1269
- -- tests for look-ahead captures
1270
- x = {re.match("alo", "&(&{.}) !{'b'} {&(...)} &{..} {...} {!.}")}
1271
- checkeq(x, {"", "alo", ""})
1272
-
1273
- assert(re.match("aloalo",
1274
- "{~ (((&'al' {.}) -> 'A%1' / (&%l {.}) -> '%1%1') / .)* ~}")
1275
- == "AallooAalloo")
1276
-
1277
- -- bug in 0.9 (and older versions), due to captures in look-aheads
1278
- x = re.compile[[ {~ (&(. ([a-z]* -> '*')) ([a-z]+ -> '+') ' '*)* ~} ]]
1279
- assert(x:match"alo alo" == "+ +")
1280
-
1281
- -- valid capture in look-ahead (used inside the look-ahead itself)
1282
- x = re.compile[[
1283
- S <- &({:two: .. :} . =two) {[a-z]+} / . S
1284
- ]]
1285
- assert(x:match("hello aloaLo aloalo xuxu") == "aloalo")
1286
-
1287
-
1288
- p = re.compile[[
1289
- block <- {| {:ident:space*:} line
1290
- ((=ident !space line) / &(=ident space) block)* |}
1291
- line <- {[^%nl]*} %nl
1292
- space <- '_' -- should be ' ', but '_' is simpler for editors
1293
- ]]
1294
-
1295
- t= p:match[[
1296
- 1
1297
- __1.1
1298
- __1.2
1299
- ____1.2.1
1300
- ____
1301
- 2
1302
- __2.1
1303
- ]]
1304
- checkeq(t, {"1", {"1.1", "1.2", {"1.2.1", "", ident = "____"}, ident = "__"},
1305
- "2", {"2.1", ident = "__"}, ident = ""})
1306
-
1307
-
1308
- -- nested grammars
1309
- p = re.compile[[
1310
- s <- a b !.
1311
- b <- ( x <- ('b' x)? )
1312
- a <- ( x <- 'a' x? )
1313
- ]]
1314
-
1315
- assert(p:match'aaabbb')
1316
- assert(p:match'aaa')
1317
- assert(not p:match'bbb')
1318
- assert(not p:match'aaabbba')
1319
-
1320
- -- testing groups
1321
- t = {re.match("abc", "{:S <- {:.:} {S} / '':}")}
1322
- checkeq(t, {"a", "bc", "b", "c", "c", ""})
1323
-
1324
- t = re.match("1234", "{| {:a:.:} {:b:.:} {:c:.{.}:} |}")
1325
- checkeq(t, {a="1", b="2", c="4"})
1326
- t = re.match("1234", "{|{:a:.:} {:b:{.}{.}:} {:c:{.}:}|}")
1327
- checkeq(t, {a="1", b="2", c="4"})
1328
- t = re.match("12345", "{| {:.:} {:b:{.}{.}:} {:{.}{.}:} |}")
1329
- checkeq(t, {"1", b="2", "4", "5"})
1330
- t = re.match("12345", "{| {:.:} {:{:b:{.}{.}:}:} {:{.}{.}:} |}")
1331
- checkeq(t, {"1", "23", "4", "5"})
1332
- t = re.match("12345", "{| {:.:} {{:b:{.}{.}:}} {:{.}{.}:} |}")
1333
- checkeq(t, {"1", "23", "4", "5"})
1334
-
1335
-
1336
- -- testing pre-defined names
1337
- assert(os.setlocale("C") == "C")
1338
-
1339
- function eqlpeggsub (p1, p2)
1340
- local s1 = cs2str(re.compile(p1))
1341
- local s2 = string.gsub(allchar, "[^" .. p2 .. "]", "")
1342
- -- if s1 ~= s2 then print(#s1,#s2) end
1343
- assert(s1 == s2)
1344
- end
1345
-
1346
-
1347
- eqlpeggsub("%w", "%w")
1348
- eqlpeggsub("%a", "%a")
1349
- eqlpeggsub("%l", "%l")
1350
- eqlpeggsub("%u", "%u")
1351
- eqlpeggsub("%p", "%p")
1352
- eqlpeggsub("%d", "%d")
1353
- eqlpeggsub("%x", "%x")
1354
- eqlpeggsub("%s", "%s")
1355
- eqlpeggsub("%c", "%c")
1356
-
1357
- eqlpeggsub("%W", "%W")
1358
- eqlpeggsub("%A", "%A")
1359
- eqlpeggsub("%L", "%L")
1360
- eqlpeggsub("%U", "%U")
1361
- eqlpeggsub("%P", "%P")
1362
- eqlpeggsub("%D", "%D")
1363
- eqlpeggsub("%X", "%X")
1364
- eqlpeggsub("%S", "%S")
1365
- eqlpeggsub("%C", "%C")
1366
-
1367
- eqlpeggsub("[%w]", "%w")
1368
- eqlpeggsub("[_%w]", "_%w")
1369
- eqlpeggsub("[^%w]", "%W")
1370
- eqlpeggsub("[%W%S]", "%W%S")
1371
-
1372
- re.updatelocale()
1373
-
1374
-
1375
- -- testing nested substitutions x string captures
1376
-
1377
- p = re.compile[[
1378
- text <- {~ item* ~}
1379
- item <- macro / [^()] / '(' item* ')'
1380
- arg <- ' '* {~ (!',' item)* ~}
1381
- args <- '(' arg (',' arg)* ')'
1382
- macro <- ('apply' args) -> '%1(%2)'
1383
- / ('add' args) -> '%1 + %2'
1384
- / ('mul' args) -> '%1 * %2'
1385
- ]]
1386
-
1387
- assert(p:match"add(mul(a,b), apply(f,x))" == "a * b + f(x)")
1388
-
1389
- rev = re.compile[[ R <- (!.) -> '' / ({.} R) -> '%2%1']]
1390
-
1391
- assert(rev:match"0123456789" == "9876543210")
1392
-
1393
-
1394
- -- testing error messages in re
1395
-
1396
- local function errmsg (p, err)
1397
- checkerr(err, re.compile, p)
1398
- end
1399
-
1400
- errmsg('aaaa', "rule 'aaaa'")
1401
- errmsg('a', 'outside')
1402
- errmsg('b <- a', 'undefined')
1403
- errmsg("x <- 'a' x <- 'b'", 'already defined')
1404
- errmsg("'a' -", "near '-'")
1405
-
1406
-
1407
- print"OK"
1408
-
1409
-