immunio 1.1.13 → 1.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,13 @@
1
1
  MODULES := lib/hooks/xss
2
2
 
3
- LUA_SRC += \
3
+
4
+ LUA_PROTECT_SRC += \
5
+ lib/hooks/file_io.lua \
6
+ lib/hooks/framework_redirect.lua \
7
+ lib/hooks/template_render_done.lua \
8
+ lib/hooks/sql_execute.lua \
9
+
10
+ LUA_BASE_SRC += \
4
11
  lib/hooks/authenticate.lua \
5
12
  lib/hooks/bad_cookie.lua \
6
13
  lib/hooks/custom_event.lua \
@@ -8,24 +15,22 @@ LUA_SRC += \
8
15
  lib/hooks/encode.lua \
9
16
  lib/hooks/eval.lua \
10
17
  lib/hooks/exception.lua \
11
- lib/hooks/file_io.lua \
12
18
  lib/hooks/framework_csrf_check.lua \
13
19
  lib/hooks/framework_login.lua \
14
20
  lib/hooks/framework_password_reset.lua \
15
21
  lib/hooks/framework_account_created.lua \
16
- lib/hooks/framework_redirect.lua \
17
22
  lib/hooks/framework_session.lua \
18
23
  lib/hooks/framework_user.lua \
19
24
  lib/hooks/framework_route.lua \
20
25
  lib/hooks/framework_bad_response_header.lua \
26
+ lib/hooks/framework_input_params.lua \
27
+ lib/hooks/get_telemetry_config.lua \
28
+ lib/hooks/headers/header_validation.lua \
29
+ lib/hooks/headers/useragent.lua \
21
30
  lib/hooks/http_request_finish.lua \
22
31
  lib/hooks/http_request_start.lua \
23
32
  lib/hooks/http_response_start.lua \
24
33
  lib/hooks/mongodb_execute.lua \
25
34
  lib/hooks/should_report.lua \
26
- lib/hooks/sql_execute.lua \
27
- lib/hooks/template_render_done.lua \
28
- lib/hooks/xss/escape.lua \
29
- lib/hooks/xss/escape_js.lua \
30
35
 
31
36
  include $(patsubst %, %/module.mk,$(MODULES))
@@ -1,4 +1,4 @@
1
- LUA_SRC += \
1
+ LUA_PROTECT_SRC += \
2
2
  lib/hooks/xss/escape.lua \
3
3
  lib/hooks/xss/escape_js.lua \
4
- lib/hooks/xss/html_const.lua
4
+ lib/hooks/xss/html_const.lua
@@ -1,5 +1,4 @@
1
- LUA_SRC += \
2
- lib/lexers/bash_dqstr.lua \
1
+ LUA_BASE_SRC += \
3
2
  lib/lexers/bash.lua \
4
3
  lib/lexers/css_attr.lua \
5
4
  lib/lexers/css.lua \
@@ -7,4 +6,4 @@ LUA_SRC += \
7
6
  lib/lexers/html_entities.lua \
8
7
  lib/lexers/html_entities_ws.lua \
9
8
  lib/lexers/javascript.lua \
10
- lib/lexers/markers.lua
9
+ lib/lexers/markers.lua
@@ -1,6 +1,6 @@
1
- MODULES := lib/hooks lib/lexers lib/schema
1
+ MODULES := lib/hooks lib/lexers
2
2
 
3
- LUA_SRC += \
3
+ LUA_BASE_SRC += \
4
4
  lib/base64.lua \
5
5
  lib/bit.lua \
6
6
  lib/cookie.lua \
@@ -9,12 +9,17 @@ LUA_SRC += \
9
9
  lib/date.lua \
10
10
  lib/defence.lua \
11
11
  lib/diag.lua \
12
+ lib/dkjson.lua \
12
13
  lib/extensions.lua \
13
14
  lib/globtopattern.lua \
15
+ lib/hkdf.lua \
14
16
  lib/hmac.lua \
15
17
  lib/hooks.lua \
16
18
  lib/idn.lua \
19
+ lib/immunio-schemas/immunio_schemas/schemas/request_schema.lua \
20
+ lib/immunio-schemas/immunio_schemas/schemas/validation.lua \
17
21
  lib/ip.lua \
22
+ lib/learn.lua \
18
23
  lib/lexgraph.lua \
19
24
  lib/lexer.lua \
20
25
  lib/lru.lua \
@@ -31,7 +36,6 @@ LUA_SRC += \
31
36
  lib/snap.lua \
32
37
  lib/term.lua \
33
38
  lib/tracking.lua \
34
- lib/useragent.lua \
35
39
  lib/utils.lua \
36
40
  lib/verb_tamper.lua
37
41
 
data/lua-hooks/options.mk CHANGED
@@ -13,6 +13,11 @@ else
13
13
  HOST_SYS= Windows
14
14
  HOST_MSYS= mingw
15
15
  endif
16
+ ifneq (,$(findstring MSYS,$(HOST_SYS)))
17
+ # MSYS is an alias for MINGW
18
+ HOST_SYS= Windows
19
+ HOST_MSYS= mingw
20
+ endif
16
21
  ifneq (,$(findstring CYGWIN,$(HOST_SYS)))
17
22
  HOST_SYS= Windows
18
23
  HOST_MSYS= cygwin
@@ -23,6 +28,7 @@ else
23
28
  endif
24
29
  endif
25
30
 
31
+ TARGET_SYS ?= $(HOST_SYS)
26
32
  CROSS =
27
33
  CC = $(CROSS)cc
28
34
  AR = $(CROSS)ar
@@ -47,13 +53,13 @@ endif
47
53
 
48
54
 
49
55
  XCFLAGS =
50
- CFLAGS = -DLUA_USE_APICHECK -DLUAJIT -Dlua_assert=assert -Wall -fPIC ${INCS} ${XCFLAGS}
56
+ CFLAGS = -DLUA_USE_APICHECK -DLUAJIT -Dlua_assert=assert -Wall -fPIC -fstack-protector ${INCS} ${XCFLAGS}
51
57
  CXXFLAGS = -std=c++11 ${CFLAGS}
52
- LDFLAGS =
58
+ LDFLAGS =
53
59
 
54
60
 
55
61
  LUAJIT_XCFLAGS = -fPIC
56
- ifeq (${HOST_SYS}, Darwin)
62
+ ifeq (${TARGET_SYS}, Darwin)
57
63
  # Disable the JIT on OS X
58
64
  LUAJIT_XCFLAGS += -DLUAJIT_ENABLE_GC64
59
65
  endif
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: immunio
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.13
4
+ version: 1.1.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Immunio
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-20 00:00:00.000000000 Z
11
+ date: 2017-05-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -58,14 +58,14 @@ dependencies:
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '0.5'
61
+ version: 1.1.0
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '0.5'
68
+ version: 1.1.0
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: faraday
71
71
  requirement: !ruby/object:Gem::Requirement
@@ -196,7 +196,6 @@ files:
196
196
  - lua-hooks/ext/lpeg/makefile
197
197
  - lua-hooks/ext/lpeg/module.mk
198
198
  - lua-hooks/ext/lpeg/re.html
199
- - lua-hooks/ext/lpeg/test.lua
200
199
  - lua-hooks/ext/lua-cmsgpack/.gitignore
201
200
  - lua-hooks/ext/lua-cmsgpack/CMakeLists.txt
202
201
  - lua-hooks/ext/lua-cmsgpack/README.md
@@ -443,7 +442,6 @@ files:
443
442
  - lua-hooks/lib/hooks/xss/module.mk
444
443
  - lua-hooks/lib/lexers/module.mk
445
444
  - lua-hooks/lib/module.mk
446
- - lua-hooks/lib/schema/module.mk
447
445
  - lua-hooks/options.mk
448
446
  homepage: http://immun.io/
449
447
  licenses:
@@ -466,8 +464,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
466
464
  version: '0'
467
465
  requirements: []
468
466
  rubyforge_project:
469
- rubygems_version: 2.6.10
467
+ rubygems_version: 2.6.11
470
468
  signing_key:
471
469
  specification_version: 4
472
470
  summary: Immunio Ruby agent
473
471
  test_files: []
472
+ has_rdoc:
@@ -1,1409 +0,0 @@
1
- #!/usr/bin/env lua5.1
2
-
3
- -- $Id: test.lua,v 1.105 2014/12/12 17:00:39 roberto Exp $
4
-
5
- -- require"strict" -- just to be pedantic
6
-
7
- local m = require"lpeg"
8
-
9
-
10
- -- for general use
11
- local a, b, c, d, e, f, g, p, t
12
-
13
-
14
- -- compatibility with Lua 5.2
15
- local unpack = rawget(table, "unpack") or unpack
16
- local loadstring = rawget(_G, "loadstring") or load
17
-
18
-
19
- -- most tests here do not need much stack space
20
- m.setmaxstack(5)
21
-
22
- local any = m.P(1)
23
- local space = m.S" \t\n"^0
24
-
25
- local function checkeq (x, y, p)
26
- if p then print(x,y) end
27
- if type(x) ~= "table" then assert(x == y)
28
- else
29
- for k,v in pairs(x) do checkeq(v, y[k], p) end
30
- for k,v in pairs(y) do checkeq(v, x[k], p) end
31
- end
32
- end
33
-
34
-
35
- local mt = getmetatable(m.P(1))
36
-
37
-
38
- local allchar = {}
39
- for i=0,255 do allchar[i + 1] = i end
40
- allchar = string.char(unpack(allchar))
41
- assert(#allchar == 256)
42
-
43
- local function cs2str (c)
44
- return m.match(m.Cs((c + m.P(1)/"")^0), allchar)
45
- end
46
-
47
- local function eqcharset (c1, c2)
48
- assert(cs2str(c1) == cs2str(c2))
49
- end
50
-
51
-
52
- print"General tests for LPeg library"
53
-
54
- assert(type(m.version()) == "string")
55
- print("version " .. m.version())
56
- assert(m.type("alo") ~= "pattern")
57
- assert(m.type(io.input) ~= "pattern")
58
- assert(m.type(m.P"alo") == "pattern")
59
-
60
- -- tests for some basic optimizations
61
- assert(m.match(m.P(false) + "a", "a") == 2)
62
- assert(m.match(m.P(true) + "a", "a") == 1)
63
- assert(m.match("a" + m.P(false), "b") == nil)
64
- assert(m.match("a" + m.P(true), "b") == 1)
65
-
66
- assert(m.match(m.P(false) * "a", "a") == nil)
67
- assert(m.match(m.P(true) * "a", "a") == 2)
68
- assert(m.match("a" * m.P(false), "a") == nil)
69
- assert(m.match("a" * m.P(true), "a") == 2)
70
-
71
- assert(m.match(#m.P(false) * "a", "a") == nil)
72
- assert(m.match(#m.P(true) * "a", "a") == 2)
73
- assert(m.match("a" * #m.P(false), "a") == nil)
74
- assert(m.match("a" * #m.P(true), "a") == 2)
75
-
76
-
77
- -- tests for locale
78
- do
79
- assert(m.locale(m) == m)
80
- local t = {}
81
- assert(m.locale(t, m) == t)
82
- local x = m.locale()
83
- for n,v in pairs(x) do
84
- assert(type(n) == "string")
85
- eqcharset(v, m[n])
86
- end
87
- end
88
-
89
-
90
- assert(m.match(3, "aaaa"))
91
- assert(m.match(4, "aaaa"))
92
- assert(not m.match(5, "aaaa"))
93
- assert(m.match(-3, "aa"))
94
- assert(not m.match(-3, "aaa"))
95
- assert(not m.match(-3, "aaaa"))
96
- assert(not m.match(-4, "aaaa"))
97
- assert(m.P(-5):match"aaaa")
98
-
99
- assert(m.match("a", "alo") == 2)
100
- assert(m.match("al", "alo") == 3)
101
- assert(not m.match("alu", "alo"))
102
- assert(m.match(true, "") == 1)
103
-
104
- local digit = m.S"0123456789"
105
- local upper = m.S"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
106
- local lower = m.S"abcdefghijklmnopqrstuvwxyz"
107
- local letter = m.S"" + upper + lower
108
- local alpha = letter + digit + m.R()
109
-
110
- eqcharset(m.S"", m.P(false))
111
- eqcharset(upper, m.R("AZ"))
112
- eqcharset(lower, m.R("az"))
113
- eqcharset(upper + lower, m.R("AZ", "az"))
114
- eqcharset(upper + lower, m.R("AZ", "cz", "aa", "bb", "90"))
115
- eqcharset(digit, m.S"01234567" + "8" + "9")
116
- eqcharset(upper, letter - lower)
117
- eqcharset(m.S(""), m.R())
118
- assert(cs2str(m.S("")) == "")
119
-
120
- eqcharset(m.S"\0", "\0")
121
- eqcharset(m.S"\1\0\2", m.R"\0\2")
122
- eqcharset(m.S"\1\0\2", m.R"\1\2" + "\0")
123
- eqcharset(m.S"\1\0\2" - "\0", m.R"\1\2")
124
-
125
- local word = alpha^1 * (1 - alpha)^0
126
-
127
- assert((word^0 * -1):match"alo alo")
128
- assert(m.match(word^1 * -1, "alo alo"))
129
- assert(m.match(word^2 * -1, "alo alo"))
130
- assert(not m.match(word^3 * -1, "alo alo"))
131
-
132
- assert(not m.match(word^-1 * -1, "alo alo"))
133
- assert(m.match(word^-2 * -1, "alo alo"))
134
- assert(m.match(word^-3 * -1, "alo alo"))
135
-
136
- local eos = m.P(-1)
137
-
138
- assert(m.match(digit^0 * letter * digit * eos, "1298a1"))
139
- assert(not m.match(digit^0 * letter * eos, "1257a1"))
140
-
141
- b = {
142
- [1] = "(" * (((1 - m.S"()") + #m.P"(" * m.V(1))^0) * ")"
143
- }
144
-
145
- assert(m.match(b, "(al())()"))
146
- assert(not m.match(b * eos, "(al())()"))
147
- assert(m.match(b * eos, "((al())()(é))"))
148
- assert(not m.match(b, "(al()()"))
149
-
150
- assert(not m.match(letter^1 - "for", "foreach"))
151
- assert(m.match(letter^1 - ("for" * eos), "foreach"))
152
- assert(not m.match(letter^1 - ("for" * eos), "for"))
153
-
154
- function basiclookfor (p)
155
- return m.P {
156
- [1] = p + (1 * m.V(1))
157
- }
158
- end
159
-
160
- function caplookfor (p)
161
- return basiclookfor(p:C())
162
- end
163
-
164
- assert(m.match(caplookfor(letter^1), " 4achou123...") == "achou")
165
- a = {m.match(caplookfor(letter^1)^0, " two words, one more ")}
166
- checkeq(a, {"two", "words", "one", "more"})
167
-
168
- assert(m.match( basiclookfor((#m.P(b) * 1) * m.Cp()), " ( (a)") == 7)
169
-
170
- a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "123")}
171
- checkeq(a, {"123", "d"})
172
-
173
- -- bug in LPeg 0.12 (nil value does not create a 'ktable')
174
- assert(m.match(m.Cc(nil), "") == nil)
175
-
176
- a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "abcd")}
177
- checkeq(a, {"abcd", "l"})
178
-
179
- a = {m.match(m.Cc(10,20,30) * 'a' * m.Cp(), 'aaa')}
180
- checkeq(a, {10,20,30,2})
181
- a = {m.match(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp(), 'aaa')}
182
- checkeq(a, {1,10,20,30,2})
183
- a = m.match(m.Ct(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp()), 'aaa')
184
- checkeq(a, {1,10,20,30,2})
185
- a = m.match(m.Ct(m.Cp() * m.Cc(7,8) * m.Cc(10,20,30) * 'a' * m.Cp()), 'aaa')
186
- checkeq(a, {1,7,8,10,20,30,2})
187
- a = {m.match(m.Cc() * m.Cc() * m.Cc(1) * m.Cc(2,3,4) * m.Cc() * 'a', 'aaa')}
188
- checkeq(a, {1,2,3,4})
189
-
190
- a = {m.match(m.Cp() * letter^1 * m.Cp(), "abcd")}
191
- checkeq(a, {1, 5})
192
-
193
-
194
- t = {m.match({[1] = m.C(m.C(1) * m.V(1) + -1)}, "abc")}
195
- checkeq(t, {"abc", "a", "bc", "b", "c", "c", ""})
196
-
197
- -- bug in 0.12 ('hascapture' did not check for captures inside a rule)
198
- do
199
- local pat = m.P{
200
- 'S';
201
- S1 = m.C('abc') + 3,
202
- S = #m.V('S1') -- rule has capture, but '#' must ignore it
203
- }
204
- assert(pat:match'abc' == 1)
205
- end
206
-
207
-
208
- -- test for small capture boundary
209
- for i = 250,260 do
210
- assert(#m.match(m.C(i), string.rep('a', i)) == i)
211
- assert(#m.match(m.C(m.C(i)), string.rep('a', i)) == i)
212
- end
213
-
214
- -- tests for any*n and any*-n
215
- for n = 1, 550, 13 do
216
- local x_1 = string.rep('x', n - 1)
217
- local x = x_1 .. 'a'
218
- assert(not m.P(n):match(x_1))
219
- assert(m.P(n):match(x) == n + 1)
220
- assert(n < 4 or m.match(m.P(n) + "xxx", x_1) == 4)
221
- assert(m.C(n):match(x) == x)
222
- assert(m.C(m.C(n)):match(x) == x)
223
- assert(m.P(-n):match(x_1) == 1)
224
- assert(not m.P(-n):match(x))
225
- assert(n < 13 or m.match(m.Cc(20) * ((n - 13) * m.P(10)) * 3, x) == 20)
226
- local n3 = math.floor(n/3)
227
- assert(m.match(n3 * m.Cp() * n3 * n3, x) == n3 + 1)
228
- end
229
-
230
- -- true values
231
- assert(m.P(0):match("x") == 1)
232
- assert(m.P(0):match("") == 1)
233
- assert(m.C(0):match("x") == "")
234
-
235
- assert(m.match(m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu", "xuxu") == 1)
236
- assert(m.match(m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu", "xuxuxuxuxu") == 0)
237
- assert(m.match(m.C(m.P(2)^1), "abcde") == "abcd")
238
- p = m.Cc(0) * 1 + m.Cc(1) * 2 + m.Cc(2) * 3 + m.Cc(3) * 4
239
-
240
-
241
- -- test for alternation optimization
242
- assert(m.match(m.P"a"^1 + "ab" + m.P"x"^0, "ab") == 2)
243
- assert(m.match((m.P"a"^1 + "ab" + m.P"x"^0 * 1)^0, "ab") == 3)
244
- assert(m.match(m.P"ab" + "cd" + "" + "cy" + "ak", "98") == 1)
245
- assert(m.match(m.P"ab" + "cd" + "ax" + "cy", "ax") == 3)
246
- assert(m.match("a" * m.P"b"^0 * "c" + "cd" + "ax" + "cy", "ax") == 3)
247
- assert(m.match((m.P"ab" + "cd" + "ax" + "cy")^0, "ax") == 3)
248
- assert(m.match(m.P(1) * "x" + m.S"" * "xu" + "ay", "ay") == 3)
249
- assert(m.match(m.P"abc" + "cde" + "aka", "aka") == 4)
250
- assert(m.match(m.S"abc" * "x" + "cde" + "aka", "ax") == 3)
251
- assert(m.match(m.S"abc" * "x" + "cde" + "aka", "aka") == 4)
252
- assert(m.match(m.S"abc" * "x" + "cde" + "aka", "cde") == 4)
253
- assert(m.match(m.S"abc" * "x" + "ide" + m.S"ab" * "ka", "aka") == 4)
254
- assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "ax") == 3)
255
- assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "aka") == 4)
256
- assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "cde") == 4)
257
- assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "aka") == 4)
258
- assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "ax") == 3)
259
- assert(m.match(m.P(1) * "x" + "cde" + m.S"ab" * "ka", "aka") == 4)
260
- assert(m.match(m.P(1) * "x" + "cde" + m.P(1) * "ka", "aka") == 4)
261
- assert(m.match(m.P(1) * "x" + "cde" + m.P(1) * "ka", "cde") == 4)
262
- assert(m.match(m.P"eb" + "cd" + m.P"e"^0 + "x", "ee") == 3)
263
- assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "abcd") == 3)
264
- assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "eeex") == 4)
265
- assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "cd") == 3)
266
- assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "x") == 1)
267
- assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x" + "", "zee") == 1)
268
- assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "abcd") == 3)
269
- assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "eeex") == 4)
270
- assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "cd") == 3)
271
- assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "x") == 2)
272
- assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x" + "", "zee") == 1)
273
- assert(not m.match(("aa" * m.P"bc"^-1 + "aab") * "e", "aabe"))
274
-
275
- assert(m.match("alo" * (m.P"\n" + -1), "alo") == 4)
276
-
277
-
278
- -- bug in 0.12 (rc1)
279
- assert(m.match((m.P"\128\187\191" + m.S"abc")^0, "\128\187\191") == 4)
280
-
281
- assert(m.match(m.S"\0\128\255\127"^0, string.rep("\0\128\255\127", 10)) ==
282
- 4*10 + 1)
283
-
284
- -- optimizations with optional parts
285
- assert(m.match(("ab" * -m.P"c")^-1, "abc") == 1)
286
- assert(m.match(("ab" * #m.P"c")^-1, "abd") == 1)
287
- assert(m.match(("ab" * m.B"c")^-1, "ab") == 1)
288
- assert(m.match(("ab" * m.P"cd"^0)^-1, "abcdcdc") == 7)
289
-
290
- assert(m.match(m.P"ab"^-1 - "c", "abcd") == 3)
291
-
292
- p = ('Aa' * ('Bb' * ('Cc' * m.P'Dd'^0)^0)^0)^-1
293
- assert(p:match("AaBbCcDdBbCcDdDdDdBb") == 21)
294
-
295
-
296
- pi = "3.14159 26535 89793 23846 26433 83279 50288 41971 69399 37510"
297
- assert(m.match(m.Cs((m.P"1" / "a" + m.P"5" / "b" + m.P"9" / "c" + 1)^0), pi) ==
298
- m.match(m.Cs((m.P(1) / {["1"] = "a", ["5"] = "b", ["9"] = "c"})^0), pi))
299
- print"+"
300
-
301
-
302
- -- tests for capture optimizations
303
- assert(m.match((m.P(3) + 4 * m.Cp()) * "a", "abca") == 5)
304
- t = {m.match(((m.P"a" + m.Cp()) * m.P"x")^0, "axxaxx")}
305
- checkeq(t, {3, 6})
306
-
307
-
308
- -- tests for numbered captures
309
- p = m.C(1)
310
- assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 3, "abcdefgh") == "a")
311
- assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 1, "abcdefgh") == "abcdef")
312
- assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 4, "abcdefgh") == "bc")
313
- assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 0, "abcdefgh") == 7)
314
-
315
- a, b, c = m.match(p * (m.C(p * m.C(2)) * m.C(3) / 4) * p, "abcdefgh")
316
- assert(a == "a" and b == "efg" and c == "h")
317
-
318
- -- test for table captures
319
- t = m.match(m.Ct(letter^1), "alo")
320
- checkeq(t, {})
321
-
322
- t, n = m.match(m.Ct(m.C(letter)^1) * m.Cc"t", "alo")
323
- assert(n == "t" and table.concat(t) == "alo")
324
-
325
- t = m.match(m.Ct(m.C(m.C(letter)^1)), "alo")
326
- assert(table.concat(t, ";") == "alo;a;l;o")
327
-
328
- t = m.match(m.Ct(m.C(m.C(letter)^1)), "alo")
329
- assert(table.concat(t, ";") == "alo;a;l;o")
330
-
331
- t = m.match(m.Ct(m.Ct((m.Cp() * letter * m.Cp())^1)), "alo")
332
- assert(table.concat(t[1], ";") == "1;2;2;3;3;4")
333
-
334
- t = m.match(m.Ct(m.C(m.C(1) * 1 * m.C(1))), "alo")
335
- checkeq(t, {"alo", "a", "o"})
336
-
337
-
338
- -- tests for groups
339
- p = m.Cg(1) -- no capture
340
- assert(p:match('x') == 'x')
341
- p = m.Cg(m.P(true)/function () end * 1) -- no value
342
- assert(p:match('x') == 'x')
343
- p = m.Cg(m.Cg(m.Cg(m.C(1))))
344
- assert(p:match('x') == 'x')
345
- p = m.Cg(m.Cg(m.Cg(m.C(1))^0) * m.Cg(m.Cc(1) * m.Cc(2)))
346
- t = {p:match'abc'}
347
- checkeq(t, {'a', 'b', 'c', 1, 2})
348
-
349
- p = m.Ct(m.Cg(m.Cc(10), "hi") * m.C(1)^0 * m.Cg(m.Cc(20), "ho"))
350
- t = p:match''
351
- checkeq(t, {hi = 10, ho = 20})
352
- t = p:match'abc'
353
- checkeq(t, {hi = 10, ho = 20, 'a', 'b', 'c'})
354
-
355
-
356
- -- test for error messages
357
- local function checkerr (msg, f, ...)
358
- local st, err = pcall(f, ...)
359
- assert(not st and m.match({ m.P(msg) + 1 * m.V(1) }, err))
360
- end
361
-
362
- checkerr("rule '1' may be left recursive", m.match, { m.V(1) * 'a' }, "a")
363
- checkerr("rule '1' used outside a grammar", m.match, m.V(1), "")
364
- checkerr("rule 'hiii' used outside a grammar", m.match, m.V('hiii'), "")
365
- checkerr("rule 'hiii' undefined in given grammar", m.match, { m.V('hiii') }, "")
366
- checkerr("undefined in given grammar", m.match, { m.V{} }, "")
367
-
368
- checkerr("rule 'A' is not a pattern", m.P, { m.P(1), A = {} })
369
- checkerr("grammar has no initial rule", m.P, { [print] = {} })
370
-
371
- -- grammar with a long call chain before left recursion
372
- p = {'a',
373
- a = m.V'b' * m.V'c' * m.V'd' * m.V'a',
374
- b = m.V'c',
375
- c = m.V'd',
376
- d = m.V'e',
377
- e = m.V'f',
378
- f = m.V'g',
379
- g = m.P''
380
- }
381
- checkerr("rule 'a' may be left recursive", m.match, p, "a")
382
-
383
- -- Bug in peephole optimization of LPeg 0.12 (IJmp -> ICommit)
384
- -- the next grammar has an original sequence IJmp -> ICommit -> IJmp L1
385
- -- that is optimized to ICommit L1
386
-
387
- p = m.P { (m.P {m.P'abc'} + 'ayz') * m.V'y'; y = m.P'x' }
388
- assert(p:match('abcx') == 5 and p:match('ayzx') == 5 and not p:match'abc')
389
-
390
-
391
- -- tests for non-pattern as arguments to pattern functions
392
-
393
- p = { ('a' * m.V(1))^-1 } * m.P'b' * { 'a' * m.V(2); m.V(1)^-1 }
394
- assert(m.match(p, "aaabaac") == 7)
395
-
396
- p = m.P'abc' * 2 * -5 * true * 'de' -- mix of numbers and strings and booleans
397
-
398
- assert(p:match("abc01de") == 8)
399
- assert(p:match("abc01de3456") == nil)
400
-
401
- p = 'abc' * (2 * (-5 * (true * m.P'de')))
402
-
403
- assert(p:match("abc01de") == 8)
404
- assert(p:match("abc01de3456") == nil)
405
-
406
- p = { m.V(2), m.P"abc" } *
407
- (m.P{ "xx", xx = m.P"xx" } + { "x", x = m.P"a" * m.V"x" + "" })
408
- assert(p:match("abcaaaxx") == 7)
409
- assert(p:match("abcxx") == 6)
410
-
411
-
412
- -- a large table capture
413
- t = m.match(m.Ct(m.C('a')^0), string.rep("a", 10000))
414
- assert(#t == 10000 and t[1] == 'a' and t[#t] == 'a')
415
-
416
- print('+')
417
-
418
-
419
- -- bug in 0.10 (rechecking a grammar, after tail-call optimization)
420
- m.P{ m.P { (m.P(3) + "xuxu")^0 * m.V"xuxu", xuxu = m.P(1) } }
421
-
422
- local V = m.V
423
-
424
- local Space = m.S(" \n\t")^0
425
- local Number = m.C(m.R("09")^1) * Space
426
- local FactorOp = m.C(m.S("+-")) * Space
427
- local TermOp = m.C(m.S("*/")) * Space
428
- local Open = "(" * Space
429
- local Close = ")" * Space
430
-
431
-
432
- local function f_factor (v1, op, v2, d)
433
- assert(d == nil)
434
- if op == "+" then return v1 + v2
435
- else return v1 - v2
436
- end
437
- end
438
-
439
-
440
- local function f_term (v1, op, v2, d)
441
- assert(d == nil)
442
- if op == "*" then return v1 * v2
443
- else return v1 / v2
444
- end
445
- end
446
-
447
- G = m.P{ "Exp",
448
- Exp = m.Cf(V"Factor" * m.Cg(FactorOp * V"Factor")^0, f_factor);
449
- Factor = m.Cf(V"Term" * m.Cg(TermOp * V"Term")^0, f_term);
450
- Term = Number / tonumber + Open * V"Exp" * Close;
451
- }
452
-
453
- G = Space * G * -1
454
-
455
- for _, s in ipairs{" 3 + 5*9 / (1+1) ", "3+4/2", "3+3-3- 9*2+3*9/1- 8"} do
456
- assert(m.match(G, s) == loadstring("return "..s)())
457
- end
458
-
459
-
460
- -- test for grammars (errors deep in calling non-terminals)
461
- g = m.P{
462
- [1] = m.V(2) + "a",
463
- [2] = "a" * m.V(3) * "x",
464
- [3] = "b" * m.V(3) + "c"
465
- }
466
-
467
- assert(m.match(g, "abbbcx") == 7)
468
- assert(m.match(g, "abbbbx") == 2)
469
-
470
-
471
- -- tests for \0
472
- assert(m.match(m.R("\0\1")^1, "\0\1\0") == 4)
473
- assert(m.match(m.S("\0\1ab")^1, "\0\1\0a") == 5)
474
- assert(m.match(m.P(1)^3, "\0\1\0a") == 5)
475
- assert(not m.match(-4, "\0\1\0a"))
476
- assert(m.match("\0\1\0a", "\0\1\0a") == 5)
477
- assert(m.match("\0\0\0", "\0\0\0") == 4)
478
- assert(not m.match("\0\0\0", "\0\0"))
479
-
480
-
481
- -- tests for predicates
482
- assert(not m.match(-m.P("a") * 2, "alo"))
483
- assert(m.match(- -m.P("a") * 2, "alo") == 3)
484
- assert(m.match(#m.P("a") * 2, "alo") == 3)
485
- assert(m.match(##m.P("a") * 2, "alo") == 3)
486
- assert(not m.match(##m.P("c") * 2, "alo"))
487
- assert(m.match(m.Cs((##m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
488
- assert(m.match(m.Cs((#((#m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
489
- assert(m.match(m.Cs((- -m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
490
- assert(m.match(m.Cs((-((-m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
491
-
492
- p = -m.P'a' * m.Cc(1) + -m.P'b' * m.Cc(2) + -m.P'c' * m.Cc(3)
493
- assert(p:match('a') == 2 and p:match('') == 1 and p:match('b') == 1)
494
-
495
- p = -m.P'a' * m.Cc(10) + #m.P'a' * m.Cc(20)
496
- assert(p:match('a') == 20 and p:match('') == 10 and p:match('b') == 10)
497
-
498
-
499
-
500
- -- look-behind predicate
501
- assert(not m.match(m.B'a', 'a'))
502
- assert(m.match(1 * m.B'a', 'a') == 2)
503
- assert(not m.match(m.B(1), 'a'))
504
- assert(m.match(1 * m.B(1), 'a') == 2)
505
- assert(m.match(-m.B(1), 'a') == 1)
506
- assert(m.match(m.B(250), string.rep('a', 250)) == nil)
507
- assert(m.match(250 * m.B(250), string.rep('a', 250)) == 251)
508
-
509
- -- look-behind with an open call
510
- checkerr("pattern may not have fixed length", m.B, m.V'S1')
511
- checkerr("too long to look behind", m.B, 260)
512
-
513
- B = #letter * -m.B(letter) + -letter * m.B(letter)
514
- x = m.Ct({ (B * m.Cp())^-1 * (1 * m.V(1) + m.P(true)) })
515
- checkeq(m.match(x, 'ar cal c'), {1,3,4,7,9,10})
516
- checkeq(m.match(x, ' ar cal '), {2,4,5,8})
517
- checkeq(m.match(x, ' '), {})
518
- checkeq(m.match(x, 'aloalo'), {1,7})
519
-
520
- assert(m.match(B, "a") == 1)
521
- assert(m.match(1 * B, "a") == 2)
522
- assert(not m.B(1 - letter):match(""))
523
- assert((-m.B(letter)):match("") == 1)
524
-
525
- assert((4 * m.B(letter, 4)):match("aaaaaaaa") == 5)
526
- assert(not (4 * m.B(#letter * 5)):match("aaaaaaaa"))
527
- assert((4 * -m.B(#letter * 5)):match("aaaaaaaa") == 5)
528
-
529
- -- look-behind with grammars
530
- assert(m.match('a' * m.B{'x', x = m.P(3)}, 'aaa') == nil)
531
- assert(m.match('aa' * m.B{'x', x = m.P('aaa')}, 'aaaa') == nil)
532
- assert(m.match('aaa' * m.B{'x', x = m.P('aaa')}, 'aaaaa') == 4)
533
-
534
-
535
-
536
- -- bug in 0.9
537
- assert(m.match(('a' * #m.P'b'), "ab") == 2)
538
- assert(not m.match(('a' * #m.P'b'), "a"))
539
-
540
- assert(not m.match(#m.S'567', ""))
541
- assert(m.match(#m.S'567' * 1, "6") == 2)
542
-
543
-
544
- -- tests for Tail Calls
545
-
546
- p = m.P{ 'a' * m.V(1) + '' }
547
- assert(p:match(string.rep('a', 1000)) == 1001)
548
-
549
- -- create a grammar for a simple DFA for even number of 0s and 1s
550
- --
551
- -- ->1 <---0---> 2
552
- -- ^ ^
553
- -- | |
554
- -- 1 1
555
- -- | |
556
- -- V V
557
- -- 3 <---0---> 4
558
- --
559
- -- this grammar should keep no backtracking information
560
-
561
- p = m.P{
562
- [1] = '0' * m.V(2) + '1' * m.V(3) + -1,
563
- [2] = '0' * m.V(1) + '1' * m.V(4),
564
- [3] = '0' * m.V(4) + '1' * m.V(1),
565
- [4] = '0' * m.V(3) + '1' * m.V(2),
566
- }
567
-
568
- assert(p:match(string.rep("00", 10000)))
569
- assert(p:match(string.rep("01", 10000)))
570
- assert(p:match(string.rep("011", 10000)))
571
- assert(not p:match(string.rep("011", 10000) .. "1"))
572
- assert(not p:match(string.rep("011", 10001)))
573
-
574
-
575
- -- this grammar does need backtracking info.
576
- local lim = 10000
577
- p = m.P{ '0' * m.V(1) + '0' }
578
- checkerr("too many pending", m.match, p, string.rep("0", lim))
579
- m.setmaxstack(2*lim)
580
- checkerr("too many pending", m.match, p, string.rep("0", lim))
581
- m.setmaxstack(2*lim + 4)
582
- assert(m.match(p, string.rep("0", lim)) == lim + 1)
583
-
584
- -- this repetition should not need stack space (only the call does)
585
- p = m.P{ ('a' * m.V(1))^0 * 'b' + 'c' }
586
- m.setmaxstack(200)
587
- assert(p:match(string.rep('a', 180) .. 'c' .. string.rep('b', 180)) == 362)
588
-
589
- m.setmaxstack(5) -- restore original limit
590
-
591
- -- tests for optional start position
592
- assert(m.match("a", "abc", 1))
593
- assert(m.match("b", "abc", 2))
594
- assert(m.match("c", "abc", 3))
595
- assert(not m.match(1, "abc", 4))
596
- assert(m.match("a", "abc", -3))
597
- assert(m.match("b", "abc", -2))
598
- assert(m.match("c", "abc", -1))
599
- assert(m.match("abc", "abc", -4)) -- truncate to position 1
600
-
601
- assert(m.match("", "abc", 10)) -- empty string is everywhere!
602
- assert(m.match("", "", 10))
603
- assert(not m.match(1, "", 1))
604
- assert(not m.match(1, "", -1))
605
- assert(not m.match(1, "", 0))
606
-
607
- print("+")
608
-
609
-
610
- -- tests for argument captures
611
- checkerr("invalid argument", m.Carg, 0)
612
- checkerr("invalid argument", m.Carg, -1)
613
- checkerr("invalid argument", m.Carg, 2^18)
614
- checkerr("absent extra argument #1", m.match, m.Carg(1), 'a', 1)
615
- assert(m.match(m.Carg(1), 'a', 1, print) == print)
616
- x = {m.match(m.Carg(1) * m.Carg(2), '', 1, 10, 20)}
617
- checkeq(x, {10, 20})
618
-
619
- assert(m.match(m.Cmt(m.Cg(m.Carg(3), "a") *
620
- m.Cmt(m.Cb("a"), function (s,i,x)
621
- assert(s == "a" and i == 1);
622
- return i, x+1
623
- end) *
624
- m.Carg(2), function (s,i,a,b,c)
625
- assert(s == "a" and i == 1 and c == nil);
626
- return i, 2*a + 3*b
627
- end) * "a",
628
- "a", 1, false, 100, 1000) == 2*1001 + 3*100)
629
-
630
-
631
- -- tests for Lua functions
632
-
633
- t = {}
634
- s = ""
635
- p = m.P(function (s1, i) assert(s == s1); t[#t + 1] = i; return nil end) * false
636
- s = "hi, this is a test"
637
- assert(m.match(((p - m.P(-1)) + 2)^0, s) == string.len(s) + 1)
638
- assert(#t == string.len(s)/2 and t[1] == 1 and t[2] == 3)
639
-
640
- assert(not m.match(p, s))
641
-
642
- p = mt.__add(function (s, i) return i end, function (s, i) return nil end)
643
- assert(m.match(p, "alo"))
644
-
645
- p = mt.__mul(function (s, i) return i end, function (s, i) return nil end)
646
- assert(not m.match(p, "alo"))
647
-
648
-
649
- t = {}
650
- p = function (s1, i) assert(s == s1); t[#t + 1] = i; return i end
651
- s = "hi, this is a test"
652
- assert(m.match((m.P(1) * p)^0, s) == string.len(s) + 1)
653
- assert(#t == string.len(s) and t[1] == 2 and t[2] == 3)
654
-
655
- t = {}
656
- p = m.P(function (s1, i) assert(s == s1); t[#t + 1] = i;
657
- return i <= s1:len() and i end) * 1
658
- s = "hi, this is a test"
659
- assert(m.match(p^0, s) == string.len(s) + 1)
660
- assert(#t == string.len(s) + 1 and t[1] == 1 and t[2] == 2)
661
-
662
- p = function (s1, i) return m.match(m.P"a"^1, s1, i) end
663
- assert(m.match(p, "aaaa") == 5)
664
- assert(m.match(p, "abaa") == 2)
665
- assert(not m.match(p, "baaa"))
666
-
667
- checkerr("invalid position", m.match, function () return 2^20 end, s)
668
- checkerr("invalid position", m.match, function () return 0 end, s)
669
- checkerr("invalid position", m.match, function (s, i) return i - 1 end, s)
670
- checkerr("invalid position", m.match,
671
- m.P(1)^0 * function (_, i) return i - 1 end, s)
672
- assert(m.match(m.P(1)^0 * function (_, i) return i end * -1, s))
673
- checkerr("invalid position", m.match,
674
- m.P(1)^0 * function (_, i) return i + 1 end, s)
675
- assert(m.match(m.P(function (s, i) return s:len() + 1 end) * -1, s))
676
- checkerr("invalid position", m.match, m.P(function (s, i) return s:len() + 2 end) * -1, s)
677
- assert(not m.match(m.P(function (s, i) return s:len() end) * -1, s))
678
- assert(m.match(m.P(1)^0 * function (_, i) return true end, s) ==
679
- string.len(s) + 1)
680
- for i = 1, string.len(s) + 1 do
681
- assert(m.match(function (_, _) return i end, s) == i)
682
- end
683
-
684
- p = (m.P(function (s, i) return i%2 == 0 and i end) * 1
685
- + m.P(function (s, i) return i%2 ~= 0 and i + 2 <= s:len() and i end) * 3)^0
686
- * -1
687
- assert(p:match(string.rep('a', 14000)))
688
-
689
- -- tests for Function Replacements
690
- f = function (a, ...) if a ~= "x" then return {a, ...} end end
691
-
692
- t = m.match(m.C(1)^0/f, "abc")
693
- checkeq(t, {"a", "b", "c"})
694
-
695
- t = m.match(m.C(1)^0/f/f, "abc")
696
- checkeq(t, {{"a", "b", "c"}})
697
-
698
- t = m.match(m.P(1)^0/f/f, "abc") -- no capture
699
- checkeq(t, {{"abc"}})
700
-
701
- t = m.match((m.P(1)^0/f * m.Cp())/f, "abc")
702
- checkeq(t, {{"abc"}, 4})
703
-
704
- t = m.match((m.C(1)^0/f * m.Cp())/f, "abc")
705
- checkeq(t, {{"a", "b", "c"}, 4})
706
-
707
- t = m.match((m.C(1)^0/f * m.Cp())/f, "xbc")
708
- checkeq(t, {4})
709
-
710
- t = m.match(m.C(m.C(1)^0)/f, "abc")
711
- checkeq(t, {"abc", "a", "b", "c"})
712
-
713
- g = function (...) return 1, ... end
714
- t = {m.match(m.C(1)^0/g/g, "abc")}
715
- checkeq(t, {1, 1, "a", "b", "c"})
716
-
717
- t = {m.match(m.Cc(nil,nil,4) * m.Cc(nil,3) * m.Cc(nil, nil) / g / g, "")}
718
- t1 = {1,1,nil,nil,4,nil,3,nil,nil}
719
- for i=1,10 do assert(t[i] == t1[i]) end
720
-
721
- t = {m.match((m.C(1) / function (x) return x, x.."x" end)^0, "abc")}
722
- checkeq(t, {"a", "ax", "b", "bx", "c", "cx"})
723
-
724
- t = m.match(m.Ct((m.C(1) / function (x,y) return y, x end * m.Cc(1))^0), "abc")
725
- checkeq(t, {nil, "a", 1, nil, "b", 1, nil, "c", 1})
726
-
727
- -- tests for Query Replacements
728
-
729
- assert(m.match(m.C(m.C(1)^0)/{abc = 10}, "abc") == 10)
730
- assert(m.match(m.C(1)^0/{a = 10}, "abc") == 10)
731
- assert(m.match(m.S("ba")^0/{ab = 40}, "abc") == 40)
732
- t = m.match(m.Ct((m.S("ba")/{a = 40})^0), "abc")
733
- checkeq(t, {40})
734
-
735
- assert(m.match(m.Cs((m.C(1)/{a=".", d=".."})^0), "abcdde") == ".bc....e")
736
- assert(m.match(m.Cs((m.C(1)/{f="."})^0), "abcdde") == "abcdde")
737
- assert(m.match(m.Cs((m.C(1)/{d="."})^0), "abcdde") == "abc..e")
738
- assert(m.match(m.Cs((m.C(1)/{e="."})^0), "abcdde") == "abcdd.")
739
- assert(m.match(m.Cs((m.C(1)/{e=".", f="+"})^0), "eefef") == "..+.+")
740
- assert(m.match(m.Cs((m.C(1))^0), "abcdde") == "abcdde")
741
- assert(m.match(m.Cs(m.C(m.C(1)^0)), "abcdde") == "abcdde")
742
- assert(m.match(1 * m.Cs(m.P(1)^0), "abcdde") == "bcdde")
743
- assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "abcdde") == "abcdde")
744
- assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "0ab0b0") == "xabxbx")
745
- assert(m.match(m.Cs((m.C('0')/'x' + m.P(1)/{b=3})^0), "b0a0b") == "3xax3")
746
- assert(m.match(m.P(1)/'%0%0'/{aa = -3} * 'x', 'ax') == -3)
747
- assert(m.match(m.C(1)/'%0%1'/{aa = 'z'}/{z = -3} * 'x', 'ax') == -3)
748
-
749
- assert(m.match(m.Cs(m.Cc(0) * (m.P(1)/"")), "4321") == "0")
750
-
751
- assert(m.match(m.Cs((m.P(1) / "%0")^0), "abcd") == "abcd")
752
- assert(m.match(m.Cs((m.P(1) / "%0.%0")^0), "abcd") == "a.ab.bc.cd.d")
753
- assert(m.match(m.Cs((m.P("a") / "%0.%0" + 1)^0), "abcad") == "a.abca.ad")
754
- assert(m.match(m.C("a") / "%1%%%0", "a") == "a%a")
755
- assert(m.match(m.Cs((m.P(1) / ".xx")^0), "abcd") == ".xx.xx.xx.xx")
756
- assert(m.match(m.Cp() * m.P(3) * m.Cp()/"%2%1%1 - %0 ", "abcde") ==
757
- "411 - abc ")
758
-
759
- assert(m.match(m.P(1)/"%0", "abc") == "a")
760
- checkerr("invalid capture index", m.match, m.P(1)/"%1", "abc")
761
- checkerr("invalid capture index", m.match, m.P(1)/"%9", "abc")
762
-
763
- p = m.C(1)
764
- p = p * p; p = p * p; p = p * p * m.C(1) / "%9 - %1"
765
- assert(p:match("1234567890") == "9 - 1")
766
-
767
- assert(m.match(m.Cc(print), "") == print)
768
-
769
- -- too many captures (just ignore extra ones)
770
- p = m.C(1)^0 / "%2-%9-%0-%9"
771
- assert(p:match"01234567890123456789" == "1-8-01234567890123456789-8")
772
- s = string.rep("12345678901234567890", 20)
773
- assert(m.match(m.C(1)^0 / "%9-%1-%0-%3", s) == "9-1-" .. s .. "-3")
774
-
775
- -- string captures with non-string subcaptures
776
- p = m.Cc('alo') * m.C(1) / "%1 - %2 - %1"
777
- assert(p:match'x' == 'alo - x - alo')
778
-
779
- checkerr("invalid capture value (a boolean)", m.match, m.Cc(true) / "%1", "a")
780
-
781
- -- long strings for string capture
782
- l = 10000
783
- s = string.rep('a', l) .. string.rep('b', l) .. string.rep('c', l)
784
-
785
- p = (m.C(m.P'a'^1) * m.C(m.P'b'^1) * m.C(m.P'c'^1)) / '%3%2%1'
786
-
787
- assert(p:match(s) == string.rep('c', l) ..
788
- string.rep('b', l) ..
789
- string.rep('a', l))
790
-
791
- print"+"
792
-
793
- -- accumulator capture
794
- function f (x) return x + 1 end
795
- assert(m.match(m.Cf(m.Cc(0) * m.C(1)^0, f), "alo alo") == 7)
796
-
797
- t = {m.match(m.Cf(m.Cc(1,2,3), error), "")}
798
- checkeq(t, {1})
799
- p = m.Cf(m.Ct(true) * m.Cg(m.C(m.R"az"^1) * "=" * m.C(m.R"az"^1) * ";")^0,
800
- rawset)
801
- t = p:match("a=b;c=du;xux=yuy;")
802
- checkeq(t, {a="b", c="du", xux="yuy"})
803
-
804
-
805
- -- errors in accumulator capture
806
-
807
- -- no initial capture
808
- checkerr("no initial value", m.match, m.Cf(m.P(5), print), 'aaaaaa')
809
- -- no initial capture (very long match forces fold to be a pair open-close)
810
- checkerr("no initial value", m.match, m.Cf(m.P(500), print),
811
- string.rep('a', 600))
812
-
813
- -- nested capture produces no initial value
814
- checkerr("no initial value", m.match, m.Cf(m.P(1) / {}, print), "alo")
815
-
816
-
817
- -- tests for loop checker
818
-
819
- local function isnullable (p)
820
- checkerr("may accept empty string", function (p) return p^0 end, m.P(p))
821
- end
822
-
823
- isnullable(m.P("x")^-4)
824
- assert(m.match(((m.P(0) + 1) * m.S"al")^0, "alo") == 3)
825
- assert(m.match((("x" + #m.P(1))^-4 * m.S"al")^0, "alo") == 3)
826
- isnullable("")
827
- isnullable(m.P("x")^0)
828
- isnullable(m.P("x")^-1)
829
- isnullable(m.P("x") + 1 + 2 + m.P("a")^-1)
830
- isnullable(-m.P("ab"))
831
- isnullable(- -m.P("ab"))
832
- isnullable(# #(m.P("ab") + "xy"))
833
- isnullable(- #m.P("ab")^0)
834
- isnullable(# -m.P("ab")^1)
835
- isnullable(#m.V(3))
836
- isnullable(m.V(3) + m.V(1) + m.P('a')^-1)
837
- isnullable({[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)})
838
- assert(m.match(m.P{[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(1)}^0, "abc")
839
- == 3)
840
- assert(m.match(m.P""^-3, "a") == 1)
841
-
842
- local function find (p, s)
843
- return m.match(basiclookfor(p), s)
844
- end
845
-
846
-
847
- local function badgrammar (g, expected)
848
- local stat, msg = pcall(m.P, g)
849
- assert(not stat)
850
- if expected then assert(find(expected, msg)) end
851
- end
852
-
853
- badgrammar({[1] = m.V(1)}, "rule '1'")
854
- badgrammar({[1] = m.V(2)}, "rule '2'") -- invalid non-terminal
855
- badgrammar({[1] = m.V"x"}, "rule 'x'") -- invalid non-terminal
856
- badgrammar({[1] = m.V{}}, "rule '(a table)'") -- invalid non-terminal
857
- badgrammar({[1] = #m.P("a") * m.V(1)}, "rule '1'") -- left-recursive
858
- badgrammar({[1] = -m.P("a") * m.V(1)}, "rule '1'") -- left-recursive
859
- badgrammar({[1] = -1 * m.V(1)}, "rule '1'") -- left-recursive
860
- badgrammar({[1] = -1 + m.V(1)}, "rule '1'") -- left-recursive
861
- badgrammar({[1] = 1 * m.V(2), [2] = m.V(2)}, "rule '2'") -- left-recursive
862
- badgrammar({[1] = 1 * m.V(2)^0, [2] = m.P(0)}, "rule '1'") -- inf. loop
863
- badgrammar({ m.V(2), m.V(3)^0, m.P"" }, "rule '2'") -- inf. loop
864
- badgrammar({ m.V(2) * m.V(3)^0, m.V(3)^0, m.P"" }, "rule '1'") -- inf. loop
865
- badgrammar({"x", x = #(m.V(1) * 'a') }, "rule '1'") -- inf. loop
866
- badgrammar({ -(m.V(1) * 'a') }, "rule '1'") -- inf. loop
867
- badgrammar({"x", x = m.P'a'^-1 * m.V"x"}, "rule 'x'") -- left recursive
868
- badgrammar({"x", x = m.P'a' * m.V"y"^1, y = #m.P(1)}, "rule 'x'")
869
-
870
- assert(m.match({'a' * -m.V(1)}, "aaa") == 2)
871
- assert(m.match({'a' * -m.V(1)}, "aaaa") == nil)
872
-
873
-
874
- -- good x bad grammars
875
- m.P{ ('a' * m.V(1))^-1 }
876
- m.P{ -('a' * m.V(1)) }
877
- m.P{ ('abc' * m.V(1))^-1 }
878
- m.P{ -('abc' * m.V(1)) }
879
- badgrammar{ #m.P('abc') * m.V(1) }
880
- badgrammar{ -('a' + m.V(1)) }
881
- m.P{ #('a' * m.V(1)) }
882
- badgrammar{ #('a' + m.V(1)) }
883
- m.P{ m.B{ m.P'abc' } * 'a' * m.V(1) }
884
- badgrammar{ m.B{ m.P'abc' } * m.V(1) }
885
- badgrammar{ ('a' + m.P'bcd')^-1 * m.V(1) }
886
-
887
-
888
- -- simple tests for maximum sizes:
889
- local p = m.P"a"
890
- for i=1,14 do p = p * p end
891
-
892
- p = {}
893
- for i=1,100 do p[i] = m.P"a" end
894
- p = m.P(p)
895
-
896
-
897
- -- strange values for rule labels
898
-
899
- p = m.P{ "print",
900
- print = m.V(print),
901
- [print] = m.V(_G),
902
- [_G] = m.P"a",
903
- }
904
-
905
- assert(p:match("a"))
906
-
907
- -- initial rule
908
- g = {}
909
- for i = 1, 10 do g["i"..i] = "a" * m.V("i"..i+1) end
910
- g.i11 = m.P""
911
- for i = 1, 10 do
912
- g[1] = "i"..i
913
- local p = m.P(g)
914
- assert(p:match("aaaaaaaaaaa") == 11 - i + 1)
915
- end
916
-
917
- print"+"
918
-
919
-
920
- -- tests for back references
921
- checkerr("back reference 'x' not found", m.match, m.Cb('x'), '')
922
- checkerr("back reference 'b' not found", m.match, m.Cg(1, 'a') * m.Cb('b'), 'a')
923
-
924
- p = m.Cg(m.C(1) * m.C(1), "k") * m.Ct(m.Cb("k"))
925
- t = p:match("ab")
926
- checkeq(t, {"a", "b"})
927
-
928
-
929
- t = {}
930
- function foo (p) t[#t + 1] = p; return p .. "x" end
931
-
932
- p = m.Cg(m.C(2) / foo, "x") * m.Cb"x" *
933
- m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" *
934
- m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" *
935
- m.Cg(m.Cb('x') / foo, "x") * m.Cb"x"
936
- x = {p:match'ab'}
937
- checkeq(x, {'abx', 'abxx', 'abxxx', 'abxxxx'})
938
- checkeq(t, {'ab',
939
- 'ab', 'abx',
940
- 'ab', 'abx', 'abxx',
941
- 'ab', 'abx', 'abxx', 'abxxx'})
942
-
943
-
944
-
945
- -- tests for match-time captures
946
-
947
- p = m.P'a' * (function (s, i) return (s:sub(i, i) == 'b') and i + 1 end)
948
- + 'acd'
949
-
950
- assert(p:match('abc') == 3)
951
- assert(p:match('acd') == 4)
952
-
953
- local function id (s, i, ...)
954
- return true, ...
955
- end
956
-
957
- assert(m.Cmt(m.Cs((m.Cmt(m.S'abc' / { a = 'x', c = 'y' }, id) +
958
- m.R'09'^1 / string.char +
959
- m.P(1))^0), id):match"acb98+68c" == "xyb\98+\68y")
960
-
961
- p = m.P{'S',
962
- S = m.V'atom' * space
963
- + m.Cmt(m.Ct("(" * space * (m.Cmt(m.V'S'^1, id) + m.P(true)) * ")" * space), id),
964
- atom = m.Cmt(m.C(m.R("AZ", "az", "09")^1), id)
965
- }
966
- x = p:match"(a g () ((b) c) (d (e)))"
967
- checkeq(x, {'a', 'g', {}, {{'b'}, 'c'}, {'d', {'e'}}});
968
-
969
- x = {(m.Cmt(1, id)^0):match(string.rep('a', 500))}
970
- assert(#x == 500)
971
-
972
- local function id(s, i, x)
973
- if x == 'a' then return i, 1, 3, 7
974
- else return nil, 2, 4, 6, 8
975
- end
976
- end
977
-
978
- p = ((m.P(id) * 1 + m.Cmt(2, id) * 1 + m.Cmt(1, id) * 1))^0
979
- assert(table.concat{p:match('abababab')} == string.rep('137', 4))
980
-
981
- local function ref (s, i, x)
982
- return m.match(x, s, i - x:len())
983
- end
984
-
985
- assert(m.Cmt(m.P(1)^0, ref):match('alo') == 4)
986
- assert((m.P(1) * m.Cmt(m.P(1)^0, ref)):match('alo') == 4)
987
- assert(not (m.P(1) * m.Cmt(m.C(1)^0, ref)):match('alo'))
988
-
989
- ref = function (s,i,x) return i == tonumber(x) and i, 'xuxu' end
990
-
991
- assert(m.Cmt(1, ref):match'2')
992
- assert(not m.Cmt(1, ref):match'1')
993
- assert(m.Cmt(m.P(1)^0, ref):match'03')
994
-
995
- function ref (s, i, a, b)
996
- if a == b then return i, a:upper() end
997
- end
998
-
999
- p = m.Cmt(m.C(m.R"az"^1) * "-" * m.C(m.R"az"^1), ref)
1000
- p = (any - p)^0 * p * any^0 * -1
1001
-
1002
- assert(p:match'abbbc-bc ddaa' == 'BC')
1003
-
1004
- do -- match-time captures cannot be optimized away
1005
- local touch = 0
1006
- f = m.P(function () touch = touch + 1; return true end)
1007
-
1008
- local function check(n) n = n or 1; assert(touch == n); touch = 0 end
1009
-
1010
- assert(m.match(f * false + 'b', 'a') == nil); check()
1011
- assert(m.match(f * false + 'b', '') == nil); check()
1012
- assert(m.match( (f * 'a')^0 * 'b', 'b') == 2); check()
1013
- assert(m.match( (f * 'a')^0 * 'b', '') == nil); check()
1014
- assert(m.match( (f * 'a')^-1 * 'b', 'b') == 2); check()
1015
- assert(m.match( (f * 'a')^-1 * 'b', '') == nil); check()
1016
- assert(m.match( ('b' + f * 'a')^-1 * 'b', '') == nil); check()
1017
- assert(m.match( (m.P'b'^-1 * f * 'a')^-1 * 'b', '') == nil); check()
1018
- assert(m.match( (-m.P(1) * m.P'b'^-1 * f * 'a')^-1 * 'b', '') == nil);
1019
- check()
1020
- assert(m.match( (f * 'a' + 'b')^-1 * 'b', '') == nil); check()
1021
- assert(m.match(f * 'a' + f * 'b', 'b') == 2); check(2)
1022
- assert(m.match(f * 'a' + f * 'b', 'a') == 2); check(1)
1023
- assert(m.match(-f * 'a' + 'b', 'b') == 2); check(1)
1024
- assert(m.match(-f * 'a' + 'b', '') == nil); check(1)
1025
- end
1026
-
1027
- c = '[' * m.Cg(m.P'='^0, "init") * '[' *
1028
- { m.Cmt(']' * m.C(m.P'='^0) * ']' * m.Cb("init"), function (_, _, s1, s2)
1029
- return s1 == s2 end)
1030
- + 1 * m.V(1) } / 0
1031
-
1032
- assert(c:match'[==[]]====]]]]==]===[]' == 18)
1033
- assert(c:match'[[]=]====]=]]]==]===[]' == 14)
1034
- assert(not c:match'[[]=]====]=]=]==]===[]')
1035
-
1036
-
1037
- -- old bug: optimization of concat with fail removed match-time capture
1038
- p = m.Cmt(0, function (s) p = s end) * m.P(false)
1039
- assert(not p:match('alo'))
1040
- assert(p == 'alo')
1041
-
1042
-
1043
- -- ensure that failed match-time captures are not kept on Lua stack
1044
- do
1045
- local t = {__mode = "kv"}; setmetatable(t,t)
1046
- local c = 0
1047
-
1048
- local function foo (s,i)
1049
- collectgarbage();
1050
- assert(next(t) == "__mode" and next(t, "__mode") == nil)
1051
- local x = {}
1052
- t[x] = true
1053
- c = c + 1
1054
- return i, x
1055
- end
1056
-
1057
- local p = m.P{ m.Cmt(0, foo) * m.P(false) + m.P(1) * m.V(1) + m.P"" }
1058
- p:match(string.rep('1', 10))
1059
- assert(c == 11)
1060
- end
1061
-
1062
- p = (m.P(function () return true, "a" end) * 'a'
1063
- + m.P(function (s, i) return i, "aa", 20 end) * 'b'
1064
- + m.P(function (s,i) if i <= #s then return i, "aaa" end end) * 1)^0
1065
-
1066
- t = {p:match('abacc')}
1067
- checkeq(t, {'a', 'aa', 20, 'a', 'aaa', 'aaa'})
1068
-
1069
-
1070
- -------------------------------------------------------------------
1071
- -- Tests for 're' module
1072
- -------------------------------------------------------------------
1073
-
1074
- local re = require "lib/re"
1075
-
1076
- local match, compile = re.match, re.compile
1077
-
1078
-
1079
-
1080
- assert(match("a", ".") == 2)
1081
- assert(match("a", "''") == 1)
1082
- assert(match("", " ! . ") == 1)
1083
- assert(not match("a", " ! . "))
1084
- assert(match("abcde", " ( . . ) * ") == 5)
1085
- assert(match("abbcde", " [a-c] +") == 5)
1086
- assert(match("0abbc1de", "'0' [a-c]+ '1'") == 7)
1087
- assert(match("0zz1dda", "'0' [^a-c]+ 'a'") == 8)
1088
- assert(match("abbc--", " [a-c] + +") == 5)
1089
- assert(match("abbc--", " [ac-] +") == 2)
1090
- assert(match("abbc--", " [-acb] + ") == 7)
1091
- assert(not match("abbcde", " [b-z] + "))
1092
- assert(match("abb\"de", '"abb"["]"de"') == 7)
1093
- assert(match("abceeef", "'ac' ? 'ab' * 'c' { 'e' * } / 'abceeef' ") == "eee")
1094
- assert(match("abceeef", "'ac'? 'ab'* 'c' { 'f'+ } / 'abceeef' ") == 8)
1095
- local t = {match("abceefe", "( ( & 'e' {} ) ? . ) * ")}
1096
- checkeq(t, {4, 5, 7})
1097
- local t = {match("abceefe", "((&&'e' {})? .)*")}
1098
- checkeq(t, {4, 5, 7})
1099
- local t = {match("abceefe", "( ( ! ! 'e' {} ) ? . ) *")}
1100
- checkeq(t, {4, 5, 7})
1101
- local t = {match("abceefe", "(( & ! & ! 'e' {})? .)*")}
1102
- checkeq(t, {4, 5, 7})
1103
-
1104
- assert(match("cccx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 5)
1105
- assert(match("cdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 4)
1106
- assert(match("abcdcdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 8)
1107
-
1108
- assert(match("abc", "a <- (. a)?") == 4)
1109
- b = "balanced <- '(' ([^()] / balanced)* ')'"
1110
- assert(match("(abc)", b))
1111
- assert(match("(a(b)((c) (d)))", b))
1112
- assert(not match("(a(b ((c) (d)))", b))
1113
-
1114
- b = compile[[ balanced <- "(" ([^()] / balanced)* ")" ]]
1115
- assert(b == m.P(b))
1116
- assert(b:match"((((a))(b)))")
1117
-
1118
- local g = [[
1119
- S <- "0" B / "1" A / "" -- balanced strings
1120
- A <- "0" S / "1" A A -- one more 0
1121
- B <- "1" S / "0" B B -- one more 1
1122
- ]]
1123
- assert(match("00011011", g) == 9)
1124
-
1125
- local g = [[
1126
- S <- ("0" B / "1" A)*
1127
- A <- "0" / "1" A A
1128
- B <- "1" / "0" B B
1129
- ]]
1130
- assert(match("00011011", g) == 9)
1131
- assert(match("000110110", g) == 9)
1132
- assert(match("011110110", g) == 3)
1133
- assert(match("000110010", g) == 1)
1134
-
1135
- s = "aaaaaaaaaaaaaaaaaaaaaaaa"
1136
- assert(match(s, "'a'^3") == 4)
1137
- assert(match(s, "'a'^0") == 1)
1138
- assert(match(s, "'a'^+3") == s:len() + 1)
1139
- assert(not match(s, "'a'^+30"))
1140
- assert(match(s, "'a'^-30") == s:len() + 1)
1141
- assert(match(s, "'a'^-5") == 6)
1142
- for i = 1, s:len() do
1143
- assert(match(s, string.format("'a'^+%d", i)) >= i + 1)
1144
- assert(match(s, string.format("'a'^-%d", i)) <= i + 1)
1145
- assert(match(s, string.format("'a'^%d", i)) == i + 1)
1146
- end
1147
- assert(match("01234567890123456789", "[0-9]^3+") == 19)
1148
-
1149
-
1150
- assert(match("01234567890123456789", "({....}{...}) -> '%2%1'") == "4560123")
1151
- t = match("0123456789", "{| {.}* |}")
1152
- checkeq(t, {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"})
1153
- assert(match("012345", "{| (..) -> '%0%0' |}")[1] == "0101")
1154
-
1155
- assert(match("abcdef", "( {.} {.} {.} {.} {.} ) -> 3") == "c")
1156
- assert(match("abcdef", "( {:x: . :} {.} {.} {.} {.} ) -> 3") == "d")
1157
- assert(match("abcdef", "( {:x: . :} {.} {.} {.} {.} ) -> 0") == 6)
1158
-
1159
- assert(not match("abcdef", "{:x: ({.} {.} {.}) -> 2 :} =x"))
1160
- assert(match("abcbef", "{:x: ({.} {.} {.}) -> 2 :} =x"))
1161
-
1162
- eqcharset(compile"[]]", "]")
1163
- eqcharset(compile"[][]", m.S"[]")
1164
- eqcharset(compile"[]-]", m.S"-]")
1165
- eqcharset(compile"[-]", m.S"-")
1166
- eqcharset(compile"[az-]", m.S"a-z")
1167
- eqcharset(compile"[-az]", m.S"a-z")
1168
- eqcharset(compile"[a-z]", m.R"az")
1169
- eqcharset(compile"[]['\"]", m.S[[]['"]])
1170
-
1171
- eqcharset(compile"[^]]", any - "]")
1172
- eqcharset(compile"[^][]", any - m.S"[]")
1173
- eqcharset(compile"[^]-]", any - m.S"-]")
1174
- eqcharset(compile"[^]-]", any - m.S"-]")
1175
- eqcharset(compile"[^-]", any - m.S"-")
1176
- eqcharset(compile"[^az-]", any - m.S"a-z")
1177
- eqcharset(compile"[^-az]", any - m.S"a-z")
1178
- eqcharset(compile"[^a-z]", any - m.R"az")
1179
- eqcharset(compile"[^]['\"]", any - m.S[[]['"]])
1180
-
1181
- -- tests for comments in 're'
1182
- e = compile[[
1183
- A <- _B -- \t \n %nl .<> <- -> --
1184
- _B <- 'x' --]]
1185
- assert(e:match'xy' == 2)
1186
-
1187
- -- tests for 're' with pre-definitions
1188
- defs = {digits = m.R"09", letters = m.R"az", _=m.P"__"}
1189
- e = compile("%letters (%letters / %digits)*", defs)
1190
- assert(e:match"x123" == 5)
1191
- e = compile("%_", defs)
1192
- assert(e:match"__" == 3)
1193
-
1194
- e = compile([[
1195
- S <- A+
1196
- A <- %letters+ B
1197
- B <- %digits+
1198
- ]], defs)
1199
-
1200
- e = compile("{[0-9]+'.'?[0-9]*} -> sin", math)
1201
- assert(e:match("2.34") == math.sin(2.34))
1202
-
1203
-
1204
- function eq (_, _, a, b) return a == b end
1205
-
1206
- c = re.compile([[
1207
- longstring <- '[' {:init: '='* :} '[' close
1208
- close <- ']' =init ']' / . close
1209
- ]])
1210
-
1211
- assert(c:match'[==[]]===]]]]==]===[]' == 17)
1212
- assert(c:match'[[]=]====]=]]]==]===[]' == 14)
1213
- assert(not c:match'[[]=]====]=]=]==]===[]')
1214
-
1215
- c = re.compile" '[' {:init: '='* :} '[' (!(']' =init ']') .)* ']' =init ']' !. "
1216
-
1217
- assert(c:match'[==[]]===]]]]==]')
1218
- assert(c:match'[[]=]====]=][]==]===[]]')
1219
- assert(not c:match'[[]=]====]=]=]==]===[]')
1220
-
1221
- assert(re.find("hi alalo", "{:x:..:} =x") == 4)
1222
- assert(re.find("hi alalo", "{:x:..:} =x", 4) == 4)
1223
- assert(not re.find("hi alalo", "{:x:..:} =x", 5))
1224
- assert(re.find("hi alalo", "{'al'}", 5) == 6)
1225
- assert(re.find("hi aloalolo", "{:x:..:} =x") == 8)
1226
- assert(re.find("alo alohi x x", "{:word:%w+:}%W*(=word)!%w") == 11)
1227
-
1228
- -- re.find discards any captures
1229
- local a,b,c = re.find("alo", "{.}{'o'}")
1230
- assert(a == 2 and b == 3 and c == nil)
1231
-
1232
- local function match (s,p)
1233
- local i,e = re.find(s,p)
1234
- if i then return s:sub(i, e) end
1235
- end
1236
- assert(match("alo alo", '[a-z]+') == "alo")
1237
- assert(match("alo alo", '{:x: [a-z]+ :} =x') == nil)
1238
- assert(match("alo alo", "{:x: [a-z]+ :} ' ' =x") == "alo alo")
1239
-
1240
- assert(re.gsub("alo alo", "[abc]", "x") == "xlo xlo")
1241
- assert(re.gsub("alo alo", "%w+", ".") == ". .")
1242
- assert(re.gsub("hi, how are you", "[aeiou]", string.upper) ==
1243
- "hI, hOw ArE yOU")
1244
-
1245
- s = 'hi [[a comment[=]=] ending here]] and [=[another]]=]]'
1246
- c = re.compile" '[' {:i: '='* :} '[' (!(']' =i ']') .)* ']' { =i } ']' "
1247
- assert(re.gsub(s, c, "%2") == 'hi and =]')
1248
- assert(re.gsub(s, c, "%0") == s)
1249
- assert(re.gsub('[=[hi]=]', c, "%2") == '=')
1250
-
1251
- assert(re.find("", "!.") == 1)
1252
- assert(re.find("alo", "!.") == 4)
1253
-
1254
- function addtag (s, i, t, tag) t.tag = tag; return i, t end
1255
-
1256
- c = re.compile([[
1257
- doc <- block !.
1258
- block <- (start {| (block / { [^<]+ })* |} end?) => addtag
1259
- start <- '<' {:tag: [a-z]+ :} '>'
1260
- end <- '</' { =tag } '>'
1261
- ]], {addtag = addtag})
1262
-
1263
- x = c:match[[
1264
- <x>hi<b>hello</b>but<b>totheend</x>]]
1265
- checkeq(x, {tag='x', 'hi', {tag = 'b', 'hello'}, 'but',
1266
- {'totheend'}})
1267
-
1268
-
1269
- -- tests for look-ahead captures
1270
- x = {re.match("alo", "&(&{.}) !{'b'} {&(...)} &{..} {...} {!.}")}
1271
- checkeq(x, {"", "alo", ""})
1272
-
1273
- assert(re.match("aloalo",
1274
- "{~ (((&'al' {.}) -> 'A%1' / (&%l {.}) -> '%1%1') / .)* ~}")
1275
- == "AallooAalloo")
1276
-
1277
- -- bug in 0.9 (and older versions), due to captures in look-aheads
1278
- x = re.compile[[ {~ (&(. ([a-z]* -> '*')) ([a-z]+ -> '+') ' '*)* ~} ]]
1279
- assert(x:match"alo alo" == "+ +")
1280
-
1281
- -- valid capture in look-ahead (used inside the look-ahead itself)
1282
- x = re.compile[[
1283
- S <- &({:two: .. :} . =two) {[a-z]+} / . S
1284
- ]]
1285
- assert(x:match("hello aloaLo aloalo xuxu") == "aloalo")
1286
-
1287
-
1288
- p = re.compile[[
1289
- block <- {| {:ident:space*:} line
1290
- ((=ident !space line) / &(=ident space) block)* |}
1291
- line <- {[^%nl]*} %nl
1292
- space <- '_' -- should be ' ', but '_' is simpler for editors
1293
- ]]
1294
-
1295
- t= p:match[[
1296
- 1
1297
- __1.1
1298
- __1.2
1299
- ____1.2.1
1300
- ____
1301
- 2
1302
- __2.1
1303
- ]]
1304
- checkeq(t, {"1", {"1.1", "1.2", {"1.2.1", "", ident = "____"}, ident = "__"},
1305
- "2", {"2.1", ident = "__"}, ident = ""})
1306
-
1307
-
1308
- -- nested grammars
1309
- p = re.compile[[
1310
- s <- a b !.
1311
- b <- ( x <- ('b' x)? )
1312
- a <- ( x <- 'a' x? )
1313
- ]]
1314
-
1315
- assert(p:match'aaabbb')
1316
- assert(p:match'aaa')
1317
- assert(not p:match'bbb')
1318
- assert(not p:match'aaabbba')
1319
-
1320
- -- testing groups
1321
- t = {re.match("abc", "{:S <- {:.:} {S} / '':}")}
1322
- checkeq(t, {"a", "bc", "b", "c", "c", ""})
1323
-
1324
- t = re.match("1234", "{| {:a:.:} {:b:.:} {:c:.{.}:} |}")
1325
- checkeq(t, {a="1", b="2", c="4"})
1326
- t = re.match("1234", "{|{:a:.:} {:b:{.}{.}:} {:c:{.}:}|}")
1327
- checkeq(t, {a="1", b="2", c="4"})
1328
- t = re.match("12345", "{| {:.:} {:b:{.}{.}:} {:{.}{.}:} |}")
1329
- checkeq(t, {"1", b="2", "4", "5"})
1330
- t = re.match("12345", "{| {:.:} {:{:b:{.}{.}:}:} {:{.}{.}:} |}")
1331
- checkeq(t, {"1", "23", "4", "5"})
1332
- t = re.match("12345", "{| {:.:} {{:b:{.}{.}:}} {:{.}{.}:} |}")
1333
- checkeq(t, {"1", "23", "4", "5"})
1334
-
1335
-
1336
- -- testing pre-defined names
1337
- assert(os.setlocale("C") == "C")
1338
-
1339
- function eqlpeggsub (p1, p2)
1340
- local s1 = cs2str(re.compile(p1))
1341
- local s2 = string.gsub(allchar, "[^" .. p2 .. "]", "")
1342
- -- if s1 ~= s2 then print(#s1,#s2) end
1343
- assert(s1 == s2)
1344
- end
1345
-
1346
-
1347
- eqlpeggsub("%w", "%w")
1348
- eqlpeggsub("%a", "%a")
1349
- eqlpeggsub("%l", "%l")
1350
- eqlpeggsub("%u", "%u")
1351
- eqlpeggsub("%p", "%p")
1352
- eqlpeggsub("%d", "%d")
1353
- eqlpeggsub("%x", "%x")
1354
- eqlpeggsub("%s", "%s")
1355
- eqlpeggsub("%c", "%c")
1356
-
1357
- eqlpeggsub("%W", "%W")
1358
- eqlpeggsub("%A", "%A")
1359
- eqlpeggsub("%L", "%L")
1360
- eqlpeggsub("%U", "%U")
1361
- eqlpeggsub("%P", "%P")
1362
- eqlpeggsub("%D", "%D")
1363
- eqlpeggsub("%X", "%X")
1364
- eqlpeggsub("%S", "%S")
1365
- eqlpeggsub("%C", "%C")
1366
-
1367
- eqlpeggsub("[%w]", "%w")
1368
- eqlpeggsub("[_%w]", "_%w")
1369
- eqlpeggsub("[^%w]", "%W")
1370
- eqlpeggsub("[%W%S]", "%W%S")
1371
-
1372
- re.updatelocale()
1373
-
1374
-
1375
- -- testing nested substitutions x string captures
1376
-
1377
- p = re.compile[[
1378
- text <- {~ item* ~}
1379
- item <- macro / [^()] / '(' item* ')'
1380
- arg <- ' '* {~ (!',' item)* ~}
1381
- args <- '(' arg (',' arg)* ')'
1382
- macro <- ('apply' args) -> '%1(%2)'
1383
- / ('add' args) -> '%1 + %2'
1384
- / ('mul' args) -> '%1 * %2'
1385
- ]]
1386
-
1387
- assert(p:match"add(mul(a,b), apply(f,x))" == "a * b + f(x)")
1388
-
1389
- rev = re.compile[[ R <- (!.) -> '' / ({.} R) -> '%2%1']]
1390
-
1391
- assert(rev:match"0123456789" == "9876543210")
1392
-
1393
-
1394
- -- testing error messages in re
1395
-
1396
- local function errmsg (p, err)
1397
- checkerr(err, re.compile, p)
1398
- end
1399
-
1400
- errmsg('aaaa', "rule 'aaaa'")
1401
- errmsg('a', 'outside')
1402
- errmsg('b <- a', 'undefined')
1403
- errmsg("x <- 'a' x <- 'b'", 'already defined')
1404
- errmsg("'a' -", "near '-'")
1405
-
1406
-
1407
- print"OK"
1408
-
1409
-