immunio 0.15.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (157) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +234 -0
  3. data/README.md +147 -0
  4. data/bin/immunio +5 -0
  5. data/lib/immunio.rb +29 -0
  6. data/lib/immunio/agent.rb +260 -0
  7. data/lib/immunio/authentication.rb +96 -0
  8. data/lib/immunio/blocked_app.rb +38 -0
  9. data/lib/immunio/channel.rb +432 -0
  10. data/lib/immunio/cli.rb +39 -0
  11. data/lib/immunio/context.rb +114 -0
  12. data/lib/immunio/errors.rb +43 -0
  13. data/lib/immunio/immunio_ca.crt +45 -0
  14. data/lib/immunio/logger.rb +87 -0
  15. data/lib/immunio/plugins/action_dispatch.rb +45 -0
  16. data/lib/immunio/plugins/action_view.rb +431 -0
  17. data/lib/immunio/plugins/active_record.rb +707 -0
  18. data/lib/immunio/plugins/active_record_relation.rb +370 -0
  19. data/lib/immunio/plugins/authlogic.rb +80 -0
  20. data/lib/immunio/plugins/csrf.rb +24 -0
  21. data/lib/immunio/plugins/devise.rb +40 -0
  22. data/lib/immunio/plugins/environment_reporter.rb +69 -0
  23. data/lib/immunio/plugins/eval.rb +51 -0
  24. data/lib/immunio/plugins/exception_handler.rb +55 -0
  25. data/lib/immunio/plugins/gems_tracker.rb +5 -0
  26. data/lib/immunio/plugins/haml.rb +36 -0
  27. data/lib/immunio/plugins/http_finisher.rb +50 -0
  28. data/lib/immunio/plugins/http_tracker.rb +203 -0
  29. data/lib/immunio/plugins/io.rb +96 -0
  30. data/lib/immunio/plugins/redirect.rb +42 -0
  31. data/lib/immunio/plugins/warden.rb +66 -0
  32. data/lib/immunio/processor.rb +234 -0
  33. data/lib/immunio/rails.rb +26 -0
  34. data/lib/immunio/request.rb +139 -0
  35. data/lib/immunio/rufus_lua_ext/ref.rb +27 -0
  36. data/lib/immunio/rufus_lua_ext/state.rb +157 -0
  37. data/lib/immunio/rufus_lua_ext/table.rb +137 -0
  38. data/lib/immunio/rufus_lua_ext/utils.rb +13 -0
  39. data/lib/immunio/version.rb +5 -0
  40. data/lib/immunio/vm.rb +291 -0
  41. data/lua-hooks/ext/all.c +78 -0
  42. data/lua-hooks/ext/bitop/README +22 -0
  43. data/lua-hooks/ext/bitop/bit.c +189 -0
  44. data/lua-hooks/ext/extconf.rb +38 -0
  45. data/lua-hooks/ext/libinjection/COPYING +37 -0
  46. data/lua-hooks/ext/libinjection/libinjection.h +65 -0
  47. data/lua-hooks/ext/libinjection/libinjection_html5.c +847 -0
  48. data/lua-hooks/ext/libinjection/libinjection_html5.h +54 -0
  49. data/lua-hooks/ext/libinjection/libinjection_sqli.c +2301 -0
  50. data/lua-hooks/ext/libinjection/libinjection_sqli.h +295 -0
  51. data/lua-hooks/ext/libinjection/libinjection_sqli_data.h +9349 -0
  52. data/lua-hooks/ext/libinjection/libinjection_xss.c +531 -0
  53. data/lua-hooks/ext/libinjection/libinjection_xss.h +21 -0
  54. data/lua-hooks/ext/libinjection/lualib.c +109 -0
  55. data/lua-hooks/ext/lpeg/HISTORY +90 -0
  56. data/lua-hooks/ext/lpeg/lpcap.c +537 -0
  57. data/lua-hooks/ext/lpeg/lpcap.h +43 -0
  58. data/lua-hooks/ext/lpeg/lpcode.c +986 -0
  59. data/lua-hooks/ext/lpeg/lpcode.h +34 -0
  60. data/lua-hooks/ext/lpeg/lpeg-128.gif +0 -0
  61. data/lua-hooks/ext/lpeg/lpeg.html +1429 -0
  62. data/lua-hooks/ext/lpeg/lpprint.c +244 -0
  63. data/lua-hooks/ext/lpeg/lpprint.h +35 -0
  64. data/lua-hooks/ext/lpeg/lptree.c +1238 -0
  65. data/lua-hooks/ext/lpeg/lptree.h +77 -0
  66. data/lua-hooks/ext/lpeg/lptypes.h +149 -0
  67. data/lua-hooks/ext/lpeg/lpvm.c +355 -0
  68. data/lua-hooks/ext/lpeg/lpvm.h +58 -0
  69. data/lua-hooks/ext/lpeg/makefile +55 -0
  70. data/lua-hooks/ext/lpeg/re.html +498 -0
  71. data/lua-hooks/ext/lpeg/test.lua +1409 -0
  72. data/lua-hooks/ext/lua-cmsgpack/CMakeLists.txt +45 -0
  73. data/lua-hooks/ext/lua-cmsgpack/README.md +115 -0
  74. data/lua-hooks/ext/lua-cmsgpack/lua_cmsgpack.c +957 -0
  75. data/lua-hooks/ext/lua-cmsgpack/test.lua +570 -0
  76. data/lua-hooks/ext/lua-snapshot/LICENSE +7 -0
  77. data/lua-hooks/ext/lua-snapshot/Makefile +12 -0
  78. data/lua-hooks/ext/lua-snapshot/README.md +18 -0
  79. data/lua-hooks/ext/lua-snapshot/dump.lua +15 -0
  80. data/lua-hooks/ext/lua-snapshot/snapshot.c +455 -0
  81. data/lua-hooks/ext/lua/COPYRIGHT +34 -0
  82. data/lua-hooks/ext/lua/lapi.c +1087 -0
  83. data/lua-hooks/ext/lua/lapi.h +16 -0
  84. data/lua-hooks/ext/lua/lauxlib.c +652 -0
  85. data/lua-hooks/ext/lua/lauxlib.h +174 -0
  86. data/lua-hooks/ext/lua/lbaselib.c +659 -0
  87. data/lua-hooks/ext/lua/lcode.c +831 -0
  88. data/lua-hooks/ext/lua/lcode.h +76 -0
  89. data/lua-hooks/ext/lua/ldblib.c +398 -0
  90. data/lua-hooks/ext/lua/ldebug.c +638 -0
  91. data/lua-hooks/ext/lua/ldebug.h +33 -0
  92. data/lua-hooks/ext/lua/ldo.c +519 -0
  93. data/lua-hooks/ext/lua/ldo.h +57 -0
  94. data/lua-hooks/ext/lua/ldump.c +164 -0
  95. data/lua-hooks/ext/lua/lfunc.c +174 -0
  96. data/lua-hooks/ext/lua/lfunc.h +34 -0
  97. data/lua-hooks/ext/lua/lgc.c +710 -0
  98. data/lua-hooks/ext/lua/lgc.h +110 -0
  99. data/lua-hooks/ext/lua/linit.c +38 -0
  100. data/lua-hooks/ext/lua/liolib.c +556 -0
  101. data/lua-hooks/ext/lua/llex.c +463 -0
  102. data/lua-hooks/ext/lua/llex.h +81 -0
  103. data/lua-hooks/ext/lua/llimits.h +128 -0
  104. data/lua-hooks/ext/lua/lmathlib.c +263 -0
  105. data/lua-hooks/ext/lua/lmem.c +86 -0
  106. data/lua-hooks/ext/lua/lmem.h +49 -0
  107. data/lua-hooks/ext/lua/loadlib.c +705 -0
  108. data/lua-hooks/ext/lua/loadlib_rel.c +760 -0
  109. data/lua-hooks/ext/lua/lobject.c +214 -0
  110. data/lua-hooks/ext/lua/lobject.h +381 -0
  111. data/lua-hooks/ext/lua/lopcodes.c +102 -0
  112. data/lua-hooks/ext/lua/lopcodes.h +268 -0
  113. data/lua-hooks/ext/lua/loslib.c +243 -0
  114. data/lua-hooks/ext/lua/lparser.c +1339 -0
  115. data/lua-hooks/ext/lua/lparser.h +82 -0
  116. data/lua-hooks/ext/lua/lstate.c +214 -0
  117. data/lua-hooks/ext/lua/lstate.h +169 -0
  118. data/lua-hooks/ext/lua/lstring.c +111 -0
  119. data/lua-hooks/ext/lua/lstring.h +31 -0
  120. data/lua-hooks/ext/lua/lstrlib.c +871 -0
  121. data/lua-hooks/ext/lua/ltable.c +588 -0
  122. data/lua-hooks/ext/lua/ltable.h +40 -0
  123. data/lua-hooks/ext/lua/ltablib.c +287 -0
  124. data/lua-hooks/ext/lua/ltm.c +75 -0
  125. data/lua-hooks/ext/lua/ltm.h +54 -0
  126. data/lua-hooks/ext/lua/lua.c +392 -0
  127. data/lua-hooks/ext/lua/lua.def +131 -0
  128. data/lua-hooks/ext/lua/lua.h +388 -0
  129. data/lua-hooks/ext/lua/lua.rc +28 -0
  130. data/lua-hooks/ext/lua/lua_dll.rc +26 -0
  131. data/lua-hooks/ext/lua/luac.c +200 -0
  132. data/lua-hooks/ext/lua/luac.rc +1 -0
  133. data/lua-hooks/ext/lua/luaconf.h +763 -0
  134. data/lua-hooks/ext/lua/luaconf.h.in +724 -0
  135. data/lua-hooks/ext/lua/luaconf.h.orig +763 -0
  136. data/lua-hooks/ext/lua/lualib.h +53 -0
  137. data/lua-hooks/ext/lua/lundump.c +227 -0
  138. data/lua-hooks/ext/lua/lundump.h +36 -0
  139. data/lua-hooks/ext/lua/lvm.c +767 -0
  140. data/lua-hooks/ext/lua/lvm.h +36 -0
  141. data/lua-hooks/ext/lua/lzio.c +82 -0
  142. data/lua-hooks/ext/lua/lzio.h +67 -0
  143. data/lua-hooks/ext/lua/print.c +227 -0
  144. data/lua-hooks/ext/luautf8/README.md +152 -0
  145. data/lua-hooks/ext/luautf8/lutf8lib.c +1274 -0
  146. data/lua-hooks/ext/luautf8/unidata.h +3064 -0
  147. data/lua-hooks/lib/boot.lua +254 -0
  148. data/lua-hooks/lib/encode.lua +4 -0
  149. data/lua-hooks/lib/lexers/LICENSE +21 -0
  150. data/lua-hooks/lib/lexers/bash.lua +134 -0
  151. data/lua-hooks/lib/lexers/bash_dqstr.lua +62 -0
  152. data/lua-hooks/lib/lexers/css.lua +216 -0
  153. data/lua-hooks/lib/lexers/html.lua +106 -0
  154. data/lua-hooks/lib/lexers/javascript.lua +68 -0
  155. data/lua-hooks/lib/lexers/lexer.lua +1575 -0
  156. data/lua-hooks/lib/lexers/markers.lua +33 -0
  157. metadata +308 -0
@@ -0,0 +1,1409 @@
1
+ #!/usr/bin/env lua5.1
2
+
3
+ -- $Id: test.lua,v 1.105 2014/12/12 17:00:39 roberto Exp $
4
+
5
+ -- require"strict" -- just to be pedantic
6
+
7
+ local m = require"lpeg"
8
+
9
+
10
+ -- for general use
11
+ local a, b, c, d, e, f, g, p, t
12
+
13
+
14
+ -- compatibility with Lua 5.2
15
+ local unpack = rawget(table, "unpack") or unpack
16
+ local loadstring = rawget(_G, "loadstring") or load
17
+
18
+
19
+ -- most tests here do not need much stack space
20
+ m.setmaxstack(5)
21
+
22
+ local any = m.P(1)
23
+ local space = m.S" \t\n"^0
24
+
25
+ local function checkeq (x, y, p)
26
+ if p then print(x,y) end
27
+ if type(x) ~= "table" then assert(x == y)
28
+ else
29
+ for k,v in pairs(x) do checkeq(v, y[k], p) end
30
+ for k,v in pairs(y) do checkeq(v, x[k], p) end
31
+ end
32
+ end
33
+
34
+
35
+ local mt = getmetatable(m.P(1))
36
+
37
+
38
+ local allchar = {}
39
+ for i=0,255 do allchar[i + 1] = i end
40
+ allchar = string.char(unpack(allchar))
41
+ assert(#allchar == 256)
42
+
43
+ local function cs2str (c)
44
+ return m.match(m.Cs((c + m.P(1)/"")^0), allchar)
45
+ end
46
+
47
+ local function eqcharset (c1, c2)
48
+ assert(cs2str(c1) == cs2str(c2))
49
+ end
50
+
51
+
52
+ print"General tests for LPeg library"
53
+
54
+ assert(type(m.version()) == "string")
55
+ print("version " .. m.version())
56
+ assert(m.type("alo") ~= "pattern")
57
+ assert(m.type(io.input) ~= "pattern")
58
+ assert(m.type(m.P"alo") == "pattern")
59
+
60
+ -- tests for some basic optimizations
61
+ assert(m.match(m.P(false) + "a", "a") == 2)
62
+ assert(m.match(m.P(true) + "a", "a") == 1)
63
+ assert(m.match("a" + m.P(false), "b") == nil)
64
+ assert(m.match("a" + m.P(true), "b") == 1)
65
+
66
+ assert(m.match(m.P(false) * "a", "a") == nil)
67
+ assert(m.match(m.P(true) * "a", "a") == 2)
68
+ assert(m.match("a" * m.P(false), "a") == nil)
69
+ assert(m.match("a" * m.P(true), "a") == 2)
70
+
71
+ assert(m.match(#m.P(false) * "a", "a") == nil)
72
+ assert(m.match(#m.P(true) * "a", "a") == 2)
73
+ assert(m.match("a" * #m.P(false), "a") == nil)
74
+ assert(m.match("a" * #m.P(true), "a") == 2)
75
+
76
+
77
+ -- tests for locale
78
+ do
79
+ assert(m.locale(m) == m)
80
+ local t = {}
81
+ assert(m.locale(t, m) == t)
82
+ local x = m.locale()
83
+ for n,v in pairs(x) do
84
+ assert(type(n) == "string")
85
+ eqcharset(v, m[n])
86
+ end
87
+ end
88
+
89
+
90
+ assert(m.match(3, "aaaa"))
91
+ assert(m.match(4, "aaaa"))
92
+ assert(not m.match(5, "aaaa"))
93
+ assert(m.match(-3, "aa"))
94
+ assert(not m.match(-3, "aaa"))
95
+ assert(not m.match(-3, "aaaa"))
96
+ assert(not m.match(-4, "aaaa"))
97
+ assert(m.P(-5):match"aaaa")
98
+
99
+ assert(m.match("a", "alo") == 2)
100
+ assert(m.match("al", "alo") == 3)
101
+ assert(not m.match("alu", "alo"))
102
+ assert(m.match(true, "") == 1)
103
+
104
+ local digit = m.S"0123456789"
105
+ local upper = m.S"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
106
+ local lower = m.S"abcdefghijklmnopqrstuvwxyz"
107
+ local letter = m.S"" + upper + lower
108
+ local alpha = letter + digit + m.R()
109
+
110
+ eqcharset(m.S"", m.P(false))
111
+ eqcharset(upper, m.R("AZ"))
112
+ eqcharset(lower, m.R("az"))
113
+ eqcharset(upper + lower, m.R("AZ", "az"))
114
+ eqcharset(upper + lower, m.R("AZ", "cz", "aa", "bb", "90"))
115
+ eqcharset(digit, m.S"01234567" + "8" + "9")
116
+ eqcharset(upper, letter - lower)
117
+ eqcharset(m.S(""), m.R())
118
+ assert(cs2str(m.S("")) == "")
119
+
120
+ eqcharset(m.S"\0", "\0")
121
+ eqcharset(m.S"\1\0\2", m.R"\0\2")
122
+ eqcharset(m.S"\1\0\2", m.R"\1\2" + "\0")
123
+ eqcharset(m.S"\1\0\2" - "\0", m.R"\1\2")
124
+
125
+ local word = alpha^1 * (1 - alpha)^0
126
+
127
+ assert((word^0 * -1):match"alo alo")
128
+ assert(m.match(word^1 * -1, "alo alo"))
129
+ assert(m.match(word^2 * -1, "alo alo"))
130
+ assert(not m.match(word^3 * -1, "alo alo"))
131
+
132
+ assert(not m.match(word^-1 * -1, "alo alo"))
133
+ assert(m.match(word^-2 * -1, "alo alo"))
134
+ assert(m.match(word^-3 * -1, "alo alo"))
135
+
136
+ local eos = m.P(-1)
137
+
138
+ assert(m.match(digit^0 * letter * digit * eos, "1298a1"))
139
+ assert(not m.match(digit^0 * letter * eos, "1257a1"))
140
+
141
+ b = {
142
+ [1] = "(" * (((1 - m.S"()") + #m.P"(" * m.V(1))^0) * ")"
143
+ }
144
+
145
+ assert(m.match(b, "(al())()"))
146
+ assert(not m.match(b * eos, "(al())()"))
147
+ assert(m.match(b * eos, "((al())()(é))"))
148
+ assert(not m.match(b, "(al()()"))
149
+
150
+ assert(not m.match(letter^1 - "for", "foreach"))
151
+ assert(m.match(letter^1 - ("for" * eos), "foreach"))
152
+ assert(not m.match(letter^1 - ("for" * eos), "for"))
153
+
154
+ function basiclookfor (p)
155
+ return m.P {
156
+ [1] = p + (1 * m.V(1))
157
+ }
158
+ end
159
+
160
+ function caplookfor (p)
161
+ return basiclookfor(p:C())
162
+ end
163
+
164
+ assert(m.match(caplookfor(letter^1), " 4achou123...") == "achou")
165
+ a = {m.match(caplookfor(letter^1)^0, " two words, one more ")}
166
+ checkeq(a, {"two", "words", "one", "more"})
167
+
168
+ assert(m.match( basiclookfor((#m.P(b) * 1) * m.Cp()), " ( (a)") == 7)
169
+
170
+ a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "123")}
171
+ checkeq(a, {"123", "d"})
172
+
173
+ -- bug in LPeg 0.12 (nil value does not create a 'ktable')
174
+ assert(m.match(m.Cc(nil), "") == nil)
175
+
176
+ a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "abcd")}
177
+ checkeq(a, {"abcd", "l"})
178
+
179
+ a = {m.match(m.Cc(10,20,30) * 'a' * m.Cp(), 'aaa')}
180
+ checkeq(a, {10,20,30,2})
181
+ a = {m.match(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp(), 'aaa')}
182
+ checkeq(a, {1,10,20,30,2})
183
+ a = m.match(m.Ct(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp()), 'aaa')
184
+ checkeq(a, {1,10,20,30,2})
185
+ a = m.match(m.Ct(m.Cp() * m.Cc(7,8) * m.Cc(10,20,30) * 'a' * m.Cp()), 'aaa')
186
+ checkeq(a, {1,7,8,10,20,30,2})
187
+ a = {m.match(m.Cc() * m.Cc() * m.Cc(1) * m.Cc(2,3,4) * m.Cc() * 'a', 'aaa')}
188
+ checkeq(a, {1,2,3,4})
189
+
190
+ a = {m.match(m.Cp() * letter^1 * m.Cp(), "abcd")}
191
+ checkeq(a, {1, 5})
192
+
193
+
194
+ t = {m.match({[1] = m.C(m.C(1) * m.V(1) + -1)}, "abc")}
195
+ checkeq(t, {"abc", "a", "bc", "b", "c", "c", ""})
196
+
197
+ -- bug in 0.12 ('hascapture' did not check for captures inside a rule)
198
+ do
199
+ local pat = m.P{
200
+ 'S';
201
+ S1 = m.C('abc') + 3,
202
+ S = #m.V('S1') -- rule has capture, but '#' must ignore it
203
+ }
204
+ assert(pat:match'abc' == 1)
205
+ end
206
+
207
+
208
+ -- test for small capture boundary
209
+ for i = 250,260 do
210
+ assert(#m.match(m.C(i), string.rep('a', i)) == i)
211
+ assert(#m.match(m.C(m.C(i)), string.rep('a', i)) == i)
212
+ end
213
+
214
+ -- tests for any*n and any*-n
215
+ for n = 1, 550, 13 do
216
+ local x_1 = string.rep('x', n - 1)
217
+ local x = x_1 .. 'a'
218
+ assert(not m.P(n):match(x_1))
219
+ assert(m.P(n):match(x) == n + 1)
220
+ assert(n < 4 or m.match(m.P(n) + "xxx", x_1) == 4)
221
+ assert(m.C(n):match(x) == x)
222
+ assert(m.C(m.C(n)):match(x) == x)
223
+ assert(m.P(-n):match(x_1) == 1)
224
+ assert(not m.P(-n):match(x))
225
+ assert(n < 13 or m.match(m.Cc(20) * ((n - 13) * m.P(10)) * 3, x) == 20)
226
+ local n3 = math.floor(n/3)
227
+ assert(m.match(n3 * m.Cp() * n3 * n3, x) == n3 + 1)
228
+ end
229
+
230
+ -- true values
231
+ assert(m.P(0):match("x") == 1)
232
+ assert(m.P(0):match("") == 1)
233
+ assert(m.C(0):match("x") == "")
234
+
235
+ assert(m.match(m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu", "xuxu") == 1)
236
+ assert(m.match(m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu", "xuxuxuxuxu") == 0)
237
+ assert(m.match(m.C(m.P(2)^1), "abcde") == "abcd")
238
+ p = m.Cc(0) * 1 + m.Cc(1) * 2 + m.Cc(2) * 3 + m.Cc(3) * 4
239
+
240
+
241
+ -- test for alternation optimization
242
+ assert(m.match(m.P"a"^1 + "ab" + m.P"x"^0, "ab") == 2)
243
+ assert(m.match((m.P"a"^1 + "ab" + m.P"x"^0 * 1)^0, "ab") == 3)
244
+ assert(m.match(m.P"ab" + "cd" + "" + "cy" + "ak", "98") == 1)
245
+ assert(m.match(m.P"ab" + "cd" + "ax" + "cy", "ax") == 3)
246
+ assert(m.match("a" * m.P"b"^0 * "c" + "cd" + "ax" + "cy", "ax") == 3)
247
+ assert(m.match((m.P"ab" + "cd" + "ax" + "cy")^0, "ax") == 3)
248
+ assert(m.match(m.P(1) * "x" + m.S"" * "xu" + "ay", "ay") == 3)
249
+ assert(m.match(m.P"abc" + "cde" + "aka", "aka") == 4)
250
+ assert(m.match(m.S"abc" * "x" + "cde" + "aka", "ax") == 3)
251
+ assert(m.match(m.S"abc" * "x" + "cde" + "aka", "aka") == 4)
252
+ assert(m.match(m.S"abc" * "x" + "cde" + "aka", "cde") == 4)
253
+ assert(m.match(m.S"abc" * "x" + "ide" + m.S"ab" * "ka", "aka") == 4)
254
+ assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "ax") == 3)
255
+ assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "aka") == 4)
256
+ assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "cde") == 4)
257
+ assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "aka") == 4)
258
+ assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "ax") == 3)
259
+ assert(m.match(m.P(1) * "x" + "cde" + m.S"ab" * "ka", "aka") == 4)
260
+ assert(m.match(m.P(1) * "x" + "cde" + m.P(1) * "ka", "aka") == 4)
261
+ assert(m.match(m.P(1) * "x" + "cde" + m.P(1) * "ka", "cde") == 4)
262
+ assert(m.match(m.P"eb" + "cd" + m.P"e"^0 + "x", "ee") == 3)
263
+ assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "abcd") == 3)
264
+ assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "eeex") == 4)
265
+ assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "cd") == 3)
266
+ assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "x") == 1)
267
+ assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x" + "", "zee") == 1)
268
+ assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "abcd") == 3)
269
+ assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "eeex") == 4)
270
+ assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "cd") == 3)
271
+ assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "x") == 2)
272
+ assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x" + "", "zee") == 1)
273
+ assert(not m.match(("aa" * m.P"bc"^-1 + "aab") * "e", "aabe"))
274
+
275
+ assert(m.match("alo" * (m.P"\n" + -1), "alo") == 4)
276
+
277
+
278
+ -- bug in 0.12 (rc1)
279
+ assert(m.match((m.P"\128\187\191" + m.S"abc")^0, "\128\187\191") == 4)
280
+
281
+ assert(m.match(m.S"\0\128\255\127"^0, string.rep("\0\128\255\127", 10)) ==
282
+ 4*10 + 1)
283
+
284
+ -- optimizations with optional parts
285
+ assert(m.match(("ab" * -m.P"c")^-1, "abc") == 1)
286
+ assert(m.match(("ab" * #m.P"c")^-1, "abd") == 1)
287
+ assert(m.match(("ab" * m.B"c")^-1, "ab") == 1)
288
+ assert(m.match(("ab" * m.P"cd"^0)^-1, "abcdcdc") == 7)
289
+
290
+ assert(m.match(m.P"ab"^-1 - "c", "abcd") == 3)
291
+
292
+ p = ('Aa' * ('Bb' * ('Cc' * m.P'Dd'^0)^0)^0)^-1
293
+ assert(p:match("AaBbCcDdBbCcDdDdDdBb") == 21)
294
+
295
+
296
+ pi = "3.14159 26535 89793 23846 26433 83279 50288 41971 69399 37510"
297
+ assert(m.match(m.Cs((m.P"1" / "a" + m.P"5" / "b" + m.P"9" / "c" + 1)^0), pi) ==
298
+ m.match(m.Cs((m.P(1) / {["1"] = "a", ["5"] = "b", ["9"] = "c"})^0), pi))
299
+ print"+"
300
+
301
+
302
+ -- tests for capture optimizations
303
+ assert(m.match((m.P(3) + 4 * m.Cp()) * "a", "abca") == 5)
304
+ t = {m.match(((m.P"a" + m.Cp()) * m.P"x")^0, "axxaxx")}
305
+ checkeq(t, {3, 6})
306
+
307
+
308
+ -- tests for numbered captures
309
+ p = m.C(1)
310
+ assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 3, "abcdefgh") == "a")
311
+ assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 1, "abcdefgh") == "abcdef")
312
+ assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 4, "abcdefgh") == "bc")
313
+ assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 0, "abcdefgh") == 7)
314
+
315
+ a, b, c = m.match(p * (m.C(p * m.C(2)) * m.C(3) / 4) * p, "abcdefgh")
316
+ assert(a == "a" and b == "efg" and c == "h")
317
+
318
+ -- test for table captures
319
+ t = m.match(m.Ct(letter^1), "alo")
320
+ checkeq(t, {})
321
+
322
+ t, n = m.match(m.Ct(m.C(letter)^1) * m.Cc"t", "alo")
323
+ assert(n == "t" and table.concat(t) == "alo")
324
+
325
+ t = m.match(m.Ct(m.C(m.C(letter)^1)), "alo")
326
+ assert(table.concat(t, ";") == "alo;a;l;o")
327
+
328
+ t = m.match(m.Ct(m.C(m.C(letter)^1)), "alo")
329
+ assert(table.concat(t, ";") == "alo;a;l;o")
330
+
331
+ t = m.match(m.Ct(m.Ct((m.Cp() * letter * m.Cp())^1)), "alo")
332
+ assert(table.concat(t[1], ";") == "1;2;2;3;3;4")
333
+
334
+ t = m.match(m.Ct(m.C(m.C(1) * 1 * m.C(1))), "alo")
335
+ checkeq(t, {"alo", "a", "o"})
336
+
337
+
338
+ -- tests for groups
339
+ p = m.Cg(1) -- no capture
340
+ assert(p:match('x') == 'x')
341
+ p = m.Cg(m.P(true)/function () end * 1) -- no value
342
+ assert(p:match('x') == 'x')
343
+ p = m.Cg(m.Cg(m.Cg(m.C(1))))
344
+ assert(p:match('x') == 'x')
345
+ p = m.Cg(m.Cg(m.Cg(m.C(1))^0) * m.Cg(m.Cc(1) * m.Cc(2)))
346
+ t = {p:match'abc'}
347
+ checkeq(t, {'a', 'b', 'c', 1, 2})
348
+
349
+ p = m.Ct(m.Cg(m.Cc(10), "hi") * m.C(1)^0 * m.Cg(m.Cc(20), "ho"))
350
+ t = p:match''
351
+ checkeq(t, {hi = 10, ho = 20})
352
+ t = p:match'abc'
353
+ checkeq(t, {hi = 10, ho = 20, 'a', 'b', 'c'})
354
+
355
+
356
+ -- test for error messages
357
+ local function checkerr (msg, f, ...)
358
+ local st, err = pcall(f, ...)
359
+ assert(not st and m.match({ m.P(msg) + 1 * m.V(1) }, err))
360
+ end
361
+
362
+ checkerr("rule '1' may be left recursive", m.match, { m.V(1) * 'a' }, "a")
363
+ checkerr("rule '1' used outside a grammar", m.match, m.V(1), "")
364
+ checkerr("rule 'hiii' used outside a grammar", m.match, m.V('hiii'), "")
365
+ checkerr("rule 'hiii' undefined in given grammar", m.match, { m.V('hiii') }, "")
366
+ checkerr("undefined in given grammar", m.match, { m.V{} }, "")
367
+
368
+ checkerr("rule 'A' is not a pattern", m.P, { m.P(1), A = {} })
369
+ checkerr("grammar has no initial rule", m.P, { [print] = {} })
370
+
371
+ -- grammar with a long call chain before left recursion
372
+ p = {'a',
373
+ a = m.V'b' * m.V'c' * m.V'd' * m.V'a',
374
+ b = m.V'c',
375
+ c = m.V'd',
376
+ d = m.V'e',
377
+ e = m.V'f',
378
+ f = m.V'g',
379
+ g = m.P''
380
+ }
381
+ checkerr("rule 'a' may be left recursive", m.match, p, "a")
382
+
383
+ -- Bug in peephole optimization of LPeg 0.12 (IJmp -> ICommit)
384
+ -- the next grammar has an original sequence IJmp -> ICommit -> IJmp L1
385
+ -- that is optimized to ICommit L1
386
+
387
+ p = m.P { (m.P {m.P'abc'} + 'ayz') * m.V'y'; y = m.P'x' }
388
+ assert(p:match('abcx') == 5 and p:match('ayzx') == 5 and not p:match'abc')
389
+
390
+
391
+ -- tests for non-pattern as arguments to pattern functions
392
+
393
+ p = { ('a' * m.V(1))^-1 } * m.P'b' * { 'a' * m.V(2); m.V(1)^-1 }
394
+ assert(m.match(p, "aaabaac") == 7)
395
+
396
+ p = m.P'abc' * 2 * -5 * true * 'de' -- mix of numbers and strings and booleans
397
+
398
+ assert(p:match("abc01de") == 8)
399
+ assert(p:match("abc01de3456") == nil)
400
+
401
+ p = 'abc' * (2 * (-5 * (true * m.P'de')))
402
+
403
+ assert(p:match("abc01de") == 8)
404
+ assert(p:match("abc01de3456") == nil)
405
+
406
+ p = { m.V(2), m.P"abc" } *
407
+ (m.P{ "xx", xx = m.P"xx" } + { "x", x = m.P"a" * m.V"x" + "" })
408
+ assert(p:match("abcaaaxx") == 7)
409
+ assert(p:match("abcxx") == 6)
410
+
411
+
412
+ -- a large table capture
413
+ t = m.match(m.Ct(m.C('a')^0), string.rep("a", 10000))
414
+ assert(#t == 10000 and t[1] == 'a' and t[#t] == 'a')
415
+
416
+ print('+')
417
+
418
+
419
+ -- bug in 0.10 (rechecking a grammar, after tail-call optimization)
420
+ m.P{ m.P { (m.P(3) + "xuxu")^0 * m.V"xuxu", xuxu = m.P(1) } }
421
+
422
+ local V = m.V
423
+
424
+ local Space = m.S(" \n\t")^0
425
+ local Number = m.C(m.R("09")^1) * Space
426
+ local FactorOp = m.C(m.S("+-")) * Space
427
+ local TermOp = m.C(m.S("*/")) * Space
428
+ local Open = "(" * Space
429
+ local Close = ")" * Space
430
+
431
+
432
+ local function f_factor (v1, op, v2, d)
433
+ assert(d == nil)
434
+ if op == "+" then return v1 + v2
435
+ else return v1 - v2
436
+ end
437
+ end
438
+
439
+
440
+ local function f_term (v1, op, v2, d)
441
+ assert(d == nil)
442
+ if op == "*" then return v1 * v2
443
+ else return v1 / v2
444
+ end
445
+ end
446
+
447
+ G = m.P{ "Exp",
448
+ Exp = m.Cf(V"Factor" * m.Cg(FactorOp * V"Factor")^0, f_factor);
449
+ Factor = m.Cf(V"Term" * m.Cg(TermOp * V"Term")^0, f_term);
450
+ Term = Number / tonumber + Open * V"Exp" * Close;
451
+ }
452
+
453
+ G = Space * G * -1
454
+
455
+ for _, s in ipairs{" 3 + 5*9 / (1+1) ", "3+4/2", "3+3-3- 9*2+3*9/1- 8"} do
456
+ assert(m.match(G, s) == loadstring("return "..s)())
457
+ end
458
+
459
+
460
+ -- test for grammars (errors deep in calling non-terminals)
461
+ g = m.P{
462
+ [1] = m.V(2) + "a",
463
+ [2] = "a" * m.V(3) * "x",
464
+ [3] = "b" * m.V(3) + "c"
465
+ }
466
+
467
+ assert(m.match(g, "abbbcx") == 7)
468
+ assert(m.match(g, "abbbbx") == 2)
469
+
470
+
471
+ -- tests for \0
472
+ assert(m.match(m.R("\0\1")^1, "\0\1\0") == 4)
473
+ assert(m.match(m.S("\0\1ab")^1, "\0\1\0a") == 5)
474
+ assert(m.match(m.P(1)^3, "\0\1\0a") == 5)
475
+ assert(not m.match(-4, "\0\1\0a"))
476
+ assert(m.match("\0\1\0a", "\0\1\0a") == 5)
477
+ assert(m.match("\0\0\0", "\0\0\0") == 4)
478
+ assert(not m.match("\0\0\0", "\0\0"))
479
+
480
+
481
+ -- tests for predicates
482
+ assert(not m.match(-m.P("a") * 2, "alo"))
483
+ assert(m.match(- -m.P("a") * 2, "alo") == 3)
484
+ assert(m.match(#m.P("a") * 2, "alo") == 3)
485
+ assert(m.match(##m.P("a") * 2, "alo") == 3)
486
+ assert(not m.match(##m.P("c") * 2, "alo"))
487
+ assert(m.match(m.Cs((##m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
488
+ assert(m.match(m.Cs((#((#m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
489
+ assert(m.match(m.Cs((- -m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
490
+ assert(m.match(m.Cs((-((-m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
491
+
492
+ p = -m.P'a' * m.Cc(1) + -m.P'b' * m.Cc(2) + -m.P'c' * m.Cc(3)
493
+ assert(p:match('a') == 2 and p:match('') == 1 and p:match('b') == 1)
494
+
495
+ p = -m.P'a' * m.Cc(10) + #m.P'a' * m.Cc(20)
496
+ assert(p:match('a') == 20 and p:match('') == 10 and p:match('b') == 10)
497
+
498
+
499
+
500
+ -- look-behind predicate
501
+ assert(not m.match(m.B'a', 'a'))
502
+ assert(m.match(1 * m.B'a', 'a') == 2)
503
+ assert(not m.match(m.B(1), 'a'))
504
+ assert(m.match(1 * m.B(1), 'a') == 2)
505
+ assert(m.match(-m.B(1), 'a') == 1)
506
+ assert(m.match(m.B(250), string.rep('a', 250)) == nil)
507
+ assert(m.match(250 * m.B(250), string.rep('a', 250)) == 251)
508
+
509
+ -- look-behind with an open call
510
+ checkerr("pattern may not have fixed length", m.B, m.V'S1')
511
+ checkerr("too long to look behind", m.B, 260)
512
+
513
+ B = #letter * -m.B(letter) + -letter * m.B(letter)
514
+ x = m.Ct({ (B * m.Cp())^-1 * (1 * m.V(1) + m.P(true)) })
515
+ checkeq(m.match(x, 'ar cal c'), {1,3,4,7,9,10})
516
+ checkeq(m.match(x, ' ar cal '), {2,4,5,8})
517
+ checkeq(m.match(x, ' '), {})
518
+ checkeq(m.match(x, 'aloalo'), {1,7})
519
+
520
+ assert(m.match(B, "a") == 1)
521
+ assert(m.match(1 * B, "a") == 2)
522
+ assert(not m.B(1 - letter):match(""))
523
+ assert((-m.B(letter)):match("") == 1)
524
+
525
+ assert((4 * m.B(letter, 4)):match("aaaaaaaa") == 5)
526
+ assert(not (4 * m.B(#letter * 5)):match("aaaaaaaa"))
527
+ assert((4 * -m.B(#letter * 5)):match("aaaaaaaa") == 5)
528
+
529
+ -- look-behind with grammars
530
+ assert(m.match('a' * m.B{'x', x = m.P(3)}, 'aaa') == nil)
531
+ assert(m.match('aa' * m.B{'x', x = m.P('aaa')}, 'aaaa') == nil)
532
+ assert(m.match('aaa' * m.B{'x', x = m.P('aaa')}, 'aaaaa') == 4)
533
+
534
+
535
+
536
+ -- bug in 0.9
537
+ assert(m.match(('a' * #m.P'b'), "ab") == 2)
538
+ assert(not m.match(('a' * #m.P'b'), "a"))
539
+
540
+ assert(not m.match(#m.S'567', ""))
541
+ assert(m.match(#m.S'567' * 1, "6") == 2)
542
+
543
+
544
+ -- tests for Tail Calls
545
+
546
+ p = m.P{ 'a' * m.V(1) + '' }
547
+ assert(p:match(string.rep('a', 1000)) == 1001)
548
+
549
+ -- create a grammar for a simple DFA for even number of 0s and 1s
550
+ --
551
+ -- ->1 <---0---> 2
552
+ --   ^           ^
553
+ --   |           |
554
+ --   1           1
555
+ --   |           |
556
+ --   V           V
557
+ --   3 <---0---> 4
558
+ --
559
+ -- this grammar should keep no backtracking information
560
+
561
+ p = m.P{
562
+ [1] = '0' * m.V(2) + '1' * m.V(3) + -1,
563
+ [2] = '0' * m.V(1) + '1' * m.V(4),
564
+ [3] = '0' * m.V(4) + '1' * m.V(1),
565
+ [4] = '0' * m.V(3) + '1' * m.V(2),
566
+ }
567
+
568
+ assert(p:match(string.rep("00", 10000)))
569
+ assert(p:match(string.rep("01", 10000)))
570
+ assert(p:match(string.rep("011", 10000)))
571
+ assert(not p:match(string.rep("011", 10000) .. "1"))
572
+ assert(not p:match(string.rep("011", 10001)))
573
+
574
+
575
+ -- this grammar does need backtracking info.
576
+ local lim = 10000
577
+ p = m.P{ '0' * m.V(1) + '0' }
578
+ checkerr("too many pending", m.match, p, string.rep("0", lim))
579
+ m.setmaxstack(2*lim)
580
+ checkerr("too many pending", m.match, p, string.rep("0", lim))
581
+ m.setmaxstack(2*lim + 4)
582
+ assert(m.match(p, string.rep("0", lim)) == lim + 1)
583
+
584
+ -- this repetition should not need stack space (only the call does)
585
+ p = m.P{ ('a' * m.V(1))^0 * 'b' + 'c' }
586
+ m.setmaxstack(200)
587
+ assert(p:match(string.rep('a', 180) .. 'c' .. string.rep('b', 180)) == 362)
588
+
589
+ m.setmaxstack(5) -- restore original limit
590
+
591
+ -- tests for optional start position
592
+ assert(m.match("a", "abc", 1))
593
+ assert(m.match("b", "abc", 2))
594
+ assert(m.match("c", "abc", 3))
595
+ assert(not m.match(1, "abc", 4))
596
+ assert(m.match("a", "abc", -3))
597
+ assert(m.match("b", "abc", -2))
598
+ assert(m.match("c", "abc", -1))
599
+ assert(m.match("abc", "abc", -4)) -- truncate to position 1
600
+
601
+ assert(m.match("", "abc", 10)) -- empty string is everywhere!
602
+ assert(m.match("", "", 10))
603
+ assert(not m.match(1, "", 1))
604
+ assert(not m.match(1, "", -1))
605
+ assert(not m.match(1, "", 0))
606
+
607
+ print("+")
608
+
609
+
610
+ -- tests for argument captures
611
+ checkerr("invalid argument", m.Carg, 0)
612
+ checkerr("invalid argument", m.Carg, -1)
613
+ checkerr("invalid argument", m.Carg, 2^18)
614
+ checkerr("absent extra argument #1", m.match, m.Carg(1), 'a', 1)
615
+ assert(m.match(m.Carg(1), 'a', 1, print) == print)
616
+ x = {m.match(m.Carg(1) * m.Carg(2), '', 1, 10, 20)}
617
+ checkeq(x, {10, 20})
618
+
619
+ assert(m.match(m.Cmt(m.Cg(m.Carg(3), "a") *
620
+ m.Cmt(m.Cb("a"), function (s,i,x)
621
+ assert(s == "a" and i == 1);
622
+ return i, x+1
623
+ end) *
624
+ m.Carg(2), function (s,i,a,b,c)
625
+ assert(s == "a" and i == 1 and c == nil);
626
+ return i, 2*a + 3*b
627
+ end) * "a",
628
+ "a", 1, false, 100, 1000) == 2*1001 + 3*100)
629
+
630
+
631
+ -- tests for Lua functions used directly as patterns; each one is called with the subject and the current position
632
+
633
+ t = {}
634
+ s = ""
635
+ p = m.P(function (s1, i) assert(s == s1); t[#t + 1] = i; return nil end) * false -- logs each position it is tried at and always fails
636
+ s = "hi, this is a test"
637
+ assert(m.match(((p - m.P(-1)) + 2)^0, s) == string.len(s) + 1)
638
+ assert(#t == string.len(s)/2 and t[1] == 1 and t[2] == 3) -- p runs at every other position because the '+ 2' branch skips two characters
639
+
640
+ assert(not m.match(p, s))
641
+
642
+ p = mt.__add(function (s, i) return i end, function (s, i) return nil end) -- plain functions combined through the metamethod (mt is defined outside this chunk)
643
+ assert(m.match(p, "alo"))
644
+
645
+ p = mt.__mul(function (s, i) return i end, function (s, i) return nil end) -- here the second function returns nil, so the whole match fails
646
+ assert(not m.match(p, "alo"))
647
+
648
+
649
+ t = {}
650
+ p = function (s1, i) assert(s == s1); t[#t + 1] = i; return i end
651
+ s = "hi, this is a test"
652
+ assert(m.match((m.P(1) * p)^0, s) == string.len(s) + 1)
653
+ assert(#t == string.len(s) and t[1] == 2 and t[2] == 3)
654
+
655
+ t = {}
656
+ p = m.P(function (s1, i) assert(s == s1); t[#t + 1] = i;
657
+ return i <= s1:len() and i end) * 1
658
+ s = "hi, this is a test"
659
+ assert(m.match(p^0, s) == string.len(s) + 1)
660
+ assert(#t == string.len(s) + 1 and t[1] == 1 and t[2] == 2)
661
+
662
+ p = function (s1, i) return m.match(m.P"a"^1, s1, i) end -- the result of a nested match is returned as the new position
663
+ assert(m.match(p, "aaaa") == 5)
664
+ assert(m.match(p, "abaa") == 2)
665
+ assert(not m.match(p, "baaa"))
666
+ -- a returned position must lie between the current position and #subject + 1; anything else must raise "invalid position" (checkerr is defined outside this chunk)
667
+ checkerr("invalid position", m.match, function () return 2^20 end, s)
668
+ checkerr("invalid position", m.match, function () return 0 end, s)
669
+ checkerr("invalid position", m.match, function (s, i) return i - 1 end, s)
670
+ checkerr("invalid position", m.match,
671
+ m.P(1)^0 * function (_, i) return i - 1 end, s)
672
+ assert(m.match(m.P(1)^0 * function (_, i) return i end * -1, s))
673
+ checkerr("invalid position", m.match,
674
+ m.P(1)^0 * function (_, i) return i + 1 end, s)
675
+ assert(m.match(m.P(function (s, i) return s:len() + 1 end) * -1, s))
676
+ checkerr("invalid position", m.match, m.P(function (s, i) return s:len() + 2 end) * -1, s)
677
+ assert(not m.match(m.P(function (s, i) return s:len() end) * -1, s))
678
+ assert(m.match(m.P(1)^0 * function (_, i) return true end, s) ==
679
+ string.len(s) + 1)
680
+ for i = 1, string.len(s) + 1 do
681
+ assert(m.match(function (_, _) return i end, s) == i)
682
+ end
683
+
684
+ p = (m.P(function (s, i) return i%2 == 0 and i end) * 1
685
+ + m.P(function (s, i) return i%2 ~= 0 and i + 2 <= s:len() and i end) * 3)^0
686
+ * -1
687
+ assert(p:match(string.rep('a', 14000))) -- function patterns inside a repetition over a 14000-character subject
688
+
689
+ -- tests for Function Replacements (patt / function): the function receives the captures of patt
690
+ f = function (a, ...) if a ~= "x" then return {a, ...} end end -- packs its arguments in a table; returns nothing when the first one is "x"
691
+
692
+ t = m.match(m.C(1)^0/f, "abc")
693
+ checkeq(t, {"a", "b", "c"})
694
+
695
+ t = m.match(m.C(1)^0/f/f, "abc")
696
+ checkeq(t, {{"a", "b", "c"}})
697
+
698
+ t = m.match(m.P(1)^0/f/f, "abc") -- no capture
699
+ checkeq(t, {{"abc"}})
700
+
701
+ t = m.match((m.P(1)^0/f * m.Cp())/f, "abc")
702
+ checkeq(t, {{"abc"}, 4})
703
+
704
+ t = m.match((m.C(1)^0/f * m.Cp())/f, "abc")
705
+ checkeq(t, {{"a", "b", "c"}, 4})
706
+
707
+ t = m.match((m.C(1)^0/f * m.Cp())/f, "xbc") -- the inner f returns nothing here, so only the position reaches the outer f
708
+ checkeq(t, {4})
709
+
710
+ t = m.match(m.C(m.C(1)^0)/f, "abc")
711
+ checkeq(t, {"abc", "a", "b", "c"})
712
+
713
+ g = function (...) return 1, ... end -- prepends 1 to its arguments
714
+ t = {m.match(m.C(1)^0/g/g, "abc")}
715
+ checkeq(t, {1, 1, "a", "b", "c"})
716
+
717
+ t = {m.match(m.Cc(nil,nil,4) * m.Cc(nil,3) * m.Cc(nil, nil) / g / g, "")}
718
+ t1 = {1,1,nil,nil,4,nil,3,nil,nil}
719
+ for i=1,10 do assert(t[i] == t1[i]) end -- compared by index because of the nils; index 10 checks there is no extra value
720
+
721
+ t = {m.match((m.C(1) / function (x) return x, x.."x" end)^0, "abc")}
722
+ checkeq(t, {"a", "ax", "b", "bx", "c", "cx"})
723
+
724
+ t = m.match(m.Ct((m.C(1) / function (x,y) return y, x end * m.Cc(1))^0), "abc")
725
+ checkeq(t, {nil, "a", 1, nil, "b", 1, nil, "c", 1})
726
+
727
+ -- tests for Query Replacements (patt / table): the first capture is used as a key into the table
728
+
729
+ assert(m.match(m.C(m.C(1)^0)/{abc = 10}, "abc") == 10)
730
+ assert(m.match(m.C(1)^0/{a = 10}, "abc") == 10)
731
+ assert(m.match(m.S("ba")^0/{ab = 40}, "abc") == 40)
732
+ t = m.match(m.Ct((m.S("ba")/{a = 40})^0), "abc")
733
+ checkeq(t, {40})
734
+
735
+ assert(m.match(m.Cs((m.C(1)/{a=".", d=".."})^0), "abcdde") == ".bc....e")
736
+ assert(m.match(m.Cs((m.C(1)/{f="."})^0), "abcdde") == "abcdde")
737
+ assert(m.match(m.Cs((m.C(1)/{d="."})^0), "abcdde") == "abc..e")
738
+ assert(m.match(m.Cs((m.C(1)/{e="."})^0), "abcdde") == "abcdd.")
739
+ assert(m.match(m.Cs((m.C(1)/{e=".", f="+"})^0), "eefef") == "..+.+")
740
+ assert(m.match(m.Cs((m.C(1))^0), "abcdde") == "abcdde")
741
+ assert(m.match(m.Cs(m.C(m.C(1)^0)), "abcdde") == "abcdde")
742
+ assert(m.match(1 * m.Cs(m.P(1)^0), "abcdde") == "bcdde")
743
+ assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "abcdde") == "abcdde")
744
+ assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "0ab0b0") == "xabxbx")
745
+ assert(m.match(m.Cs((m.C('0')/'x' + m.P(1)/{b=3})^0), "b0a0b") == "3xax3")
746
+ assert(m.match(m.P(1)/'%0%0'/{aa = -3} * 'x', 'ax') == -3)
747
+ assert(m.match(m.C(1)/'%0%1'/{aa = 'z'}/{z = -3} * 'x', 'ax') == -3)
748
+
749
+ assert(m.match(m.Cs(m.Cc(0) * (m.P(1)/"")), "4321") == "0")
750
+ -- string replacements (patt / string): "%0" is the whole match, "%1".."%9" the captures, "%%" a literal percent sign
751
+ assert(m.match(m.Cs((m.P(1) / "%0")^0), "abcd") == "abcd")
752
+ assert(m.match(m.Cs((m.P(1) / "%0.%0")^0), "abcd") == "a.ab.bc.cd.d")
753
+ assert(m.match(m.Cs((m.P("a") / "%0.%0" + 1)^0), "abcad") == "a.abca.ad")
754
+ assert(m.match(m.C("a") / "%1%%%0", "a") == "a%a")
755
+ assert(m.match(m.Cs((m.P(1) / ".xx")^0), "abcd") == ".xx.xx.xx.xx")
756
+ assert(m.match(m.Cp() * m.P(3) * m.Cp()/"%2%1%1 - %0 ", "abcde") ==
757
+ "411 - abc ")
758
+
759
+ assert(m.match(m.P(1)/"%0", "abc") == "a")
760
+ checkerr("invalid capture index", m.match, m.P(1)/"%1", "abc") -- the pattern has no capture, so only "%0" is valid
761
+ checkerr("invalid capture index", m.match, m.P(1)/"%9", "abc")
762
+
763
+ p = m.C(1)
764
+ p = p * p; p = p * p; p = p * p * m.C(1) / "%9 - %1" -- 8 + 1 = 9 single-character captures
765
+ assert(p:match("1234567890") == "9 - 1")
766
+
767
+ assert(m.match(m.Cc(print), "") == print)
768
+
769
+ -- too many captures (just ignore extra ones)
770
+ p = m.C(1)^0 / "%2-%9-%0-%9"
771
+ assert(p:match"01234567890123456789" == "1-8-01234567890123456789-8")
772
+ s = string.rep("12345678901234567890", 20)
773
+ assert(m.match(m.C(1)^0 / "%9-%1-%0-%3", s) == "9-1-" .. s .. "-3")
774
+
775
+ -- string captures with non-string subcaptures
776
+ p = m.Cc('alo') * m.C(1) / "%1 - %2 - %1"
777
+ assert(p:match'x' == 'alo - x - alo')
778
+
779
+ checkerr("invalid capture value (a boolean)", m.match, m.Cc(true) / "%1", "a")
780
+
781
+ -- long strings for string capture
782
+ l = 10000
783
+ s = string.rep('a', l) .. string.rep('b', l) .. string.rep('c', l)
784
+
785
+ p = (m.C(m.P'a'^1) * m.C(m.P'b'^1) * m.C(m.P'c'^1)) / '%3%2%1'
786
+
787
+ assert(p:match(s) == string.rep('c', l) ..
788
+ string.rep('b', l) ..
789
+ string.rep('a', l))
790
+
791
+ print"+"
792
+
793
+ -- accumulator capture (m.Cf): folds the captures of a pattern with a function, starting from the first capture
794
+ function f (x) return x + 1 end -- ignores the capture being folded in; just counts the folding steps
795
+ assert(m.match(m.Cf(m.Cc(0) * m.C(1)^0, f), "alo alo") == 7)
796
+
797
+ t = {m.match(m.Cf(m.Cc(1,2,3), error), "")} -- 'error' is never called: nothing follows the initial capture
798
+ checkeq(t, {1})
799
+ p = m.Cf(m.Ct(true) * m.Cg(m.C(m.R"az"^1) * "=" * m.C(m.R"az"^1) * ";")^0,
800
+ rawset)
801
+ t = p:match("a=b;c=du;xux=yuy;") -- each key/value group is folded into the table with rawset
802
+ checkeq(t, {a="b", c="du", xux="yuy"})
803
+
804
+
805
+ -- errors in accumulator capture
806
+
807
+ -- no initial capture
808
+ checkerr("no initial value", m.match, m.Cf(m.P(5), print), 'aaaaaa')
809
+ -- no initial capture (very long match forces fold to be a pair open-close)
810
+ checkerr("no initial value", m.match, m.Cf(m.P(500), print),
811
+ string.rep('a', 600))
812
+
813
+ -- nested capture produces no initial value
814
+ checkerr("no initial value", m.match, m.Cf(m.P(1) / {}, print), "alo")
815
+
816
+
817
+ -- tests for loop checker: repeating a pattern that can match the empty string must be rejected
818
+ -- isnullable(p): expects p^0 to raise "may accept empty string" (through checkerr, defined outside this chunk)
819
+ local function isnullable (p)
820
+ checkerr("may accept empty string", function (p) return p^0 end, m.P(p))
821
+ end
822
+
823
+ isnullable(m.P("x")^-4)
824
+ assert(m.match(((m.P(0) + 1) * m.S"al")^0, "alo") == 3) -- nullable prefix followed by a mandatory character: the loop is accepted
825
+ assert(m.match((("x" + #m.P(1))^-4 * m.S"al")^0, "alo") == 3)
826
+ isnullable("")
827
+ isnullable(m.P("x")^0)
828
+ isnullable(m.P("x")^-1)
829
+ isnullable(m.P("x") + 1 + 2 + m.P("a")^-1)
830
+ isnullable(-m.P("ab"))
831
+ isnullable(- -m.P("ab"))
832
+ isnullable(# #(m.P("ab") + "xy"))
833
+ isnullable(- #m.P("ab")^0)
834
+ isnullable(# -m.P("ab")^1)
835
+ isnullable(#m.V(3))
836
+ isnullable(m.V(3) + m.V(1) + m.P('a')^-1)
837
+ isnullable({[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)})
838
+ assert(m.match(m.P{[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(1)}^0, "abc")
839
+ == 3)
840
+ assert(m.match(m.P""^-3, "a") == 1) -- a bounded repetition of an empty pattern is allowed
841
+
842
+ local function find (p, s) -- matches basiclookfor(p) against s; basiclookfor is defined outside this chunk (presumably it searches for p anywhere in s -- confirm)
843
+ return m.match(basiclookfor(p), s)
844
+ end
845
+
846
+
847
+ local function badgrammar (g, expected) -- asserts that m.P(g) raises an error; when 'expected' is given, the message must match it through find
848
+ local stat, msg = pcall(m.P, g)
849
+ assert(not stat)
850
+ if expected then assert(find(expected, msg)) end
851
+ end
852
+
853
+ badgrammar({[1] = m.V(1)}, "rule '1'") -- rule 1 refers directly to itself
854
+ badgrammar({[1] = m.V(2)}, "rule '2'") -- invalid non-terminal
855
+ badgrammar({[1] = m.V"x"}, "rule 'x'") -- invalid non-terminal
856
+ badgrammar({[1] = m.V{}}, "rule '(a table)'") -- invalid non-terminal
857
+ badgrammar({[1] = #m.P("a") * m.V(1)}, "rule '1'") -- left-recursive
858
+ badgrammar({[1] = -m.P("a") * m.V(1)}, "rule '1'") -- left-recursive
859
+ badgrammar({[1] = -1 * m.V(1)}, "rule '1'") -- left-recursive
860
+ badgrammar({[1] = -1 + m.V(1)}, "rule '1'") -- left-recursive
861
+ badgrammar({[1] = 1 * m.V(2), [2] = m.V(2)}, "rule '2'") -- left-recursive
862
+ badgrammar({[1] = 1 * m.V(2)^0, [2] = m.P(0)}, "rule '1'") -- inf. loop
863
+ badgrammar({ m.V(2), m.V(3)^0, m.P"" }, "rule '2'") -- inf. loop
864
+ badgrammar({ m.V(2) * m.V(3)^0, m.V(3)^0, m.P"" }, "rule '1'") -- inf. loop
865
+ badgrammar({"x", x = #(m.V(1) * 'a') }, "rule '1'") -- inf. loop
866
+ badgrammar({ -(m.V(1) * 'a') }, "rule '1'") -- inf. loop
867
+ badgrammar({"x", x = m.P'a'^-1 * m.V"x"}, "rule 'x'") -- left recursive
868
+ badgrammar({"x", x = m.P'a' * m.V"y"^1, y = #m.P(1)}, "rule 'x'") -- rule y consumes nothing, so y^1 is rejected
869
+
870
+ assert(m.match({'a' * -m.V(1)}, "aaa") == 2)
871
+ assert(m.match({'a' * -m.V(1)}, "aaaa") == nil)
872
+
873
+
874
+ -- good x bad grammars (the bare m.P calls must succeed; the badgrammar calls must fail)
875
+ m.P{ ('a' * m.V(1))^-1 }
876
+ m.P{ -('a' * m.V(1)) }
877
+ m.P{ ('abc' * m.V(1))^-1 }
878
+ m.P{ -('abc' * m.V(1)) }
879
+ badgrammar{ #m.P('abc') * m.V(1) }
880
+ badgrammar{ -('a' + m.V(1)) }
881
+ m.P{ #('a' * m.V(1)) }
882
+ badgrammar{ #('a' + m.V(1)) }
883
+ m.P{ m.B{ m.P'abc' } * 'a' * m.V(1) }
884
+ badgrammar{ m.B{ m.P'abc' } * m.V(1) }
885
+ badgrammar{ ('a' + m.P'bcd')^-1 * m.V(1) }
887
+
888
+ -- simple tests for maximum sizes: (these only need to build without raising an error)
889
+ local p = m.P"a"
890
+ for i=1,14 do p = p * p end -- doubles the pattern 14 times
891
+
892
+ p = {}
893
+ for i=1,100 do p[i] = m.P"a" end
894
+ p = m.P(p) -- grammar with 100 rules
895
+
896
+
897
+ -- strange values for rule labels (a function and a table used as rule names)
898
+
899
+ p = m.P{ "print",
900
+ print = m.V(print),
901
+ [print] = m.V(_G),
902
+ [_G] = m.P"a",
903
+ }
904
+
905
+ assert(p:match("a"))
906
+
907
+ -- initial rule (g[1] names the rule where matching starts)
908
+ g = {}
909
+ for i = 1, 10 do g["i"..i] = "a" * m.V("i"..i+1) end
910
+ g.i11 = m.P""
911
+ for i = 1, 10 do
912
+ g[1] = "i"..i
913
+ local p = m.P(g)
914
+ assert(p:match("aaaaaaaaaaa") == 11 - i + 1) -- starting at rule i consumes 11 - i characters
915
+ end
916
+
917
+ print"+"
918
+
919
+
920
+ -- tests for back references (m.Cb refers to the values of a previous named group m.Cg)
921
+ checkerr("back reference 'x' not found", m.match, m.Cb('x'), '')
922
+ checkerr("back reference 'b' not found", m.match, m.Cg(1, 'a') * m.Cb('b'), 'a')
923
+
924
+ p = m.Cg(m.C(1) * m.C(1), "k") * m.Ct(m.Cb("k"))
925
+ t = p:match("ab")
926
+ checkeq(t, {"a", "b"})
927
+
928
+
929
+ t = {}
930
+ function foo (p) t[#t + 1] = p; return p .. "x" end -- logs its argument in t and appends "x" to it
931
+
932
+ p = m.Cg(m.C(2) / foo, "x") * m.Cb"x" *
933
+ m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" *
934
+ m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" *
935
+ m.Cg(m.Cb('x') / foo, "x") * m.Cb"x"
936
+ x = {p:match'ab'}
937
+ checkeq(x, {'abx', 'abxx', 'abxxx', 'abxxxx'})
938
+ checkeq(t, {'ab',
939
+ 'ab', 'abx',
940
+ 'ab', 'abx', 'abxx',
941
+ 'ab', 'abx', 'abxx', 'abxxx'}) -- the expected log shows foo being re-run along the whole chain of groups for every m.Cb"x"
942
+
943
+
944
+
945
+ -- tests for match-time captures (m.Cmt and plain functions run while matching)
946
+
947
+ p = m.P'a' * (function (s, i) return (s:sub(i, i) == 'b') and i + 1 end)
948
+ + 'acd'
949
+
950
+ assert(p:match('abc') == 3)
951
+ assert(p:match('acd') == 4)
952
+
953
+ local function id (s, i, ...) -- always succeeds at the current position and passes its captures through unchanged
954
+ return true, ...
955
+ end
956
+
957
+ assert(m.Cmt(m.Cs((m.Cmt(m.S'abc' / { a = 'x', c = 'y' }, id) +
958
+ m.R'09'^1 / string.char +
959
+ m.P(1))^0), id):match"acb98+68c" == "xyb\98+\68y")
960
+
961
+ p = m.P{'S',
962
+ S = m.V'atom' * space
963
+ + m.Cmt(m.Ct("(" * space * (m.Cmt(m.V'S'^1, id) + m.P(true)) * ")" * space), id),
964
+ atom = m.Cmt(m.C(m.R("AZ", "az", "09")^1), id)
965
+ }
966
+ x = p:match"(a g () ((b) c) (d (e)))"
967
+ checkeq(x, {'a', 'g', {}, {{'b'}, 'c'}, {'d', {'e'}}});
968
+
969
+ x = {(m.Cmt(1, id)^0):match(string.rep('a', 500))}
970
+ assert(#x == 500)
971
+
972
+ local function id(s, i, x) -- new local 'id': succeeds with the values 1, 3, 7 only when its capture is 'a'
973
+ if x == 'a' then return i, 1, 3, 7
974
+ else return nil, 2, 4, 6, 8
975
+ end
976
+ end
977
+
978
+ p = ((m.P(id) * 1 + m.Cmt(2, id) * 1 + m.Cmt(1, id) * 1))^0
979
+ assert(table.concat{p:match('abababab')} == string.rep('137', 4)) -- values returned together with a failure (2, 4, 6, 8) never show up
980
+
981
+ local function ref (s, i, x) -- rematches the captured value x, used as a pattern, starting #x characters before i
982
+ return m.match(x, s, i - x:len())
983
+ end
984
+
985
+ assert(m.Cmt(m.P(1)^0, ref):match('alo') == 4)
986
+ assert((m.P(1) * m.Cmt(m.P(1)^0, ref)):match('alo') == 4)
987
+ assert(not (m.P(1) * m.Cmt(m.C(1)^0, ref)):match('alo'))
988
+
989
+ ref = function (s,i,x) return i == tonumber(x) and i, 'xuxu' end
990
+
991
+ assert(m.Cmt(1, ref):match'2')
992
+ assert(not m.Cmt(1, ref):match'1')
993
+ assert(m.Cmt(m.P(1)^0, ref):match'03')
994
+
995
+ function ref (s, i, a, b) -- assigns to the local 'ref' declared above: succeeds when both captures are equal, producing the upper-cased text
996
+ if a == b then return i, a:upper() end
997
+ end
998
+
999
+ p = m.Cmt(m.C(m.R"az"^1) * "-" * m.C(m.R"az"^1), ref)
1000
+ p = (any - p)^0 * p * any^0 * -1
1001
+
1002
+ assert(p:match'abbbc-bc ddaa' == 'BC')
1003
+
1004
+ do -- match-time captures cannot be optimized away
1005
+ local touch = 0
1006
+ f = m.P(function () touch = touch + 1; return true end)
1007
+
1008
+ local function check(n) n = n or 1; assert(touch == n); touch = 0 end -- asserts f ran n times (default 1) since the last check, then resets the counter
1009
+
1010
+ assert(m.match(f * false + 'b', 'a') == nil); check()
1011
+ assert(m.match(f * false + 'b', '') == nil); check()
1012
+ assert(m.match( (f * 'a')^0 * 'b', 'b') == 2); check()
1013
+ assert(m.match( (f * 'a')^0 * 'b', '') == nil); check()
1014
+ assert(m.match( (f * 'a')^-1 * 'b', 'b') == 2); check()
1015
+ assert(m.match( (f * 'a')^-1 * 'b', '') == nil); check()
1016
+ assert(m.match( ('b' + f * 'a')^-1 * 'b', '') == nil); check()
1017
+ assert(m.match( (m.P'b'^-1 * f * 'a')^-1 * 'b', '') == nil); check()
1018
+ assert(m.match( (-m.P(1) * m.P'b'^-1 * f * 'a')^-1 * 'b', '') == nil);
1019
+ check()
1020
+ assert(m.match( (f * 'a' + 'b')^-1 * 'b', '') == nil); check()
1021
+ assert(m.match(f * 'a' + f * 'b', 'b') == 2); check(2)
1022
+ assert(m.match(f * 'a' + f * 'b', 'a') == 2); check(1)
1023
+ assert(m.match(-f * 'a' + 'b', 'b') == 2); check(1)
1024
+ assert(m.match(-f * 'a' + 'b', '') == nil); check(1)
1025
+ end
1026
+ -- long-bracket matcher: the closing bracket must carry as many '=' as the opening one (compared with the "init" back reference)
1027
+ c = '[' * m.Cg(m.P'='^0, "init") * '[' *
1028
+ { m.Cmt(']' * m.C(m.P'='^0) * ']' * m.Cb("init"), function (_, _, s1, s2)
1029
+ return s1 == s2 end)
1030
+ + 1 * m.V(1) } / 0
1031
+
1032
+ assert(c:match'[==[]]====]]]]==]===[]' == 18)
1033
+ assert(c:match'[[]=]====]=]]]==]===[]' == 14)
1034
+ assert(not c:match'[[]=]====]=]=]==]===[]')
1035
+
1036
+
1037
+ -- old bug: optimization of concat with fail removed match-time capture
1038
+ p = m.Cmt(0, function (s) p = s end) * m.P(false) -- the function overwrites the global p with the subject as a visible side effect
1039
+ assert(not p:match('alo'))
1040
+ assert(p == 'alo')
1041
+
1042
+
1043
+ -- ensure that failed match-time captures are not kept on Lua stack
1044
+ do
1045
+ local t = {__mode = "kv"}; setmetatable(t,t) -- weak table acting as its own metatable
1046
+ local c = 0
1047
+
1048
+ local function foo (s,i)
1049
+ collectgarbage();
1050
+ assert(next(t) == "__mode" and next(t, "__mode") == nil) -- tables produced by earlier calls must have been collected by now
1051
+ local x = {}
1052
+ t[x] = true
1053
+ c = c + 1
1054
+ return i, x
1055
+ end
1056
+
1057
+ local p = m.P{ m.Cmt(0, foo) * m.P(false) + m.P(1) * m.V(1) + m.P"" }
1058
+ p:match(string.rep('1', 10))
1059
+ assert(c == 11) -- foo ran once at each of the 11 positions
1060
+ end
1061
+
1062
+ p = (m.P(function () return true, "a" end) * 'a'
1063
+ + m.P(function (s, i) return i, "aa", 20 end) * 'b'
1064
+ + m.P(function (s,i) if i <= #s then return i, "aaa" end end) * 1)^0
1065
+
1066
+ t = {p:match('abacc')}
1067
+ checkeq(t, {'a', 'aa', 20, 'a', 'aaa', 'aaa'}) -- only values from alternatives that finally matched are kept
1068
+
1069
+
1070
+ -------------------------------------------------------------------
1071
+ -- Tests for 're' module
1072
+ -------------------------------------------------------------------
1073
+
1074
+ local re = require "lib/re"
1075
+
1076
+ local match, compile = re.match, re.compile -- note the argument order used below: match(subject, pattern)
1077
+
1078
+
1079
+
1080
+ assert(match("a", ".") == 2)
1081
+ assert(match("a", "''") == 1)
1082
+ assert(match("", " ! . ") == 1)
1083
+ assert(not match("a", " ! . "))
1084
+ assert(match("abcde", " ( . . ) * ") == 5)
1085
+ assert(match("abbcde", " [a-c] +") == 5)
1086
+ assert(match("0abbc1de", "'0' [a-c]+ '1'") == 7)
1087
+ assert(match("0zz1dda", "'0' [^a-c]+ 'a'") == 8)
1088
+ assert(match("abbc--", " [a-c] + +") == 5)
1089
+ assert(match("abbc--", " [ac-] +") == 2)
1090
+ assert(match("abbc--", " [-acb] + ") == 7)
1091
+ assert(not match("abbcde", " [b-z] + "))
1092
+ assert(match("abb\"de", '"abb"["]"de"') == 7)
1093
+ assert(match("abceeef", "'ac' ? 'ab' * 'c' { 'e' * } / 'abceeef' ") == "eee")
1094
+ assert(match("abceeef", "'ac'? 'ab'* 'c' { 'f'+ } / 'abceeef' ") == 8)
1095
+ local t = {match("abceefe", "( ( & 'e' {} ) ? . ) * ")} -- records the position of every 'e'
1096
+ checkeq(t, {4, 5, 7})
1097
+ local t = {match("abceefe", "((&&'e' {})? .)*")}
1098
+ checkeq(t, {4, 5, 7})
1099
+ local t = {match("abceefe", "( ( ! ! 'e' {} ) ? . ) *")}
1100
+ checkeq(t, {4, 5, 7})
1101
+ local t = {match("abceefe", "(( & ! & ! 'e' {})? .)*")}
1102
+ checkeq(t, {4, 5, 7})
1103
+
1104
+ assert(match("cccx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 5)
1105
+ assert(match("cdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 4)
1106
+ assert(match("abcdcdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 8)
1107
+
1108
+ assert(match("abc", "a <- (. a)?") == 4)
1109
+ b = "balanced <- '(' ([^()] / balanced)* ')'"
1110
+ assert(match("(abc)", b))
1111
+ assert(match("(a(b)((c) (d)))", b))
1112
+ assert(not match("(a(b ((c) (d)))", b))
1113
+
1114
+ b = compile[[ balanced <- "(" ([^()] / balanced)* ")" ]]
1115
+ assert(b == m.P(b)) -- a compiled expression is already a pattern: m.P returns it unchanged
1116
+ assert(b:match"((((a))(b)))")
1117
+
1118
+ local g = [[
1119
+ S <- "0" B / "1" A / "" -- balanced strings
1120
+ A <- "0" S / "1" A A -- one more 0
1121
+ B <- "1" S / "0" B B -- one more 1
1122
+ ]]
1123
+ assert(match("00011011", g) == 9)
1124
+
1125
+ local g = [[
1126
+ S <- ("0" B / "1" A)*
1127
+ A <- "0" / "1" A A
1128
+ B <- "1" / "0" B B
1129
+ ]]
1130
+ assert(match("00011011", g) == 9)
1131
+ assert(match("000110110", g) == 9)
1132
+ assert(match("011110110", g) == 3)
1133
+ assert(match("000110010", g) == 1)
1134
+ -- repetition suffixes: p^n matches exactly n copies, p^+n at least n, p^-n at most n
1135
+ s = "aaaaaaaaaaaaaaaaaaaaaaaa"
1136
+ assert(match(s, "'a'^3") == 4)
1137
+ assert(match(s, "'a'^0") == 1)
1138
+ assert(match(s, "'a'^+3") == s:len() + 1)
1139
+ assert(not match(s, "'a'^+30"))
1140
+ assert(match(s, "'a'^-30") == s:len() + 1)
1141
+ assert(match(s, "'a'^-5") == 6)
1142
+ for i = 1, s:len() do
1143
+ assert(match(s, string.format("'a'^+%d", i)) >= i + 1)
1144
+ assert(match(s, string.format("'a'^-%d", i)) <= i + 1)
1145
+ assert(match(s, string.format("'a'^%d", i)) == i + 1)
1146
+ end
1147
+ assert(match("01234567890123456789", "[0-9]^3+") == 19)
1148
+
1149
+
1150
+ assert(match("01234567890123456789", "({....}{...}) -> '%2%1'") == "4560123")
1151
+ t = match("0123456789", "{| {.}* |}")
1152
+ checkeq(t, {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"})
1153
+ assert(match("012345", "{| (..) -> '%0%0' |}")[1] == "0101")
1154
+
1155
+ assert(match("abcdef", "( {.} {.} {.} {.} {.} ) -> 3") == "c")
1156
+ assert(match("abcdef", "( {:x: . :} {.} {.} {.} {.} ) -> 3") == "d") -- the named group is not counted among the numbered captures
1157
+ assert(match("abcdef", "( {:x: . :} {.} {.} {.} {.} ) -> 0") == 6)
1158
+
1159
+ assert(not match("abcdef", "{:x: ({.} {.} {.}) -> 2 :} =x"))
1160
+ assert(match("abcbef", "{:x: ({.} {.} {.}) -> 2 :} =x"))
1161
+ -- character classes: a ']' right after '[' or '[^' and a '-' at either end of the class are literal characters
1162
+ eqcharset(compile"[]]", "]")
1163
+ eqcharset(compile"[][]", m.S"[]")
1164
+ eqcharset(compile"[]-]", m.S"-]")
1165
+ eqcharset(compile"[-]", m.S"-")
1166
+ eqcharset(compile"[az-]", m.S"a-z")
1167
+ eqcharset(compile"[-az]", m.S"a-z")
1168
+ eqcharset(compile"[a-z]", m.R"az")
1169
+ eqcharset(compile"[]['\"]", m.S[[]['"]])
1170
+
1171
+ eqcharset(compile"[^]]", any - "]")
1172
+ eqcharset(compile"[^][]", any - m.S"[]")
1173
+ eqcharset(compile"[^]-]", any - m.S"-]")
1174
+ eqcharset(compile"[^]-]", any - m.S"-]") -- (same case as the previous line)
1175
+ eqcharset(compile"[^-]", any - m.S"-")
1176
+ eqcharset(compile"[^az-]", any - m.S"a-z")
1177
+ eqcharset(compile"[^-az]", any - m.S"a-z")
1178
+ eqcharset(compile"[^a-z]", any - m.R"az")
1179
+ eqcharset(compile"[^]['\"]", any - m.S[[]['"]])
1180
+
1181
+ -- tests for comments in 're' (a '--' inside an expression runs to the end of the line)
1182
+ e = compile[[
1183
+ A <- _B -- \t \n %nl .<> <- -> --
1184
+ _B <- 'x' --]]
1185
+ assert(e:match'xy' == 2)
1186
+
1187
+ -- tests for 're' with pre-definitions (%name looks the name up in the table given as second argument)
1188
+ defs = {digits = m.R"09", letters = m.R"az", _=m.P"__"}
1189
+ e = compile("%letters (%letters / %digits)*", defs)
1190
+ assert(e:match"x123" == 5)
1191
+ e = compile("%_", defs)
1192
+ assert(e:match"__" == 3)
1193
+
1194
+ e = compile([[
1195
+ S <- A+
1196
+ A <- %letters+ B
1197
+ B <- %digits+
1198
+ ]], defs)
1199
+
1200
+ e = compile("{[0-9]+'.'?[0-9]*} -> sin", math) -- '-> sin' resolves to math.sin through the definitions table
1201
+ assert(e:match("2.34") == math.sin(2.34))
1202
+
1203
+
1204
+ function eq (_, _, a, b) return a == b end -- NOTE(review): not referenced anywhere in the visible part of this file
1205
+
1206
+ c = re.compile([[
1207
+ longstring <- '[' {:init: '='* :} '[' close
1208
+ close <- ']' =init ']' / . close
1209
+ ]])
1210
+
1211
+ assert(c:match'[==[]]===]]]]==]===[]' == 17)
1212
+ assert(c:match'[[]=]====]=]]]==]===[]' == 14)
1213
+ assert(not c:match'[[]=]====]=]=]==]===[]')
1214
+
1215
+ c = re.compile" '[' {:init: '='* :} '[' (!(']' =init ']') .)* ']' =init ']' !. "
1216
+
1217
+ assert(c:match'[==[]]===]]]]==]')
1218
+ assert(c:match'[[]=]====]=][]==]===[]]')
1219
+ assert(not c:match'[[]=]====]=]=]==]===[]')
1220
+
1221
+ assert(re.find("hi alalo", "{:x:..:} =x") == 4)
1222
+ assert(re.find("hi alalo", "{:x:..:} =x", 4) == 4)
1223
+ assert(not re.find("hi alalo", "{:x:..:} =x", 5))
1224
+ assert(re.find("hi alalo", "{'al'}", 5) == 6)
1225
+ assert(re.find("hi aloalolo", "{:x:..:} =x") == 8)
1226
+ assert(re.find("alo alohi x x", "{:word:%w+:}%W*(=word)!%w") == 11)
1227
+
1228
+ -- re.find discards any captures
1229
+ local a,b,c = re.find("alo", "{.}{'o'}")
1230
+ assert(a == 2 and b == 3 and c == nil)
1231
+
1232
+ local function match (s,p) -- from here on 'match' is this helper, not re.match: it returns the substring located by re.find
1233
+ local i,e = re.find(s,p)
1234
+ if i then return s:sub(i, e) end
1235
+ end
1236
+ assert(match("alo alo", '[a-z]+') == "alo")
1237
+ assert(match("alo alo", '{:x: [a-z]+ :} =x') == nil)
1238
+ assert(match("alo alo", "{:x: [a-z]+ :} ' ' =x") == "alo alo")
1239
+
1240
+ assert(re.gsub("alo alo", "[abc]", "x") == "xlo xlo")
1241
+ assert(re.gsub("alo alo", "%w+", ".") == ". .")
1242
+ assert(re.gsub("hi, how are you", "[aeiou]", string.upper) ==
1243
+ "hI, hOw ArE yOU")
1244
+
1245
+ s = 'hi [[a comment[=]=] ending here]] and [=[another]]=]]'
1246
+ c = re.compile" '[' {:i: '='* :} '[' (!(']' =i ']') .)* ']' { =i } ']' "
1247
+ assert(re.gsub(s, c, "%2") == 'hi and =]')
1248
+ assert(re.gsub(s, c, "%0") == s)
1249
+ assert(re.gsub('[=[hi]=]', c, "%2") == '=')
1250
+
1251
+ assert(re.find("", "!.") == 1)
1252
+ assert(re.find("alo", "!.") == 4)
1253
+
1254
+ function addtag (s, i, t, tag) t.tag = tag; return i, t end -- stores the tag in the captured table and returns that table as the capture
1255
+
1256
+ c = re.compile([[
1257
+ doc <- block !.
1258
+ block <- (start {| (block / { [^<]+ })* |} end?) => addtag
1259
+ start <- '<' {:tag: [a-z]+ :} '>'
1260
+ end <- '</' { =tag } '>'
1261
+ ]], {addtag = addtag})
1262
+
1263
+ x = c:match[[
1264
+ <x>hi<b>hello</b>but<b>totheend</x>]]
1265
+ checkeq(x, {tag='x', 'hi', {tag = 'b', 'hello'}, 'but',
1266
+ {'totheend'}}) -- the unclosed <b> block gets no tag: its optional 'end' did not match
1267
+
1268
+
1269
+ -- tests for look-ahead captures (captures made inside '&' or '!' produce no values outside the predicate)
1270
+ x = {re.match("alo", "&(&{.}) !{'b'} {&(...)} &{..} {...} {!.}")}
1271
+ checkeq(x, {"", "alo", ""})
1272
+
1273
+ assert(re.match("aloalo",
1274
+ "{~ (((&'al' {.}) -> 'A%1' / (&%l {.}) -> '%1%1') / .)* ~}")
1275
+ == "AallooAalloo")
1276
+
1277
+ -- bug in 0.9 (and older versions), due to captures in look-aheads
1278
+ x = re.compile[[ {~ (&(. ([a-z]* -> '*')) ([a-z]+ -> '+') ' '*)* ~} ]]
1279
+ assert(x:match"alo alo" == "+ +")
1280
+
1281
+ -- valid capture in look-ahead (used inside the look-ahead itself)
1282
+ x = re.compile[[
1283
+ S <- &({:two: .. :} . =two) {[a-z]+} / . S
1284
+ ]]
1285
+ assert(x:match("hello aloaLo aloalo xuxu") == "aloalo")
1286
+
1287
+ -- indentation-sensitive blocks: lines sharing the captured 'ident' prefix belong to one block, deeper ones form a nested block
1288
+ p = re.compile[[
1289
+ block <- {| {:ident:space*:} line
1290
+ ((=ident !space line) / &(=ident space) block)* |}
1291
+ line <- {[^%nl]*} %nl
1292
+ space <- '_' -- should be ' ', but '_' is simpler for editors
1293
+ ]]
1294
+
1295
+ t= p:match[[
1296
+ 1
1297
+ __1.1
1298
+ __1.2
1299
+ ____1.2.1
1300
+ ____
1301
+ 2
1302
+ __2.1
1303
+ ]]
1304
+ checkeq(t, {"1", {"1.1", "1.2", {"1.2.1", "", ident = "____"}, ident = "__"},
1305
+ "2", {"2.1", ident = "__"}, ident = ""})
1306
+
1307
+
1308
+ -- nested grammars (rule 'x' is defined separately inside 'a' and inside 'b')
1309
+ p = re.compile[[
1310
+ s <- a b !.
1311
+ b <- ( x <- ('b' x)? )
1312
+ a <- ( x <- 'a' x? )
1313
+ ]]
1314
+
1315
+ assert(p:match'aaabbb')
1316
+ assert(p:match'aaa')
1317
+ assert(not p:match'bbb')
1318
+ assert(not p:match'aaabbba')
1319
+
1320
+ -- testing groups ({: ... :} is an anonymous group, {:name: ... :} a named one)
1321
+ t = {re.match("abc", "{:S <- {:.:} {S} / '':}")}
1322
+ checkeq(t, {"a", "bc", "b", "c", "c", ""})
1323
+
1324
+ t = re.match("1234", "{| {:a:.:} {:b:.:} {:c:.{.}:} |}")
1325
+ checkeq(t, {a="1", b="2", c="4"})
1326
+ t = re.match("1234", "{|{:a:.:} {:b:{.}{.}:} {:c:{.}:}|}")
1327
+ checkeq(t, {a="1", b="2", c="4"}) -- a named group keeps only its first value
1328
+ t = re.match("12345", "{| {:.:} {:b:{.}{.}:} {:{.}{.}:} |}")
1329
+ checkeq(t, {"1", b="2", "4", "5"})
1330
+ t = re.match("12345", "{| {:.:} {:{:b:{.}{.}:}:} {:{.}{.}:} |}")
1331
+ checkeq(t, {"1", "23", "4", "5"})
1332
+ t = re.match("12345", "{| {:.:} {{:b:{.}{.}:}} {:{.}{.}:} |}")
1333
+ checkeq(t, {"1", "23", "4", "5"})
1334
+
1335
+
1336
+ -- testing pre-defined names (the %w, %a, ... classes of 're' against the Lua string-library classes)
1337
+ assert(os.setlocale("C") == "C") -- pins the locale before comparing character classes
1338
+
1339
+ function eqlpeggsub (p1, p2) -- asserts that the 're' expression p1 yields the same characters that the Lua class p2 keeps from allchar (cs2str and allchar are defined outside this chunk)
1340
+ local s1 = cs2str(re.compile(p1))
1341
+ local s2 = string.gsub(allchar, "[^" .. p2 .. "]", "")
1342
+ -- if s1 ~= s2 then print(#s1,#s2) end
1343
+ assert(s1 == s2)
1344
+ end
1345
+
1346
+
1347
+ eqlpeggsub("%w", "%w")
1348
+ eqlpeggsub("%a", "%a")
1349
+ eqlpeggsub("%l", "%l")
1350
+ eqlpeggsub("%u", "%u")
1351
+ eqlpeggsub("%p", "%p")
1352
+ eqlpeggsub("%d", "%d")
1353
+ eqlpeggsub("%x", "%x")
1354
+ eqlpeggsub("%s", "%s")
1355
+ eqlpeggsub("%c", "%c")
1356
+ -- upper-case names are the complemented classes
1357
+ eqlpeggsub("%W", "%W")
1358
+ eqlpeggsub("%A", "%A")
1359
+ eqlpeggsub("%L", "%L")
1360
+ eqlpeggsub("%U", "%U")
1361
+ eqlpeggsub("%P", "%P")
1362
+ eqlpeggsub("%D", "%D")
1363
+ eqlpeggsub("%X", "%X")
1364
+ eqlpeggsub("%S", "%S")
1365
+ eqlpeggsub("%C", "%C")
1366
+ -- the same names used inside bracketed classes
1367
+ eqlpeggsub("[%w]", "%w")
1368
+ eqlpeggsub("[_%w]", "_%w")
1369
+ eqlpeggsub("[^%w]", "%W")
1370
+ eqlpeggsub("[%W%S]", "%W%S")
1371
+
1372
+ re.updatelocale()
1373
+
1374
+
1375
+ -- testing nested substitutions x string captures
1376
+
1377
+ p = re.compile[[
1378
+ text <- {~ item* ~}
1379
+ item <- macro / [^()] / '(' item* ')'
1380
+ arg <- ' '* {~ (!',' item)* ~}
1381
+ args <- '(' arg (',' arg)* ')'
1382
+ macro <- ('apply' args) -> '%1(%2)'
1383
+ / ('add' args) -> '%1 + %2'
1384
+ / ('mul' args) -> '%1 * %2'
1385
+ ]]
1386
+
1387
+ assert(p:match"add(mul(a,b), apply(f,x))" == "a * b + f(x)") -- inner macros are expanded before the outer one is substituted
1388
+
1389
+ rev = re.compile[[ R <- (!.) -> '' / ({.} R) -> '%2%1']]
1390
+
1391
+ assert(rev:match"0123456789" == "9876543210") -- recursive string capture that reverses the subject
1392
+
1393
+
1394
+ -- testing error messages in re
1395
+
1396
+ local function errmsg (p, err) -- expects re.compile(p) to fail with a message matching err (through checkerr, defined outside this chunk)
1397
+ checkerr(err, re.compile, p)
1398
+ end
1399
+
1400
+ errmsg('aaaa', "rule 'aaaa'")
1401
+ errmsg('a', 'outside')
1402
+ errmsg('b <- a', 'undefined')
1403
+ errmsg("x <- 'a' x <- 'b'", 'already defined')
1404
+ errmsg("'a' -", "near '-'")
1405
+
1406
+
1407
+ print"OK"
1408
+
1409
+