regexador 0.4.5 → 0.4.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,201 @@
1
+ ---
2
+ - !ruby/object:Program
3
+ description: Simple use of two vars
4
+ program: |
5
+ var1 = "abc"
6
+ var2 = "def"
7
+ match var1 var2 end
8
+ regex: !ruby/regexp /abcdef/
9
+ good:
10
+ - abcdefghi
11
+ - xyzabcdef
12
+ bad:
13
+ - ''
14
+ - abcxyzdef
15
+ - !ruby/object:Program
16
+ description: Multiline match with two vars
17
+ program: " var1 = \"abc\"\n var2 = \"def\"\n \n # Test a blank line
18
+ and comment as well.\n \n match # multiline match with comment\n var1\n
19
+ \ var2\n end\n"
20
+ regex: !ruby/regexp /abcdef/
21
+ good:
22
+ - abcdefghi
23
+ - xyzabcdef
24
+ bad:
25
+ - ''
26
+ - abcxyzdef
27
+ - !ruby/object:Program
28
+ description: IPv4 address
29
+ program: |
30
+ dot = "."
31
+ num = "25" D5 | `2 D4 D | maybe D1 1,2*D
32
+ match BOS num dot num dot num dot num EOS end
33
+ regex: !ruby/regexp /^(25[0-5]|2[0-4]\d|([01])?(\d){1,2})\.(25[0-5]|2[0-4]\d|([01])?(\d){1,2})\.(25[0-5]|2[0-4]\d|([01])?(\d){1,2})\.(25[0-5]|2[0-4]\d|([01])?(\d){1,2})$/
34
+ good:
35
+ - "127.0.0.1"
36
+ - "255.254.93.22"
37
+ - "255.254.93.22"
38
+ bad:
39
+ - ''
40
+ - "7.8.9"
41
+ - "3.4.5.6.7"
42
+ - "1.2.3.256"
43
+ - !ruby/object:Program
44
+ description: Identifying credit cards
45
+ program: |
46
+ # Warning: This one likely has errors!
47
+
48
+ visa = `4 12*D maybe 3*D
49
+ mc = `5 D5 14*D
50
+ discover = `6 ("011" | `5 2*D) 12*D
51
+ amex = `3 '47' 13*D
52
+ diners = `3 (`0 D5 | '68' D) 11*D
53
+ jcb = ("2131"|"1800"|"35" 3*D) 11*D
54
+
55
+ match visa | mc | discover | amex | diners | jcb end
56
+ regex: !ruby/regexp /(4(\d){12}((\d){3})?|5[0-5](\d){14}|6(011|5(\d){2})(\d){12}|3[47](\d){13}|3(0[0-5]|[68]\d)(\d){11}|(2131|1800|35(\d){3})(\d){11})/
57
+
58
+
59
+ good: []
60
+ bad: []
61
+ - !ruby/object:Program
62
+ description: Matching US phone num (with captures)
63
+ program: |
64
+ match
65
+ @area_code = 3 * D
66
+ `-
67
+ @prefix = 3*D
68
+ `-
69
+ @last4 = 4*D
70
+ end
71
+ # regex: !ruby/regexp /(?<area_code>\d{3}){0}(?<prefix>\d{3}){0}(?<last4>\d{4}){0}\g<area_code>-\g<prefix>-\g<last4>/
72
+ regex: !ruby/regexp /(?<area_code>(\d){3})\-(?<prefix>(\d){3})\-(?<last4>(\d){4})/
73
+ good:
74
+ - '601-555-2345'
75
+ - 'call me at 888-425-9000'
76
+ bad:
77
+ - '888-HAL-9000'
78
+ - '800.237.1234'
79
+ - !ruby/object:Program
80
+ description: KNOWNFAIL Matching a clock time, 12/24 hrs
81
+ program: |
82
+ hr12 = (maybe `0) `1-`9 | `1 D2
83
+ hr24 = (maybe `0) D | `1 D | `2 D3
84
+ sep = `: | `.
85
+ min = D5 D9
86
+ sec = D5 D9
87
+ ampm = (maybe SPACE) ("am" | "pm")
88
+ time12 = hr12 sep min maybe (sep sec) maybe ampm
89
+ time24 = hr24 sep min maybe (sep sec)
90
+ match BOS (time12 | time24) EOS end
91
+ regex: !ruby/regexp /^(((0)?[1-9]|1[0-2])(:|\.)[0-5]\d((:|\.)[0-5]\d)?(( )?(am|pm))?|((0)?\d|1\d|2[0-3])(:|\.)[0-5]\d((:|\.)[0-5]\d)?)$/
92
+ good:
93
+ - '12:34'
94
+ - '1:23'
95
+ - '5:14pm'
96
+ - '19:43'
97
+ - '1:23:45'
98
+ - '1:23:45 pm'
99
+ - '7:43 pm'
100
+ - '8:32:45'
101
+ - '8.34'
102
+ - '8.34 pm'
103
+ - '8.34.45'
104
+ bad:
105
+ - ''
106
+ - abc
107
+ - '24:30'
108
+ - '25:30'
109
+ - '19:43 pm'
110
+ - '5:14 pm'
111
+ - !ruby/object:Program
112
+ description: Using nocase
113
+ program: 'match BOS "abc" nocase "def" "ghi" EOS end'
114
+ regex: !ruby/regexp /^abc((?i)def)ghi$/
115
+ good:
116
+ - "abcdefghi"
117
+ - "abcDEFghi"
118
+ - "abcdEfghi"
119
+ bad:
120
+ - ""
121
+ - "x"
122
+ - "xabcdefghi"
123
+ - "abcdefghix"
124
+ - "aBcdefghi"
125
+ - "abcdefGhi"
126
+ - "abCdefghI"
127
+ - "abCdEfghI"
128
+ # - !ruby/object:Program
129
+ # description: Simple use of two vars
130
+ # program: |
131
+ # regex: !ruby/regexp //
132
+ # good:
133
+ # bad:
134
+ - !ruby/object:Program
135
+ description: Var used in simple repetition
136
+ program: |
137
+ n = 3
138
+ match BOS n * `x EOS end
139
+ regex: !ruby/regexp /^(x){3}$/
140
+ good:
141
+ - "xxx"
142
+ bad:
143
+ - ""
144
+ - "x"
145
+ - "xx x"
146
+ - "xxxx"
147
+ - !ruby/object:Program
148
+ description: Var used in complex repetition
149
+ program: |
150
+ m = 4
151
+ n = 6
152
+ match BOS m,n * `x EOS end
153
+ regex: !ruby/regexp /^(x){4,6}$/
154
+ good:
155
+ - "xxxx"
156
+ - "xxxxx"
157
+ - "xxxxxx"
158
+ bad:
159
+ - ""
160
+ - "x"
161
+ - "xx x"
162
+ - "xxx"
163
+ - "xxxxxxx"
164
+ - !ruby/object:Program
165
+ description: Using Unicode codepoint again
166
+ program: |
167
+ euro = &20ac
168
+ price = (euro | "$") SPACE many D maybe ("." 2*D)
169
+ match BOS price EOS end
170
+ regex: !ruby/regexp /^(€|\$) (\d)+(\.(\d){2})?$/
171
+ good:
172
+ - "€ 237"
173
+ - "$ 237"
174
+ - "€ 23.45"
175
+ - "€ 0.25"
176
+ bad:
177
+ - ""
178
+ - "x"
179
+ - "€"
180
+ - "€ "
181
+ - "€  237"
182
+ - "$  237"
183
+ - "€ 23.456"
184
+ - !ruby/object:Program
185
+ description: Using within (1)
186
+ program: |
187
+ match within `/ end
188
+ regex: !ruby/regexp /(\/.*?\/)/
189
+ good:
190
+ - "There is a /slash-delimited string/ here."
191
+ bad:
192
+ - "No such string here."
193
+ - !ruby/object:Program
194
+ description: Using escaping (1)
195
+ program: |
196
+ match escaping `/ end
197
+ regex: !ruby/regexp /\/|[^\/]*?\//
198
+ good:
199
+ - "This is /slash-delimited but \\/with embedded slashes \\/ also /."
200
+ bad:
201
+ - "No such string here."
@@ -0,0 +1,348 @@
1
+ # Encoding: UTF-8
2
+ require_relative '../lib/regexador'
3
+ require 'pp'
4
+
5
+ require 'parslet/convenience'
6
+ require 'parslet/rig/rspec'
7
+
8
# Spec-only extension of Object so that any value can be checked inline
# with a trailing `.succeeds`.
class Object
  # Asserts, through the `should_not` expectation, that the receiver is
  # not nil.
  def succeeds
    should_not == nil
  end
end
13
+
14
# Plain record for one fixture entry. The YAML fixtures tag their entries
# with `!ruby/object:Program` and fill in exactly these five fields.
class Program
  attr_accessor :description # human-readable label used in example names
  attr_accessor :program     # Regexador source text
  attr_accessor :regex       # expected Regexp
  attr_accessor :good        # strings the pattern is expected to match
  attr_accessor :bad         # strings the pattern is expected to reject
end
17
+
18
# Plain record for one capture fixture entry.
class Capture
  attr_accessor :description # human-readable label used in example names
  attr_accessor :program     # Regexador source text
  attr_accessor :regex       # expected Regexp
  # examples is a hash of the form:
  #   { str1 => {var1 => exp1, var2 => exp2, ...},
  #     str2 => {var1 => exp1, var2 => exp2, ...},
  #     ...}
  attr_accessor :examples
end
25
+
26
+
27
+ #### Actual tests...
28
+
29
+
30
+ describe Regexador do
31
+
32
require 'yaml'

# Reads one YAML fixture file and returns the deserialized objects.
#
# The fixtures contain `!ruby/object:` and `!ruby/regexp` tagged nodes. On
# Psych 4 and later, plain YAML.load is a safe load and rejects those tags,
# so unsafe_load is used wherever it exists; older Psych versions fall back
# to YAML.load, which already accepts them.
#
# SECURITY: this deserializes arbitrary Ruby objects. It is only acceptable
# because the files are project-local fixtures; do not reuse it for
# external input.
load_fixture = lambda do |path|
  text = File.read(path)
  YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(text) : YAML.load(text)
end

# Paths are relative to the current working directory.
@oneliners = load_fixture.call("spec/oneliners.yaml")
@programs  = load_fixture.call("spec/programs.yaml")
@captures  = load_fixture.call("spec/captures.yaml")
35
+
36
# Builds the parser used by the examples through @parser.
# @pattern is stored as well, although nothing in the visible part of this
# file reads it.
before :all do
  @parser = Regexador::Parser.new
  @pattern = @parser.pattern
end
40
+
41
# Each single-character rule must accept its own character.
describe "A special character" do
  it "can be matched correctly" do
    # The quote rule is the only one exercised through parse_with_debug.
    @parser.cSQUOTE.parse_with_debug("'").succeeds

    { :cHASH => '#', :cNEWLINE => "\n", :cEQUAL => '=' }.each do |rule, text|
      @parser.public_send(rule).parse(text).succeeds
    end
  end
end
49
+
50
# Backtick character literals must accept non-ASCII letters.
describe "An international character" do
  it "can follow a backtick" do
    @parser.char.parse_with_debug("`æ").succeeds

    %w(`ß `ç `ö `ñ).each do |literal|
      @parser.char.parse(literal).succeeds
    end
  end
end
59
+
60
# `&` followed by hex digits is the codepoint syntax under test.
describe "A Unicode codepoint expression" do
  it "can be matched" do
    digits_only = @parser.codepoint.parse_with_debug("&1234")
    digits_only.succeeds

    hex_letters = @parser.codepoint.parse('&beef')
    hex_letters.succeeds
  end
end
66
+
67
# Generates one nested group per predefined token name.
describe "A predefined token" do
  ["BOS", "EOS", "START", "END"].each do |name|
    describe(name) do
      it('matches using pattern') { @parser.pattern.parse_with_debug(name).succeeds }
    end
  end
end
76
+
77
# Assignments with varying spacing and right-hand sides must all parse.
describe "An assignment" do
  it "can be parsed" do
    [
      "a = 5",
      "a= 5",
      "a =5",
      "a=5",
      "myvar = 'xyz'",
      'var2 = "hello"',
      'this_var = `x-`z'
    ].each do |statement|
      @parser.assignment.parse(statement).succeeds
    end

    # The last case is checked through parse_with_debug instead of parse.
    @parser.assignment.parse_with_debug('pat = maybe many `x-`z').succeeds
  end
end
89
+
90
# Reserved words must be rejected on the left-hand side of an assignment.
describe "A keyword used as a variable name" do
  it "will not parse" do
    keyword_assignment = "end = 'hello'"
    @parser.assignment.should_not parse(keyword_assignment)
    # Disabled: a name that merely begins with a keyword.
    # @parser.assignment.parse("endx = 'hello'")
  end
end
96
+
97
# A definitions section is a run of assignments, optionally interleaved
# with comments and blank lines.
describe "A definition section" do
  it "can be parsed" do
    compact_defs = "a = 5\nstr = \"hello\"\n"
    @parser.definitions.parse_with_debug(compact_defs).succeeds

    # Same idea, with comment lines and an empty line mixed in.
    annotated_defs = <<-EOF
a = 5
# comment...
pat = maybe many `a-`c
# empty line follows:

str = "hello"
# another comment...
    EOF
    @parser.definitions.parse_with_debug(annotated_defs).succeeds
  end
end
113
+
114
# A capture variable is written as `@name`.
describe "A capture variable" do
  it "can be parsed" do
    @parser.capture_var.parse("@myvar").succeeds
  end
end
120
+
121
# A capture with a pattern assigned to it, checked both through the
# `capture` rule alone and inside a full `match … end` program.
describe "A captured pattern" do
  let(:prog) do
    "@myvar = maybe 'abc'"
  end

  it "can be parsed (#capture)" do
    @parser.capture.parse(prog).succeeds
  end

  it "can be parsed (#program)" do
    whole_program = "match #{prog} end"
    @parser.parse(whole_program).succeeds
  end
end
131
+
132
# A bare `@name` with no assignment, checked both through the `capture`
# rule alone and inside a full `match … end` program.
describe "A back reference" do
  let(:prog) do
    '@myvar'
  end

  it 'can be parsed (#capture)' do
    @parser.capture.parse(prog).succeeds
  end

  it 'can be parsed' do
    whole_program = "match #{prog} end"
    @parser.parse(whole_program).succeeds
  end
end
142
+
143
+
144
# `match … end` written on a single line.
describe "A one-line match clause" do
  it "can be parsed" do
    clause = <<-EOF
match `a~`x end
    EOF
    @parser.match_clause.parse_with_debug(clause).succeeds
  end
end
152
+
153
# `match … end` spread over several lines, with a comment line inside it.
describe "A multiline match clause" do
  it "can be parsed" do
    clause = <<-EOF
match
`< "tag" WB
any ~`>
# blah blah blah
"</" "tag" `>
end
    EOF
    @parser.multiline_clause.parse_with_debug(clause).succeeds
  end
end
166
+
167
# The smallest complete program: a lone match clause on one line.
describe "An entire one-line program" do
  it "can be parsed" do
    @parser.parse_with_debug("match `a-`f end").succeeds
  end
end
173
+
174
+
175
# Complete programs: definitions followed by a match clause.
describe "An entire program" do
  it "can be parsed" do
    # Dotted-quad style pattern built from two definitions.
    dotted_quad_source = <<-EOF
dot = "."
num = "25" D5 | `2 D4 D | maybe D1 1,2*D
match WB num dot num dot num dot num WB end
    EOF

    # Several definitions combined with alternation; the leading comment
    # and blank lines are part of the program text being parsed.
    card_source = <<-EOF
# Warning: This one likely has errors!

visa = `4 12*D maybe 3*D
mc = `5 D5 14*D
amex = `3 '47' 13*D
diners = `3 (`0 D5 | '68' D) 11*D
discover = `6 ("011" | `5 2*D) 12*D
jcb = ("2131"|"1800"|"35" 3*D) 11*D

match visa | mc | amex | diners | discover | jcb end
    EOF

    [dotted_quad_source, card_source].each do |source|
      @parser.program.parse_with_debug(source).succeeds
    end
  end
end
199
+
200
# Spec helper wrapping one Regexador pattern expression.
#
# NOTE(review): a class named Program with attribute accessors is also
# defined earlier in this file; this definition appears to reopen it and
# add a constructor. Confirm the shared name is intended.
class Program

  # code - pattern expression without the surrounding `match … end`.
  def initialize code
    @code = code
    @full_program = "match #{@code} end"
    @parser = Regexador::Parser.new
  end

  # True when parse_with_debug returns something other than nil for the
  # wrapped `match … end` program.
  def parseable?
    !@parser.parse_with_debug(@full_program).nil?
  end

  # Parses the bare expression with the parser's `pattern` rule and strips
  # trivial wrappers: a lone :alternation key is unwrapped first, then a
  # lone :sequence key is replaced by its first element.
  #
  # Each unwrap is only attempted on a Hash. The previous step may yield an
  # Array or another object that responds to `size` but not `keys`, which
  # used to raise NoMethodError here.
  def parse
    tree = @parser.pattern.parse(@code)
    tree = tree[:alternation] if sole_key?(tree, :alternation)
    tree = tree[:sequence].first if sole_key?(tree, :sequence)
    tree
  end

  # Compiles the wrapped program via Regexador and returns `to_regex`.
  def regexp
    Regexador.new(@full_program).to_regex
  end

  private

  # True when node is a Hash whose only key is `key`.
  def sole_key?(node, key)
    node.is_a?(Hash) && node.size == 1 && node.keys.first == key
  end

end
226
+
227
# Group-level macro. The given block supplies the pattern source as `code`;
# `program` wraps it in a Program, `regexp` is that program's compiled
# regexp, and the Program is also made the group's subject.
def self.program(&block)
  let(:code, &block)

  let(:program) do
    Program.new(code)
  end

  let(:regexp) do
    program.regexp
  end

  subject do
    program
  end
end
234
+
235
# `without X find Y` is expected to compile to a negative lookbehind.
describe "Negative lookbehind" do
  program do
    'without "USD" find 3*D'
  end

  it { should be_parseable }

  it do
    regexp.should == /(?<!USD)(\d){3}/
  end
end
241
+
242
# `find Y without X` — only the parse tree is checked here, not the regexp.
describe "Negative lookahead" do
  program do
    'find 3*D without " pesos"'
  end

  it "should parse as findpat/negpat" do
    expected_tree = {
      :findpat => { :num1 => "3", :match_item => { :predef => "D" } },
      :negpat  => { :string => " pesos" }
    }
    program.parse.should == expected_tree
  end
end
252
+
253
+ #### "Real" tests (data-driven)
254
+
255
# Data-driven examples: one example group per entry of @oneliners.
# Each entry supplies a bare pattern, which is wrapped in `match … end`,
# plus the expected regex and lists of strings that must and must not match.
@oneliners.each do |x|
  desc, pat, wanted, good, bad =
    x.description, x.program, x.regex, x.good, x.bad
  describe "A one-pattern program (#{desc})" do
    begin
      prog = "match #{pat} end"
      it("can be parsed") { @parser.parse_with_debug(prog).succeeds }
      # Compiled while the group is being defined, so a failure here is
      # reported by the rescue below rather than by an example.
      pattern = Regexador.new(prog)
      rx = pattern.to_regex
      it("can be converted to a regex") { rx.class.should == Regexp }
      good.each {|str| it("should match #{str.inspect}") { rx.should =~ str } }
      bad.each {|str| it("should not match #{str.inspect}") { rx.should_not =~ str } }
      good.each {|str| it("should natively match #{str.inspect}") { (!!(pattern =~ str)).should == true } }
      bad.each {|str| it("should not natively match #{str.inspect}") { (!!(pattern =~ str)).should == false } }
      it("yields the expected regex") { (rx.to_s.should == wanted.to_s) if wanted }
      # Sanity check... does the expected regex really match properly?
      # Guarded like the comparison above: with no expected regex there is
      # nothing to sanity-check.
      good.each {|str| it("has an expected regex matching #{str.inspect}") { (wanted.should =~ str) if wanted } }
      bad.each {|str| it("has an expected regex not matching #{str.inspect}") { (wanted.should_not =~ str) if wanted } }
    rescue => err
      puts "--- ERROR: #{err}"
      puts "--- Description = '#{desc}'"
      # grep, not find: find(regexp) without a block yields an Enumerator,
      # so `.first` returned the first frame regardless of the pattern.
      puts err.backtrace.grep(/regexador_/).first
    end
  end
end
280
+
281
+ # $debug = true
282
+
283
# Data-driven examples: one example group per entry of @programs.
# Each entry supplies a complete program, the expected regex, and lists of
# strings that must and must not match.
@programs.each do |x|
  desc, prog, wanted, good, bad =
    x.description, x.program, x.regex, x.good, x.bad
  describe "A complete program (#{desc})" do
    begin
      it("can be parsed") { @parser.parse_with_debug(prog).succeeds }
      # Compiled while the group is being defined, so a failure here is
      # reported by the rescue below rather than by an example.
      pattern = Regexador.new(prog)
      rx = pattern.to_regex
      it("can be converted to a regex") { rx.class.should == Regexp }
      good.each {|str| it("should match #{str.inspect}") { rx.should match(str) } }
      bad.each {|str| it("should not match #{str.inspect}") { rx.should_not match(str) } }
      good.each {|str| it("should natively match #{str.inspect}") { (!!(pattern =~ str)).should == true } }
      bad.each {|str| it("should not natively match #{str.inspect}") { (!!(pattern =~ str)).should == false } }
      it("yields the expected regex") { (rx.to_s.should == wanted.to_s) if wanted }
      # Sanity check... does the expected regex really match properly?
      # Guarded like the comparison above: with no expected regex there is
      # nothing to sanity-check.
      good.each {|str| it("has an expected regex matching #{str.inspect}") { (wanted.should =~ str) if wanted } }
      bad.each {|str| it("has an expected regex not matching #{str.inspect}") { (wanted.should_not =~ str) if wanted } }
    rescue => err
      puts "--- ERROR: #{err}"
      puts "--- Description = '#{desc}'"
      # grep, not find: find(regexp) without a block yields an Enumerator,
      # so `.first` returned the first frame regardless of the pattern.
      puts err.backtrace.grep(/regexador_/).first
    end
  end
end
307
+
308
# Data-driven examples: one example group per entry of @captures.
# As iterated here, `examples` is a collection of hashes, each mapping an
# input string to a hash of capture name => expected value.
@captures.each do |fixture|
  desc     = fixture.description
  prog     = fixture.program
  wanted   = fixture.regex
  examples = fixture.examples

  describe "A program with captures (#{desc})" do
    begin
      it("can be parsed") { @parser.parse(prog).succeeds }

      # Compiled while the group is being defined, so a failure here is
      # reported by the rescue below rather than by an example.
      pattern = Regexador.new(prog)
      rx = pattern.to_regex
      it("can be converted to a regex") { rx.class.should == Regexp }

      examples.each do |example|
        example.each_pair do |str, results|
          mobj = rx.match(str)       # ordinary Ruby match object
          obj = pattern.match(str)   # special object returned
          results.each_pair do |cvar, val|
            it("grabs captures correctly") do
              mobj[cvar].should == val
            end
            it("exposes captures via method names") do
              obj.send(cvar).should == val
            end
          end
        end
      end

      it("yields the expected regex") { (rx.to_s.should == wanted.to_s) if wanted }
    rescue => err
      puts "Error: #{err}"
    end
  end
end
335
+
336
+ end
337
+
338
# StringNode#to_s must escape regexp metacharacters in its text.
describe Regexador::Transform do
  describe Regexador::Transform::StringNode do
    let(:node) do
      Regexador::Transform::StringNode.new('.string.')
    end

    it 'converts to regexp escaped strings' do
      escaped = node.to_s
      escaped.should == '\.string\.'
    end
  end
end
347
+
348
+