regexador 0.4.6 → 0.4.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,201 +0,0 @@
1
- ---
2
- - !ruby/object:Program
3
- description: Simple use of two vars
4
- program: |
5
- var1 = "abc"
6
- var2 = "def"
7
- match var1 var2 end
8
- regex: !ruby/regexp /abcdef/
9
- good:
10
- - abcdefghi
11
- - xyzabcdef
12
- bad:
13
- - ''
14
- - abcxyzdef
15
- - !ruby/object:Program
16
- description: Multiline match with two vars
17
- program: " var1 = \"abc\"\n var2 = \"def\"\n \n # Test a blank line
18
- and comment as well.\n \n match # multiline match with comment\n var1\n
19
- \ var2\n end\n"
20
- regex: !ruby/regexp /abcdef/
21
- good:
22
- - abcdefghi
23
- - xyzabcdef
24
- bad:
25
- - ''
26
- - abcxyzdef
27
- - !ruby/object:Program
28
- description: IPv4 address
29
- program: |
30
- dot = "."
31
- num = "25" D5 | `2 D4 D | maybe D1 1,2*D
32
- match BOS num dot num dot num dot num EOS end
33
- regex: !ruby/regexp /^(25[0-5]|2[0-4]\d|([01])?(\d){1,2})\.(25[0-5]|2[0-4]\d|([01])?(\d){1,2})\.(25[0-5]|2[0-4]\d|([01])?(\d){1,2})\.(25[0-5]|2[0-4]\d|([01])?(\d){1,2})$/
34
- good:
35
- - "127.0.0.1"
36
- - "255.254.93.22"
37
- - "255.254.93.22"
38
- bad:
39
- - ''
40
- - "7.8.9"
41
- - "3.4.5.6.7"
42
- - "1.2.3.256"
43
- - !ruby/object:Program
44
- description: Identifying credit cards
45
- program: |
46
- # Warning: This one likely has errors!
47
-
48
- visa = `4 12*D maybe 3*D
49
- mc = `5 D5 14*D
50
- discover = `6 ("011" | `5 2*D) 12*D
51
- amex = `3 '47' 13*D
52
- diners = `3 (`0 D5 | '68' D) 11*D
53
- jcb = ("2131"|"1800"|"35" 3*D) 11*D
54
-
55
- match visa | mc | discover | amex | diners | jcb end
56
- regex: !ruby/regexp /(4(\d){12}((\d){3})?|5[0-5](\d){14}|6(011|5(\d){2})(\d){12}|3[47](\d){13}|3(0[0-5]|[68]\d)(\d){11}|(2131|1800|35(\d){3})(\d){11})/
57
-
58
-
59
- good: []
60
- bad: []
61
- - !ruby/object:Program
62
- description: Matching US phone num (with captures)
63
- program: |
64
- match
65
- @area_code = 3 * D
66
- `-
67
- @prefix = 3*D
68
- `-
69
- @last4 = 4*D
70
- end
71
- # regex: !ruby/regexp /(?<area_code>\d{3}){0}(?<prefix>\d{3}){0}(?<last4>\d{4}){0}\g<area_code>-\g<prefix>-\g<last4>/
72
- regex: !ruby/regexp /(?<area_code>(\d){3})\-(?<prefix>(\d){3})\-(?<last4>(\d){4})/
73
- good:
74
- - '601-555-2345'
75
- - 'call me at 888-425-9000'
76
- bad:
77
- - '888-HAL-9000'
78
- - '800.237.1234'
79
- - !ruby/object:Program
80
- description: KNOWNFAIL Matching a clock time, 12/24 hrs
81
- program: |
82
- hr12 = (maybe `0) `1-`9 | `1 D2
83
- hr24 = (maybe `0) D | `1 D | `2 D3
84
- sep = `: | `.
85
- min = D5 D9
86
- sec = D5 D9
87
- ampm = (maybe SPACE) ("am" | "pm")
88
- time12 = hr12 sep min maybe (sep sec) maybe ampm
89
- time24 = hr24 sep min maybe (sep sec)
90
- match BOS (time12 | time24) EOS end
91
- regex: !ruby/regexp /^(((0)?[1-9]|1[0-2])(:|\.)[0-5]\d((:|\.)[0-5]\d)?(( )?(am|pm))?|((0)?\d|1\d|2[0-3])(:|\.)[0-5]\d((:|\.)[0-5]\d)?)$/
92
- good:
93
- - '12:34'
94
- - '1:23'
95
- - '5:14pm'
96
- - '19:43'
97
- - '1:23:45'
98
- - '1:23:45 pm'
99
- - '7:43 pm'
100
- - '8:32:45'
101
- - '8.34'
102
- - '8.34 pm'
103
- - '8.34.45'
104
- bad:
105
- - ''
106
- - abc
107
- - '24:30'
108
- - '25:30'
109
- - '19:43 pm'
110
- - '5:14 pm'
111
- - !ruby/object:Program
112
- description: Using nocase
113
- program: 'match BOS "abc" nocase "def" "ghi" EOS end'
114
- regex: !ruby/regexp /^abc((?i)def)ghi$/
115
- good:
116
- - "abcdefghi"
117
- - "abcDEFghi"
118
- - "abcdEfghi"
119
- bad:
120
- - ""
121
- - "x"
122
- - "xabcdefghi"
123
- - "abcdefghix"
124
- - "aBcdefghi"
125
- - "abcdefGhi"
126
- - "abCdefghI"
127
- - "abCdEfghI"
128
- # - !ruby/object:Program
129
- # description: Simple use of two vars
130
- # program: |
131
- # regex: !ruby/regexp //
132
- # good:
133
- # bad:
134
- - !ruby/object:Program
135
- description: Var used in simple repetition
136
- program: |
137
- n = 3
138
- match BOS n * `x EOS end
139
- regex: !ruby/regexp /^(x){3}$/
140
- good:
141
- - "xxx"
142
- bad:
143
- - ""
144
- - "x"
145
- - "xx x"
146
- - "xxxx"
147
- - !ruby/object:Program
148
- description: Var used in complex repetition
149
- program: |
150
- m = 4
151
- n = 6
152
- match BOS m,n * `x EOS end
153
- regex: !ruby/regexp /^(x){4,6}$/
154
- good:
155
- - "xxxx"
156
- - "xxxxx"
157
- - "xxxxxx"
158
- bad:
159
- - ""
160
- - "x"
161
- - "xx x"
162
- - "xxx"
163
- - "xxxxxxx"
164
- - !ruby/object:Program
165
- description: Using Unicode codepoint again
166
- program: |
167
- euro = &20ac
168
- price = (euro | "$") SPACE many D maybe ("." 2*D)
169
- match BOS price EOS end
170
- regex: !ruby/regexp /^(€|\$) (\d)+(\.(\d){2})?$/
171
- good:
172
- - "€ 237"
173
- - "$ 237"
174
- - "€ 23.45"
175
- - "€ 0.25"
176
- bad:
177
- - ""
178
- - "x"
179
- - "€"
180
- - "€ "
181
- - "€  237"
182
- - "$  237"
183
- - "€ 23.456"
184
- - !ruby/object:Program
185
- description: Using within (1)
186
- program: |
187
- match within `/ end
188
- regex: !ruby/regexp /(\/.*?\/)/
189
- good:
190
- - "There is a /slash-delimited string/ here."
191
- bad:
192
- - "No such string here."
193
- - !ruby/object:Program
194
- description: Using escaping (1)
195
- program: |
196
- match escaping `/ end
197
- regex: !ruby/regexp /\/|[^\/]*?\//
198
- good:
199
- - "This is /slash-delimited but \\/with embedded slashes \\/ also /."
200
- bad:
201
- - "No such string here."
@@ -1,348 +0,0 @@
1
- # Encoding: UTF-8
2
- require_relative '../lib/regexador'
3
- require 'pp'
4
-
5
- require 'parslet/convenience'
6
- require 'parslet/rig/rspec'
7
-
8
- class Object # Spec-only monkey patch: adds a `succeeds` assertion helper to every object.
9
- def succeeds # Asserts, via RSpec's `should_not`, that the receiver is not nil.
10
- self.should_not == nil # presumably nil is what parse_with_debug yields on a failed parse — confirm against Parslet
11
- end
12
- end
13
-
14
- class Program # Fixture record; the data-driven loops in this spec read these five fields from each loaded entry.
15
- attr_accessor :description, :program, :regex, :good, :bad # good/bad are iterated as lists of strings to match / not match
16
- end
17
-
18
- class Capture # Fixture record for capture tests; fields are read by the @captures loop in this spec.
19
- attr_accessor :description, :program, :regex, :examples
20
- # examples is iterated by the @captures loop as a list of hashes, each of the form:
21
- # { str1 => {var1 => exp1, var2 => exp2, ...},
22
- # str2 => {var1 => exp1, var2 => exp2, ...},
23
- # ...}
24
- end
25
-
26
-
27
- #### Actual tests...
28
-
29
-
30
- describe Regexador do
31
-
32
- @oneliners = YAML.load(File.read("spec/oneliners.yaml")) # Fixtures are read while the group is being defined; paths are relative to the working directory.
33
- @programs = YAML.load(File.read("spec/programs.yaml")) # NOTE(review): if these files use !ruby/object tags, newer Psych versions may refuse them in YAML.load — confirm.
34
- @captures = YAML.load(File.read("spec/captures.yaml"))
35
-
36
- before(:all) do # Builds the parser that the examples reach through @parser.
37
- @parser = Regexador::Parser.new
38
- @pattern = @parser.pattern # not referenced elsewhere in this listing
39
- end
40
-
41
- describe "A special character" do # Exercises the single-character rules cSQUOTE, cHASH, cNEWLINE and cEQUAL.
42
- it "can be matched correctly" do
43
- @parser.cSQUOTE.parse_with_debug("'").succeeds
44
- @parser.cHASH.parse('#').succeeds
45
- @parser.cNEWLINE.parse("\n").succeeds
46
- @parser.cEQUAL.parse('=').succeeds
47
- end
48
- end
49
-
50
- describe "An international character" do # Exercises the `char` rule with non-ASCII letters.
51
- it "can follow a backtick" do # each sample is a backtick followed by one non-ASCII letter
52
- @parser.char.parse_with_debug("`æ").succeeds
53
- @parser.char.parse("`ß").succeeds
54
- @parser.char.parse("`ç").succeeds
55
- @parser.char.parse("`ö").succeeds
56
- @parser.char.parse("`ñ").succeeds
57
- end
58
- end
59
-
60
- describe "A Unicode codepoint expression" do # Exercises the `codepoint` rule with '&' plus four hex digits.
61
- it "can be matched" do
62
- @parser.codepoint.parse_with_debug("&1234").succeeds
63
- @parser.codepoint.parse('&beef').succeeds
64
- end
65
- end
66
-
67
- describe "A predefined token" do # Generates one nested group per token name listed below.
68
- %w(BOS EOS START END).each do |token|
69
- describe token do
70
- it 'matches using pattern' do
71
- @parser.pattern.parse_with_debug(token).succeeds # the bare token name is fed to the `pattern` rule
72
- end
73
- end
74
- end
75
- end
76
-
77
- describe "An assignment" do # Exercises the `assignment` rule with varied spacing around '=' and several right-hand sides.
78
- it "can be parsed" do
79
- @parser.assignment.parse("a = 5").succeeds
80
- @parser.assignment.parse("a= 5").succeeds
81
- @parser.assignment.parse("a =5").succeeds
82
- @parser.assignment.parse("a=5").succeeds
83
- @parser.assignment.parse("myvar = 'xyz'").succeeds
84
- @parser.assignment.parse('var2 = "hello"').succeeds
85
- @parser.assignment.parse('this_var = `x-`z').succeeds
86
- @parser.assignment.parse_with_debug('pat = maybe many `x-`z').succeeds
87
- end
88
- end
89
-
90
- describe "A keyword used as a variable name" do # Negative case: `end` on the left-hand side must be rejected.
91
- it "will not parse" do
92
- @parser.assignment.should_not parse("end = 'hello'") # `parse` is presumably the matcher from parslet/rig/rspec — confirm
93
- # @parser.assignment.parse("endx = 'hello'")
94
- end
95
- end
96
-
97
- describe "A definition section" do # Exercises the `definitions` rule on a two-line string and on a heredoc.
98
- it "can be parsed" do
99
- defs1 = "a = 5\nstr = \"hello\"\n"
100
- @parser.definitions.parse_with_debug(defs1).succeeds
101
- defs2 = <<-EOF
102
- a = 5
103
- # comment...
104
- pat = maybe many `a-`c
105
- # empty line follows:
106
-
107
- str = "hello"
108
- # another comment...
109
- EOF
110
- @parser.definitions.parse_with_debug(defs2).succeeds
111
- end
112
- end
113
-
114
- describe "A capture variable" do # Exercises the `capture_var` rule on a bare @name.
115
- it "can be parsed" do
116
- str1 = "@myvar"
117
- @parser.capture_var.parse(str1).succeeds
118
- end
119
- end
120
-
121
- describe "A captured pattern" do # A capture assignment, parsed by the `capture` rule and inside a whole program.
122
- let(:prog) { "@myvar = maybe 'abc'" }
123
-
124
- it "can be parsed (#capture)" do
125
- @parser.capture.parse(prog).succeeds
126
- end
127
- it "can be parsed (#program)" do
128
- @parser.parse("match #{prog} end").succeeds
129
- end
130
- end
131
-
132
- describe "A back reference" do # A bare @name, parsed by the `capture` rule and inside a whole program.
133
- let(:prog) { '@myvar' }
134
-
135
- it 'can be parsed (#capture)' do
136
- @parser.capture.parse(prog).succeeds
137
- end
138
- it 'can be parsed' do
139
- @parser.parse("match #{prog} end").succeeds
140
- end
141
- end
142
-
143
-
144
- describe "A one-line match clause" do # Exercises the `match_clause` rule on a heredoc holding a single line.
145
- it "can be parsed" do
146
- mc1 = <<-EOF
147
- match `a~`x end
148
- EOF
149
- @parser.match_clause.parse_with_debug(mc1).succeeds
150
- end
151
- end
152
-
153
- describe "A multiline match clause" do # Exercises the `multiline_clause` rule, with a comment line inside the clause.
154
- it "can be parsed" do
155
- mc2 = <<-EOF
156
- match
157
- `< "tag" WB
158
- any ~`>
159
- # blah blah blah
160
- "</" "tag" `>
161
- end
162
- EOF
163
- @parser.multiline_clause.parse_with_debug(mc2).succeeds
164
- end
165
- end
166
-
167
- describe "An entire one-line program" do # Uses the parser's top-level entry point, not a sub-rule.
168
- it "can be parsed" do
169
- prog = "match `a-`f end"
170
- @parser.parse_with_debug(prog).succeeds
171
- end
172
- end
173
-
174
-
175
- describe "An entire program" do # Exercises the `program` rule on definitions followed by a match clause.
176
- it "can be parsed" do
177
- prog1 = <<-EOF
178
- dot = "."
179
- num = "25" D5 | `2 D4 D | maybe D1 1,2*D
180
- match WB num dot num dot num dot num WB end
181
- EOF
182
- @parser.program.parse_with_debug(prog1).succeeds
183
-
184
- prog2 = <<-EOF
185
- # Warning: This one likely has errors!
186
-
187
- visa = `4 12*D maybe 3*D
188
- mc = `5 D5 14*D
189
- amex = `3 '47' 13*D
190
- diners = `3 (`0 D5 | '68' D) 11*D
191
- discover = `6 ("011" | `5 2*D) 12*D
192
- jcb = ("2131"|"1800"|"35" 3*D) 11*D
193
-
194
- match visa | mc | amex | diners | discover | jcb end
195
- EOF
196
- @parser.program.parse_with_debug(prog2).succeeds # only parseability is asserted; no regex is compared here
197
- end
198
- end
199
-
200
- class Program # Helper around one pattern snippet. NOTE(review): presumably reopens the Program fixture class — confirm constant scoping inside the describe block.
201
-
202
- def initialize code # code: a bare pattern; it is wrapped as "match <code> end" to form a full program
203
- @code = code
204
- @full_program = "match #{@code} end"
205
- @parser = Regexador::Parser.new
206
- end
207
-
208
- def parseable? # true when parse_with_debug returns a non-nil result for the wrapped program
209
- @parser.parse_with_debug(@full_program) != nil
210
- end
211
-
212
- def parse # parses the bare pattern, then unwraps a lone :alternation key and a lone :sequence key (first element)
213
- tree = @parser.pattern.parse(@code)
214
- tree = tree[:alternation] \
215
- if tree.size == 1 && tree.keys.first == :alternation
216
- tree = tree[:sequence].first \
217
- if tree.size == 1 && tree.keys.first == :sequence
218
- tree
219
- end
220
-
221
- def regexp # builds a Regexador from the wrapped program and returns its to_regex result
222
- Regexador.new(@full_program).to_regex
223
- end
224
-
225
- end
226
-
227
- def self.program &block # Group-level macro: the block supplies the pattern source for the helpers below.
228
- let(:code, &block) # code is whatever the block returns
229
- let(:program) { Program.new(code) }
230
- let(:regexp) { program.regexp }
231
-
232
- subject { program } # lets one-liner examples such as `it { should be_parseable }` target the Program
233
- end
234
-
235
- describe "Negative lookbehind" do # `without X find Y` is expected to compile to a (?<!X) prefix.
236
- program { 'without "USD" find 3*D' }
237
-
238
- it { should be_parseable } # be_parseable maps to Program#parseable? on the subject
239
- it { regexp.should == /(?<!USD)(\d){3}/ }
240
- end
241
-
242
- describe "Negative lookahead" do # Only the parse tree is checked here; no compiled regex is compared.
243
- program { 'find 3*D without " pesos"' }
244
-
245
- it "should parse as findpat/negpat" do
246
- program.parse.should == {
247
- findpat: {:num1=>"3", :match_item=>{:predef=>"D"}},
248
- negpat: {:string=>" pesos"}
249
- }
250
- end
251
- end
252
-
253
- #### "Real" tests (data-driven)
254
-
255
- @oneliners.each do |x| # Defines one example group per one-liner fixture; examples are generated while the group is defined.
256
- desc, pat, wanted, good, bad =
257
- x.description, x.program, x.regex, x.good, x.bad
258
- describe "A one-pattern program (#{desc})" do
259
- begin
260
- prog = "match #{pat} end" # one-liner fixtures hold a bare pattern, so wrap it into a full program
261
- it("can be parsed") { @parser.parse_with_debug(prog).succeeds }
262
- pattern = Regexador.new(prog) # built at definition time; a failure here is handled by the rescue below
263
- rx = pattern.to_regex
264
- it("can be converted to a regex") { rx.class.should == Regexp }
265
- good.each {|str| it("should match #{str.inspect}") { rx.should =~ str } }
266
- bad.each {|str| it("should not match #{str.inspect}") { rx.should_not =~ str } }
267
- good.each {|str| it("should natively match #{str.inspect}") { (!!(pattern =~ str)).should == true } }
268
- bad.each {|str| it("should not natively match #{str.inspect}") { (!!(pattern =~ str)).should == false } }
269
- it("yields the expected regex") { (rx.to_s.should == wanted.to_s) if wanted }
270
- # Sanity check: the fixture's own expected regex must also accept the good strings and reject the bad ones.
271
- good.each {|str| it("has an expected regex matching #{str.inspect}") { wanted.should =~ str } }
272
- bad.each {|str| it("has an expected regex not matching #{str.inspect}") { wanted.should_not =~ str } }
273
- rescue => err # errors raised while defining this group's examples are printed instead of propagating
274
- puts "--- ERROR: #{err}"
275
- puts "--- Description = '#{desc}'"
276
- puts err.backtrace.grep(/regexador_/).first || err.backtrace.first # grep, not find: find took the regexp as its ifnone argument and never filtered
277
- end
278
- end
279
- end
280
-
281
- # $debug = true
282
-
283
- @programs.each do |x| # Defines one example group per complete-program fixture; examples are generated while the group is defined.
284
- desc, prog, wanted, good, bad =
285
- x.description, x.program, x.regex, x.good, x.bad
286
- describe "A complete program (#{desc})" do
287
- begin
288
- it("can be parsed") { @parser.parse_with_debug(prog).succeeds }
289
- pattern = Regexador.new(prog) # built at definition time; a failure here is handled by the rescue below
290
- rx = pattern.to_regex
291
- it("can be converted to a regex") { rx.class.should == Regexp }
292
- good.each {|str| it("should match #{str.inspect}") { rx.should match(str) } }
293
- bad.each {|str| it("should not match #{str.inspect}") { rx.should_not match(str) } }
294
- good.each {|str| it("should natively match #{str.inspect}") { (!!(pattern =~ str)).should == true } }
295
- bad.each {|str| it("should not natively match #{str.inspect}") { (!!(pattern =~ str)).should == false } }
296
- it("yields the expected regex") { (rx.to_s.should == wanted.to_s) if wanted }
297
- # Sanity check: the fixture's own expected regex must also accept the good strings and reject the bad ones.
298
- good.each {|str| it("has an expected regex matching #{str.inspect}") { wanted.should =~ str } }
299
- bad.each {|str| it("has an expected regex not matching #{str.inspect}") { wanted.should_not =~ str } }
300
- rescue => err # errors raised while defining this group's examples are printed instead of propagating
301
- puts "--- ERROR: #{err}"
302
- puts "--- Description = '#{desc}'"
303
- puts err.backtrace.grep(/regexador_/).first || err.backtrace.first # grep, not find: find took the regexp as its ifnone argument and never filtered
304
- end
305
- end
306
- end
307
-
308
- @captures.each do |x| # Defines one example group per capture fixture.
309
- desc, prog, wanted, examples =
310
- x.description, x.program, x.regex, x.examples
311
- describe "A program with captures (#{desc})" do
312
- begin
313
- it("can be parsed") { @parser.parse(prog).succeeds }
314
-
315
- pattern = Regexador.new(prog) # built at definition time; a failure here is handled by the rescue below
316
- rx = pattern.to_regex
317
- it("can be converted to a regex") { rx.class.should == Regexp }
318
-
319
- examples.each do |example| # each example maps an input string to its expected captures
320
- example.each_pair do |str, results|
321
- mobj = rx.match(str) # ordinary Ruby match object
322
- obj = pattern.match(str) # special object returned
323
- results.each_pair do |cvar, val| # cvar: capture name, val: expected captured text
324
- it("grabs captures correctly") { mobj[cvar].should == val }
325
- it("exposes captures via method names") { obj.send(cvar).should == val }
326
- end
327
- end
328
- end
329
- it("yields the expected regex") { (rx.to_s.should == wanted.to_s) if wanted }
330
- rescue => err # definition-time errors are printed instead of propagating; no backtrace is shown here
331
- puts "Error: #{err}"
332
- end
333
- end
334
- end
335
-
336
- end
337
-
338
- describe Regexador::Transform do # Unit-level check of a transform node, independent of the parser.
339
- describe Regexador::Transform::StringNode do
340
- let(:sn) { Regexador::Transform::StringNode.new('.string.') } # the sample contains dots, which are regexp metacharacters
341
-
342
- it 'converts to regexp escaped strings' do
343
- sn.to_s.should == '\.string\.' # each dot is expected to come back backslash-escaped
344
- end
345
- end
346
- end
347
-
348
-