regexador 0.4.5 → 0.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,201 @@
1
+ ---
2
+ - !ruby/object:Program
3
+ description: Simple use of two vars
4
+ program: |
5
+ var1 = "abc"
6
+ var2 = "def"
7
+ match var1 var2 end
8
+ regex: !ruby/regexp /abcdef/
9
+ good:
10
+ - abcdefghi
11
+ - xyzabcdef
12
+ bad:
13
+ - ''
14
+ - abcxyzdef
15
+ - !ruby/object:Program
16
+ description: Multiline match with two vars
17
+ program: " var1 = \"abc\"\n var2 = \"def\"\n \n # Test a blank line
18
+ and comment as well.\n \n match # multiline match with comment\n var1\n
19
+ \ var2\n end\n"
20
+ regex: !ruby/regexp /abcdef/
21
+ good:
22
+ - abcdefghi
23
+ - xyzabcdef
24
+ bad:
25
+ - ''
26
+ - abcxyzdef
27
+ - !ruby/object:Program
28
+ description: IPv4 address
29
+ program: |
30
+ dot = "."
31
+ num = "25" D5 | `2 D4 D | maybe D1 1,2*D
32
+ match BOS num dot num dot num dot num EOS end
33
+ regex: !ruby/regexp /^(25[0-5]|2[0-4]\d|([01])?(\d){1,2})\.(25[0-5]|2[0-4]\d|([01])?(\d){1,2})\.(25[0-5]|2[0-4]\d|([01])?(\d){1,2})\.(25[0-5]|2[0-4]\d|([01])?(\d){1,2})$/
34
+ good:
35
+ - "127.0.0.1"
36
+ - "255.254.93.22"
37
+ - "255.254.93.22"
38
+ bad:
39
+ - ''
40
+ - "7.8.9"
41
+ - "3.4.5.6.7"
42
+ - "1.2.3.256"
43
+ - !ruby/object:Program
44
+ description: Identifying credit cards
45
+ program: |
46
+ # Warning: This one likely has errors!
47
+
48
+ visa = `4 12*D maybe 3*D
49
+ mc = `5 D5 14*D
50
+ discover = `6 ("011" | `5 2*D) 12*D
51
+ amex = `3 '47' 13*D
52
+ diners = `3 (`0 D5 | '68' D) 11*D
53
+ jcb = ("2131"|"1800"|"35" 3*D) 11*D
54
+
55
+ match visa | mc | discover | amex | diners | jcb end
56
+ regex: !ruby/regexp /(4(\d){12}((\d){3})?|5[0-5](\d){14}|6(011|5(\d){2})(\d){12}|3[47](\d){13}|3(0[0-5]|[68]\d)(\d){11}|(2131|1800|35(\d){3})(\d){11})/
57
+
58
+
59
+ good: []
60
+ bad: []
61
+ - !ruby/object:Program
62
+ description: Matching US phone num (with captures)
63
+ program: |
64
+ match
65
+ @area_code = 3 * D
66
+ `-
67
+ @prefix = 3*D
68
+ `-
69
+ @last4 = 4*D
70
+ end
71
+ # regex: !ruby/regexp /(?<area_code>\d{3}){0}(?<prefix>\d{3}){0}(?<last4>\d{4}){0}\g<area_code>-\g<prefix>-\g<last4>/
72
+ regex: !ruby/regexp /(?<area_code>(\d){3})\-(?<prefix>(\d){3})\-(?<last4>(\d){4})/
73
+ good:
74
+ - '601-555-2345'
75
+ - 'call me at 888-425-9000'
76
+ bad:
77
+ - '888-HAL-9000'
78
+ - '800.237.1234'
79
+ - !ruby/object:Program
80
+ description: KNOWNFAIL Matching a clock time, 12/24 hrs
81
+ program: |
82
+ hr12 = (maybe `0) `1-`9 | `1 D2
83
+ hr24 = (maybe `0) D | `1 D | `2 D3
84
+ sep = `: | `.
85
+ min = D5 D9
86
+ sec = D5 D9
87
+ ampm = (maybe SPACE) ("am" | "pm")
88
+ time12 = hr12 sep min maybe (sep sec) maybe ampm
89
+ time24 = hr24 sep min maybe (sep sec)
90
+ match BOS (time12 | time24) EOS end
91
+ regex: !ruby/regexp /^(((0)?[1-9]|1[0-2])(:|\.)[0-5]\d((:|\.)[0-5]\d)?(( )?(am|pm))?|((0)?\d|1\d|2[0-3])(:|\.)[0-5]\d((:|\.)[0-5]\d)?)$/
92
+ good:
93
+ - '12:34'
94
+ - '1:23'
95
+ - '5:14pm'
96
+ - '19:43'
97
+ - '1:23:45'
98
+ - '1:23:45 pm'
99
+ - '7:43 pm'
100
+ - '8:32:45'
101
+ - '8.34'
102
+ - '8.34 pm'
103
+ - '8.34.45'
104
+ bad:
105
+ - ''
106
+ - abc
107
+ - '24:30'
108
+ - '25:30'
109
+ - '19:43 pm'
110
+ - '5:14 pm'
111
+ - !ruby/object:Program
112
+ description: Using nocase
113
+ program: 'match BOS "abc" nocase "def" "ghi" EOS end'
114
+ regex: !ruby/regexp /^abc((?i)def)ghi$/
115
+ good:
116
+ - "abcdefghi"
117
+ - "abcDEFghi"
118
+ - "abcdEfghi"
119
+ bad:
120
+ - ""
121
+ - "x"
122
+ - "xabcdefghi"
123
+ - "abcdefghix"
124
+ - "aBcdefghi"
125
+ - "abcdefGhi"
126
+ - "abCdefghI"
127
+ - "abCdEfghI"
128
+ # - !ruby/object:Program
129
+ # description: Simple use of two vars
130
+ # program: |
131
+ # regex: !ruby/regexp //
132
+ # good:
133
+ # bad:
134
+ - !ruby/object:Program
135
+ description: Var used in simple repetition
136
+ program: |
137
+ n = 3
138
+ match BOS n * `x EOS end
139
+ regex: !ruby/regexp /^(x){3}$/
140
+ good:
141
+ - "xxx"
142
+ bad:
143
+ - ""
144
+ - "x"
145
+ - "xx x"
146
+ - "xxxx"
147
+ - !ruby/object:Program
148
+ description: Var used in complex repetition
149
+ program: |
150
+ m = 4
151
+ n = 6
152
+ match BOS m,n * `x EOS end
153
+ regex: !ruby/regexp /^(x){4,6}$/
154
+ good:
155
+ - "xxxx"
156
+ - "xxxxx"
157
+ - "xxxxxx"
158
+ bad:
159
+ - ""
160
+ - "x"
161
+ - "xx x"
162
+ - "xxx"
163
+ - "xxxxxxx"
164
+ - !ruby/object:Program
165
+ description: Using Unicode codepoint again
166
+ program: |
167
+ euro = &20ac
168
+ price = (euro | "$") SPACE many D maybe ("." 2*D)
169
+ match BOS price EOS end
170
+ regex: !ruby/regexp /^(€|\$) (\d)+(\.(\d){2})?$/
171
+ good:
172
+ - "€ 237"
173
+ - "$ 237"
174
+ - "€ 23.45"
175
+ - "€ 0.25"
176
+ bad:
177
+ - ""
178
+ - "x"
179
+ - "€"
180
+ - "€ "
181
+ - "€ 237"
182
+ - "$ 237"
183
+ - "€ 23.456"
184
+ - !ruby/object:Program
185
+ description: Using within (1)
186
+ program: |
187
+ match within `/ end
188
+ regex: !ruby/regexp /(\/.*?\/)/
189
+ good:
190
+ - "There is a /slash-delimited string/ here."
191
+ bad:
192
+ - "No such string here."
193
+ - !ruby/object:Program
194
+ description: Using escaping (1)
195
+ program: |
196
+ match escaping `/ end
197
+ regex: !ruby/regexp /\/|[^\/]*?\//
198
+ good:
199
+ - "This is /slash-delimited but \\/with embedded slashes \\/ also /."
200
+ bad:
201
+ - "No such string here."
@@ -0,0 +1,348 @@
1
+ # Encoding: UTF-8
2
+ require_relative '../lib/regexador'
3
+ require 'pp'
4
+
5
+ require 'parslet/convenience'
6
+ require 'parslet/rig/rspec'
7
+
8
+ class Object # Reopened so that any value returned by a parser call can be asserted on directly.
9
+ def succeeds # Spec helper: expectation passes when the receiver is not nil.
10
+ self.should_not == nil
11
+ end
12
+ end
13
+
14
+ class Program # Record type behind the !ruby/object:Program fixture entries; reopened further down with helper methods.
15
+ attr_accessor :description, :program, :regex, :good, :bad
16
+ end
17
+
18
+ class Capture # Record type exposing the fields the captures loop reads (description, program, regex, examples).
19
+ attr_accessor :description, :program, :regex, :examples
20
+ # examples is consumed with #each and each element with #each_pair, i.e. a list of hashes of the form:
21
+ # { str1 => {var1 => exp1, var2 => exp2, ...},
22
+ # str2 => {var1 => exp1, var2 => exp2, ...},
23
+ # ...}
24
+ end
25
+
26
+
27
+ #### Actual tests...
28
+
29
+
30
+ describe Regexador do
31
+
32
+ @oneliners = YAML.load(File.read("spec/oneliners.yaml")) # Paths are relative to the current working directory, not to this file.
33
+ @programs = YAML.load(File.read("spec/programs.yaml")) # NOTE(review): fixtures use !ruby/object and !ruby/regexp tags; newer Psych makes YAML.load safe-by-default -- confirm these still load.
34
+ @captures = YAML.load(File.read("spec/captures.yaml")) # All three are read at group-definition time by the data-driven loops below.
35
+
36
+ before(:all) do # Builds one parser instance that the examples in this group reach through @parser.
37
+ @parser = Regexador::Parser.new
38
+ @pattern = @parser.pattern # NOTE(review): @pattern is not read by any example visible in this file.
39
+ end
40
+
41
+ describe "A special character" do # Each single-character rule is fed its own literal character.
42
+ it "can be matched correctly" do
43
+ @parser.cSQUOTE.parse_with_debug("'").succeeds
44
+ @parser.cHASH.parse('#').succeeds
45
+ @parser.cNEWLINE.parse("\n").succeeds
46
+ @parser.cEQUAL.parse('=').succeeds
47
+ end
48
+ end
49
+
50
+ describe "An international character" do # Exercises the char rule with non-ASCII letters.
50
+ it "can follow a backtick" do # every input is a backtick followed by a single non-ASCII letter
51
+ @parser.char.parse_with_debug("`æ").succeeds
52
+ @parser.char.parse("`ß").succeeds
53
+ @parser.char.parse("`ç").succeeds
54
+ @parser.char.parse("`ö").succeeds
55
+ @parser.char.parse("`ñ").succeeds
56
+ end
57
+ end
59
+
60
+ describe "A Unicode codepoint expression" do # Inputs are "&" followed by four hex digits.
60
+ it "can be matched" do
61
+ @parser.codepoint.parse_with_debug("&1234").succeeds
62
+ @parser.codepoint.parse('&beef').succeeds
63
+ end
64
+ end
66
+
67
+ describe "A predefined token" do # Generates one nested group, with one example, per token name.
67
+ %w(BOS EOS START END).each do |token|
68
+ describe token do
69
+ it 'matches using pattern' do
70
+ @parser.pattern.parse_with_debug(token).succeeds
71
+ end
72
+ end
73
+ end
74
+ end
76
+
77
+ describe "An assignment" do # Covers optional whitespace around "=" and several right-hand-side forms.
77
+ it "can be parsed" do
78
+ @parser.assignment.parse("a = 5").succeeds
79
+ @parser.assignment.parse("a= 5").succeeds
80
+ @parser.assignment.parse("a =5").succeeds
81
+ @parser.assignment.parse("a=5").succeeds
82
+ @parser.assignment.parse("myvar = 'xyz'").succeeds
83
+ @parser.assignment.parse('var2 = "hello"').succeeds
84
+ @parser.assignment.parse('this_var = `x-`z').succeeds
85
+ @parser.assignment.parse_with_debug('pat = maybe many `x-`z').succeeds
86
+ end
87
+ end
89
+
90
+ describe "A keyword used as a variable name" do # Negative case: "end" must be rejected as an assignment target.
90
+ it "will not parse" do
91
+ @parser.assignment.should_not parse("end = 'hello'")
92
+ # Disabled check for a name that merely starts with a keyword: @parser.assignment.parse("endx = 'hello'")
93
+ end
94
+ end
96
+
97
+ describe "A definition section" do # Parses a two-line block, then one containing comments and an empty line.
98
+ it "can be parsed" do
99
+ defs1 = "a = 5\nstr = \"hello\"\n"
100
+ @parser.definitions.parse_with_debug(defs1).succeeds
101
+ defs2 = <<-EOF
102
+ a = 5
103
+ # comment...
104
+ pat = maybe many `a-`c
105
+ # empty line follows:
106
+
107
+ str = "hello"
108
+ # another comment...
109
+ EOF
110
+ @parser.definitions.parse_with_debug(defs2).succeeds
111
+ end
112
+ end
113
+
114
+ describe "A capture variable" do # A bare "@name" token fed to the capture_var rule.
115
+ it "can be parsed" do
116
+ str1 = "@myvar"
117
+ @parser.capture_var.parse(str1).succeeds
118
+ end
119
+ end
120
+
121
+ describe "A captured pattern" do # "@name = pattern", tried on the capture rule alone and inside a full match clause.
122
+ let(:prog) { "@myvar = maybe 'abc'" }
123
+
124
+ it "can be parsed (#capture)" do
125
+ @parser.capture.parse(prog).succeeds
126
+ end
127
+ it "can be parsed (#program)" do
128
+ @parser.parse("match #{prog} end").succeeds
129
+ end
130
+ end
131
+
132
+ describe "A back reference" do # A bare "@name" with no assignment, tried on the capture rule and in a full program.
133
+ let(:prog) { '@myvar' }
134
+
135
+ it 'can be parsed (#capture)' do
136
+ @parser.capture.parse(prog).succeeds
137
+ end
138
+ it 'can be parsed' do
139
+ @parser.parse("match #{prog} end").succeeds
140
+ end
141
+ end
142
+
143
+
144
+ describe "A one-line match clause" do # The heredoc supplies the clause with a trailing newline.
145
+ it "can be parsed" do
146
+ mc1 = <<-EOF
147
+ match `a~`x end
148
+ EOF
149
+ @parser.match_clause.parse_with_debug(mc1).succeeds
150
+ end
151
+ end
152
+
153
+ describe "A multiline match clause" do # Clause spread over several lines, including an embedded comment line.
154
+ it "can be parsed" do
155
+ mc2 = <<-EOF
156
+ match
157
+ `< "tag" WB
158
+ any ~`>
159
+ # blah blah blah
160
+ "</" "tag" `>
161
+ end
162
+ EOF
163
+ @parser.multiline_clause.parse_with_debug(mc2).succeeds
164
+ end
165
+ end
166
+
167
+ describe "An entire one-line program" do # Uses the parser's top-level entry point rather than a single rule.
168
+ it "can be parsed" do
169
+ prog = "match `a-`f end"
170
+ @parser.parse_with_debug(prog).succeeds
171
+ end
172
+ end
173
+
174
+
175
+ describe "An entire program" do # Definitions followed by a match clause, fed to the program rule.
176
+ it "can be parsed" do # prog1 is an IPv4-style pattern; prog2 is the credit-card pattern (its own comment warns it may be wrong)
177
+ prog1 = <<-EOF
178
+ dot = "."
179
+ num = "25" D5 | `2 D4 D | maybe D1 1,2*D
180
+ match WB num dot num dot num dot num WB end
181
+ EOF
182
+ @parser.program.parse_with_debug(prog1).succeeds
183
+
184
+ prog2 = <<-EOF
185
+ # Warning: This one likely has errors!
186
+
187
+ visa = `4 12*D maybe 3*D
188
+ mc = `5 D5 14*D
189
+ amex = `3 '47' 13*D
190
+ diners = `3 (`0 D5 | '68' D) 11*D
191
+ discover = `6 ("011" | `5 2*D) 12*D
192
+ jcb = ("2131"|"1800"|"35" 3*D) 11*D
193
+
194
+ match visa | mc | amex | diners | discover | jcb end
195
+ EOF
196
+ @parser.program.parse_with_debug(prog2).succeeds
197
+ end
198
+ end
199
+
200
+ class Program # Helper wrapping a one-line pattern; resolved by the describe-level `program` macro below.
201
+
202
+ def initialize code # code is the bare pattern; it is wrapped as "match <code> end" to form a full program.
203
+ @code = code
204
+ @full_program = "match #{@code} end"
205
+ @parser = Regexador::Parser.new
206
+ end
207
+
208
+ def parseable? # True when parsing the full program yields a non-nil result (backs `should be_parseable`).
209
+ @parser.parse_with_debug(@full_program) != nil
210
+ end
211
+
212
+ def parse # Parses the bare pattern, then unwraps a lone :alternation key and a lone :sequence key (taking its first element).
213
+ tree = @parser.pattern.parse(@code)
214
+ tree = tree[:alternation] \
215
+ if tree.size == 1 && tree.keys.first == :alternation
216
+ tree = tree[:sequence].first \
217
+ if tree.size == 1 && tree.keys.first == :sequence
218
+ tree
219
+ end
220
+
221
+ def regexp # Compiles the full program through Regexador#to_regex.
222
+ Regexador.new(@full_program).to_regex
223
+ end
224
+
225
+ end
226
+
227
+ def self.program &block # Group-level macro: the block's value becomes `code`; also defines `program`, `regexp` and the subject.
228
+ let(:code, &block)
229
+ let(:program) { Program.new(code) }
230
+ let(:regexp) { program.regexp }
231
+
232
+ subject { program }
233
+ end
234
+
235
+ describe "Negative lookbehind" do # "without X find Y" is expected to compile to (?<!X)Y.
236
+ program { 'without "USD" find 3*D' }
237
+
238
+ it { should be_parseable }
239
+ it { regexp.should == /(?<!USD)(\d){3}/ }
240
+ end
241
+
242
+ describe "Negative lookahead" do # Only the parse tree is checked here; the generated regexp is not asserted.
243
+ program { 'find 3*D without " pesos"' }
244
+
245
+ it "should parse as findpat/negpat" do
246
+ program.parse.should == {
247
+ findpat: {:num1=>"3", :match_item=>{:predef=>"D"}},
248
+ negpat: {:string=>" pesos"}
249
+ }
250
+ end
251
+ end
252
+
253
+ #### "Real" tests (data-driven)
254
+
255
+ @oneliners.each do |x| # One example group per one-line fixture; the pattern is wrapped in "match ... end".
256
+ desc, pat, wanted, good, bad =
257
+ x.description, x.program, x.regex, x.good, x.bad
258
+ describe "A one-pattern program (#{desc})" do
259
+ begin # Runs while the examples are being defined, so the pattern is compiled once per fixture.
260
+ prog = "match #{pat} end"
261
+ it("can be parsed") { @parser.parse_with_debug(prog).succeeds }
262
+ pattern = Regexador.new(prog)
263
+ rx = pattern.to_regex
264
+ it("can be converted to a regex") { rx.class.should == Regexp }
265
+ good.each {|str| it("should match #{str.inspect}") { rx.should =~ str } }
266
+ bad.each {|str| it("should not match #{str.inspect}") { rx.should_not =~ str } }
267
+ good.each {|str| it("should natively match #{str.inspect}") { (!!(pattern =~ str)).should == true } }
268
+ bad.each {|str| it("should not natively match #{str.inspect}") { (!!(pattern =~ str)).should == false } }
269
+ it("yields the expected regex") { (rx.to_s.should == wanted.to_s) if wanted }
270
+ # Sanity check... does the expected regex really match properly?
271
+ good.each {|str| it("has an expected regex matching #{str.inspect}") { wanted.should =~ str } }
272
+ bad.each {|str| it("has an expected regex not matching #{str.inspect}") { wanted.should_not =~ str } }
273
+ rescue => err # Definition-time errors only (e.g. the program failing to compile); failures inside it-blocks are reported by RSpec.
274
+ puts "--- ERROR: #{err}"
275
+ puts "--- Description = '#{desc}'"
276
+ puts err.backtrace.grep(/regexador_/).first # grep applies the pattern; find without a block ignored it and always gave the first frame
277
+ end
278
+ end
279
+ end
280
+
281
+ # $debug = true
282
+
283
+ @programs.each do |x| # One example group per complete-program fixture; the program text is used as-is.
284
+ desc, prog, wanted, good, bad =
285
+ x.description, x.program, x.regex, x.good, x.bad
286
+ describe "A complete program (#{desc})" do
287
+ begin # Runs while the examples are being defined, so the program is compiled once per fixture.
288
+ it("can be parsed") { @parser.parse_with_debug(prog).succeeds }
289
+ pattern = Regexador.new(prog)
290
+ rx = pattern.to_regex
291
+ it("can be converted to a regex") { rx.class.should == Regexp }
292
+ good.each {|str| it("should match #{str.inspect}") { rx.should match(str) } }
293
+ bad.each {|str| it("should not match #{str.inspect}") { rx.should_not match(str) } }
294
+ good.each {|str| it("should natively match #{str.inspect}") { (!!(pattern =~ str)).should == true } }
295
+ bad.each {|str| it("should not natively match #{str.inspect}") { (!!(pattern =~ str)).should == false } }
296
+ it("yields the expected regex") { (rx.to_s.should == wanted.to_s) if wanted }
297
+ # Sanity check... does the expected regex really match properly?
298
+ good.each {|str| it("has an expected regex matching #{str.inspect}") { wanted.should =~ str } }
299
+ bad.each {|str| it("has an expected regex not matching #{str.inspect}") { wanted.should_not =~ str } }
300
+ rescue => err # Definition-time errors only (e.g. the program failing to compile); failures inside it-blocks are reported by RSpec.
301
+ puts "--- ERROR: #{err}"
302
+ puts "--- Description = '#{desc}'"
303
+ puts err.backtrace.grep(/regexador_/).first # grep applies the pattern; find without a block ignored it and always gave the first frame
304
+ end
305
+ end
306
+ end
307
+
308
+ @captures.each do |x|
309
+ desc, prog, wanted, examples =
310
+ x.description, x.program, x.regex, x.examples
311
+ describe "A program with captures (#{desc})" do
312
+ begin
313
+ it("can be parsed") { @parser.parse(prog).succeeds }
314
+
315
+ pattern = Regexador.new(prog)
316
+ rx = pattern.to_regex
317
+ it("can be converted to a regex") { rx.class.should == Regexp }
318
+
319
+ examples.each do |example|
320
+ example.each_pair do |str, results|
321
+ mobj = rx.match(str) # ordinary Ruby match object
322
+ obj = pattern.match(str) # special object returned
323
+ results.each_pair do |cvar, val|
324
+ it("grabs captures correctly") { mobj[cvar].should == val }
325
+ it("exposes captures via method names") { obj.send(cvar).should == val }
326
+ end
327
+ end
328
+ end
329
+ it("yields the expected regex") { (rx.to_s.should == wanted.to_s) if wanted }
330
+ rescue => err
331
+ puts "Error: #{err}"
332
+ end
333
+ end
334
+ end
335
+
336
+ end
337
+
338
+ describe Regexador::Transform do # Unit-level check of a transform node, independent of the parser.
339
+ describe Regexador::Transform::StringNode do
340
+ let(:sn) { Regexador::Transform::StringNode.new('.string.') }
341
+
342
+ it 'converts to regexp escaped strings' do # the dots in the input are expected to come back backslash-escaped
343
+ sn.to_s.should == '\.string\.'
344
+ end
345
+ end
346
+ end
347
+
348
+