regexador 0.4.5 → 0.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,85 @@
1
+ ---
2
+ - !ruby/object:Capture
3
+ description: US phone number
4
+ program: |
5
+ match
6
+ @area_code = 3 * D
7
+ `-
8
+ @prefix = 3 * D
9
+ `-
10
+ @last4 = 4 * D
11
+ end
12
+ regex: !ruby/regexp /(?<area_code>(\d){3})\-(?<prefix>(\d){3})\-(?<last4>(\d){4})/
13
+ examples:
14
+ - "512-555-2001":
15
+ :area_code: "512"
16
+ :prefix: "555"
17
+ :last4: "2001"
18
+ - !ruby/object:Capture
19
+ description: A simple backreference
20
+ program: |2
21
+ tag = many %alpha
22
+ match
23
+ `<
24
+ @tag = tag
25
+ `>
26
+ @cdata = any X
27
+ "</"
28
+ @tag `>
29
+ end
30
+ regex: !ruby/regexp /<(?<tag>([[:alpha:]])+)>(?<cdata>(.)*)<\/\k<tag>>/
31
+ examples:
32
+ - "<body>abcd</body>":
33
+ :tag: "body"
34
+ :cdata: "abcd"
35
+ - "<table>table</table>":
36
+ :tag: "table"
37
+ :cdata: "table"
38
+ - !ruby/object:Capture
39
+ description: A simple backreference again
40
+ program: |2
41
+ tag = many %alpha
42
+ match
43
+ `<
44
+ @tag = tag
45
+ `>
46
+ @cdata = any X
47
+ "</" @tag `> # Slightly different code
48
+ end
49
+ regex: !ruby/regexp /<(?<tag>([[:alpha:]])+)>(?<cdata>(.)*)<\/\k<tag>>/
50
+ examples:
51
+ - "<body>abcd</body>":
52
+ :tag: "body"
53
+ :cdata: "abcd"
54
+ - "<table>table</table>":
55
+ :tag: "table"
56
+ :cdata: "table"
57
+ - !ruby/object:Capture
58
+ description: A simple inline backreference with alternation
59
+ program: |2
60
+ match
61
+ BOS
62
+ (@var = "x") | (@var = "y")
63
+ @var
64
+ EOS
65
+ end
66
+ regex: !ruby/regexp /^((?<var>x)|(?<var>y))\k<var>$/
67
+ examples:
68
+ - "xx":
69
+ :var: "x"
70
+ - "yy":
71
+ :var: "y"
72
+ - !ruby/object:Capture
73
+ description: A simple inline capture
74
+ program: "match `a @var = `b `c end"
75
+ regex: !ruby/regexp /a(?<var>bc)/
76
+ examples:
77
+ - "abc":
78
+ :var: "bc"
79
+ - !ruby/object:Capture
80
+ description: A simple inline capture with parens
81
+ program: "match `a (@var = `b) `c end"
82
+ regex: !ruby/regexp /a(?<var>b)c/
83
+ examples:
84
+ - "abc":
85
+ :var: "b"
@@ -0,0 +1,116 @@
1
+ require 'yaml'
2
+ require 'erb'
3
+
4
# Plain record for one spec fixture. Instances are created by the YAML loader
# from entries tagged !ruby/object:Program; this script only reads the fields.
class Program
  # Human-readable title, the program source, and the regex it should yield.
  attr_accessor :description, :program, :regex
  # Strings expected to match / not match, plus capture examples.
  attr_accessor :good, :bad, :examples
end

# Fixture entries tagged !ruby/object:Capture are the same record type.
Capture = Program
9
+
10
# Returns the body of +str+'s #inspect form (the text between the surrounding
# double quotes), with every escaped "\n" sequence turned back into a real
# newline. Other escapes produced by #inspect (\", \t, \\ ...) are left as is.
#
# The slice is [1..-2] so that BOTH the opening and the closing quote are
# dropped; slicing to -1 would leave the closing quote on the result.
#
# Note: the substitution is textual, so an escaped backslash that happens to
# be followed by "n" in the inspect output is also rewritten.
def escape(str)
  str.inspect[1..-2].gsub(/\\n/, "\n")
end
13
+
14
# Reads one YAML fixture file and returns the deserialized objects.
#
# The fixtures use !ruby/object:Program and !ruby/regexp tags. Psych 4
# (Ruby 3.1+) made YAML.load safe by default, which rejects those tags, so the
# unrestricted loader is used when it exists; older Psych versions fall back
# to YAML.load, which was already unrestricted there.
#
# SECURITY: unrestricted loading can instantiate arbitrary objects. Only use
# this on fixture files that ship with the project, never on external input.
def load_fixture(path)
  source = File.read(path)
  YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(source) : YAML.load(source)
end

@oneliners = load_fixture("spec/oneliners.yaml")
@programs = load_fixture("spec/programs.yaml")
16
+
17
# Fixed preamble of the generated spec file: it requires the test support
# code, opens the top-level `describe Regexador` group and defines the
# `program` class-level helper used by every generated test. The group is
# closed at the very end of the script, after all tests have been appended.
text = <<-SPEC_HEADER
require './spec/testing'

describe Regexador do

before(:all) do
@parser = Regexador::Parser.new
@pattern = @parser.pattern
end

def self.program &block
let(:code, &block)
let(:program) { Program.new(code) }
let(:regexp) { program.regexp }

subject { program }
end

SPEC_HEADER
36
+
37
# Builds one line of generated spec source: a loop over the generated local
# array +list+ ("good" or "bad") that defines one example per string.
#
# list    - name of the array variable in the generated spec
# label   - text placed in the example title before the inspected string
# target  - name of the regex variable in the generated spec ("wanted" or "rx")
# matcher - "should" or "should_not"
def example_loop(list, label, target, matcher)
  " #{list}.each {|str| it('#{label} ' + str.inspect) { #{target}.#{matcher} =~ str } }\n"
end

# Returns the loop line for the good list (when +good+ is non-nil) followed by
# the loop line for the bad list (when +bad+ is non-nil). An empty array still
# counts as present, so it still produces its (no-op) loop line.
def example_loops(target, good, bad, good_label, bad_label)
  loops = ""
  loops += example_loop("good", good_label, target, "should") if good
  loops += example_loop("bad", bad_label, target, "should_not") if bad
  loops
end

# Generated source that checks the fixture itself: the expected regex
# (`wanted`) must match every good string and no bad string.
# Returns "" when the fixture has neither list.
def sanity_check(good, bad)
  return "" unless good || bad

  "\n # Check sanity: Is test valid?\n" +
    example_loops("wanted", good, bad,
                  "has expected regex matching", "has expected regex NOT matching")
end

# Generated source that checks the compiled regex (`rx`) against the good and
# bad strings. The header comment is always emitted, even with no lists.
def result_check(good, bad)
  "\n # Is compiled result valid?\n" +
    example_loops("rx", good, bad, "should match", "should NOT match")
end
65
+
66
# One-liner fixtures hold only the body of a program; wrap each one in the
# match/end keywords before it is emitted.
@oneliners.each { |x| x.program = "match #{x.program} end" }

programs = @oneliners + @programs

# Emit one generated `describe` group per fixture, numbered from 1.
# The locals num, desc, assign_prog, good, bad and wanted are read by the ERB
# template through `binding`, so their names must not change.
programs.each.with_index(1) do |x, num|
  desc, prog, wanted, good, bad =
    x.description, x.program, x.regex, x.good, x.bad

  # Single-line programs are embedded with %q<...>; anything else goes into a
  # non-interpolating heredoc.
  # NOTE(review): "\\n" below is a backslash followed by "n", not a newline,
  # so this gsub only touches programs containing that literal two-character
  # sequence. It looks like it was meant to indent continuation lines -
  # confirm the intent before changing it.
  assign_prog =
    if prog.split("\n").size == 1
      "prog = %q<#{prog.chomp}>"
    else
      "prog = <<-'END'\n #{prog.gsub("\\n", "\\n ")}\n END"
    end

  piece1 = <<-ERB

### Test <%= num %>: <%= desc %>

describe "<%= desc %>:" do
<%= assign_prog %>
program { prog }

good = <%= good.inspect %>
bad = <%= bad.inspect %>
wanted = <%= wanted.inspect %>

it { should be_parseable }

rx = nil
it "can be converted to a regex" do
rx = regexp
rx.class.should == Regexp
rx.should == wanted
end
  ERB

  piece2 = sanity_check(good, bad)
  piece3 = result_check(good, bad)
  piece5 = " end # end of test\n\n"

  # Only piece1 contains ERB tags, but the whole group is rendered in one pass.
  pieces = piece1 + piece2 + piece3 + piece5
  text << ERB.new(pieces).result(binding)
end

# Close the top-level `describe Regexador do` opened in the preamble.
text << "\nend"

puts text
116
+
@@ -0,0 +1,1036 @@
1
+ ---
2
+ - !ruby/object:Program
3
+ description: Single char
4
+ program: '`x'
5
+ regex: !ruby/regexp /x/
6
+ good:
7
+ - abcx
8
+ - xyzb
9
+ - x
10
+ bad:
11
+ - yz
12
+ - ''
13
+ - ABC
14
+ - !ruby/object:Program
15
+ description: Unicode codepoint
16
+ program: '&20ac'
17
+ regex: !ruby/regexp /€/
18
+ good:
19
+ - "€"
20
+ - "xyz€"
21
+ - "x€yz"
22
+ bad:
23
+ - yz
24
+ - ''
25
+ - ABC
26
+ - !ruby/object:Program
27
+ description: Manual international characters
28
+ program: '"ö"'
29
+ regex: !ruby/regexp /ö/
30
+ good:
31
+ - "öffnen"
32
+ - "xyzö"
33
+ - "xöyz"
34
+ bad:
35
+ - "offnen"
36
+ - yz
37
+ - ''
38
+ - ABC
39
+ - !ruby/object:Program
40
+ description: Simple range
41
+ program: '`a-`f'
42
+ regex: !ruby/regexp /[a-f]/
43
+ good:
44
+ - alpha
45
+ - xyzb
46
+ - c
47
+ bad:
48
+ - xyz
49
+ - ''
50
+ - ABC
51
+ - !ruby/object:Program
52
+ description: Negated range
53
+ program: '`c~`p'
54
+ regex: !ruby/regexp /[^c-p]/
55
+ good:
56
+ - ab
57
+ - rst
58
+ bad:
59
+ - def
60
+ - mno
61
+ - ''
62
+ - !ruby/object:Program
63
+ description: Negated char
64
+ program: ~`d
65
+ regex: !ruby/regexp /[^d]/
66
+ good:
67
+ - xyz
68
+ - '123'
69
+ bad:
70
+ - d
71
+ - dd
72
+ - !ruby/object:Program
73
+ description: POSIX class
74
+ program: '%alnum'
75
+ regex: !ruby/regexp /[[:alnum:]]/
76
+ good:
77
+ - abc365
78
+ - '237'
79
+ - xyz
80
+ bad:
81
+ - '---'
82
+ - ':,.-'
83
+ - !ruby/object:Program
84
+ description: Simple char class
85
+ program: '''prstu'''
86
+ regex: !ruby/regexp /[prstu]/
87
+ good:
88
+ - du
89
+ - ppp
90
+ - sr
91
+ bad:
92
+ - abc
93
+ - xyz
94
+ - !ruby/object:Program
95
+ description: Negated char class
96
+ program: ~'ilmnop'
97
+ regex: !ruby/regexp /[^ilmnop]/
98
+ good:
99
+ - abacus
100
+ - peccata
101
+ - hydrogen
102
+ bad:
103
+ - oil
104
+ - pill
105
+ - !ruby/object:Program
106
+ description: Predef Beginning of string
107
+ program: BOS
108
+ regex: !ruby/regexp /^/
109
+ good:
110
+ - ''
111
+ bad: [] # Matches anything
112
+ - !ruby/object:Program
113
+ description: Predef End of string
114
+ program: EOS
115
+ regex: !ruby/regexp /$/
116
+ good:
117
+ - ''
118
+ bad: [] # Matches anything
119
+ - !ruby/object:Program
120
+ description: Predef Word boundary
121
+ program: WB
122
+ regex: !ruby/regexp /\b/
123
+ good:
124
+ - xyz
125
+ bad:
126
+ - ''
127
+ - '---'
128
+ - !ruby/object:Program
129
+ description: Simple string
130
+ program: '"xyz"'
131
+ regex: !ruby/regexp /xyz/
132
+ good:
133
+ - xyz
134
+ - abcxyzdef
135
+ bad:
136
+ - abc
137
+ - xydefz
138
+ - !ruby/object:Program
139
+ description: Single-bounded repetition
140
+ program: 5 * "xyz"
141
+ regex: !ruby/regexp /(xyz){5}/
142
+ good:
143
+ - xyzxyzxyzxyzxyz
144
+ bad:
145
+ - xyzxyzxyzxyz
146
+ - !ruby/object:Program
147
+ description: Double-bounded repetition
148
+ program: 3,4 * %alpha
149
+ regex: !ruby/regexp /([[:alpha:]]){3,4}/
150
+ good:
151
+ - abc
152
+ - abcd
153
+ bad:
154
+ - ab
155
+ - x
156
+ - !ruby/object:Program
157
+ description: any-qualifier
158
+ program: any "abc"
159
+ regex: !ruby/regexp /(abc)*/
160
+ good:
161
+ - ''
162
+ - abc
163
+ - abcabc
164
+ - xyz
165
+ bad: [] # Matches anything
166
+ - !ruby/object:Program
167
+ description: many-qualifier
168
+ program: many "def"
169
+ regex: !ruby/regexp /(def)+/
170
+ good:
171
+ - def
172
+ - defdef
173
+ - defdefdef
174
+ bad:
175
+ - ''
176
+ - de
177
+ - xyz
178
+ - !ruby/object:Program
179
+ description: nocase-qualifier
180
+ program: nocase "ghi"
181
+ regex: !ruby/regexp /((?i)ghi)/
182
+ good:
183
+ - ghi
184
+ - GHI
185
+ - abGhicd
186
+ bad:
187
+ - ''
188
+ - gh
189
+ - abc
190
+ - !ruby/object:Program
191
+ description: maybe-qualifier
192
+ program: maybe "ghi"
193
+ regex: !ruby/regexp /(ghi)?/
194
+ good:
195
+ - ''
196
+ - ghi
197
+ - abghicd
198
+ - gh
199
+ bad: [] # Matches anything
200
+ - !ruby/object:Program
201
+ description: Simple concatenation of two strings
202
+ program: '"abc" "def"'
203
+ regex: !ruby/regexp /abcdef/
204
+ good:
205
+ - abcdefghi
206
+ - xyzabcdef
207
+ bad:
208
+ - ''
209
+ - abcxyzdef
210
+ - !ruby/object:Program
211
+ description: Concat of string and char class
212
+ program: '"abc"''def'''
213
+ regex: !ruby/regexp /abc[def]/
214
+ good:
215
+ - abcd
216
+ - abce
217
+ bad:
218
+ - ''
219
+ - abcx
220
+ - !ruby/object:Program
221
+ description: Simple alternation
222
+ program: '"abc" | "def"'
223
+ regex: !ruby/regexp /(abc|def)/
224
+ good:
225
+ - abc
226
+ - xyzabc123
227
+ - xdefy
228
+ bad:
229
+ - ''
230
+ - abde
231
+ - ab c d ef
232
+ - !ruby/object:Program
233
+ description: Alternation of concatenations
234
+ program: '"ab" "c" | "d" "ef"'
235
+ regex: !ruby/regexp /(abc|def)/
236
+ good:
237
+ - abc
238
+ - xyzabc123
239
+ - xdefy
240
+ bad:
241
+ - ''
242
+ - abde
243
+ - ab c d ef
244
+ - !ruby/object:Program
245
+ description: Precedence of concatenation over alternation
246
+ program: '"a" "b" | "c"'
247
+ regex: !ruby/regexp /(ab|c)/
248
+ good:
249
+ - ab
250
+ - c
251
+ bad:
252
+ - b
253
+ - a
254
+ - d
255
+ - !ruby/object:Program
256
+ description: Precedence of parens over concatenation
257
+ program: '"a" ("b" | "c")'
258
+ regex: !ruby/regexp /a(b|c)/
259
+ good:
260
+ - ab
261
+ - ac
262
+ bad:
263
+ - a
264
+ - b
265
+ - c
266
+ - !ruby/object:Program
267
+ description: Anchors and alternation
268
+ program: 'BOS "x" | "y" EOS'
269
+ regex: !ruby/regexp /(^x|y$)/
270
+ good:
271
+ - xabc
272
+ - abcy
273
+ bad:
274
+ - abc
275
+ - abcx
276
+ - yabc
277
+ - axb
278
+ - ayb
279
+ - axyb
280
+ - !ruby/object:Program
281
+ description: Anchors, alternation, parens
282
+ program: 'BOS ("x" | "y") EOS'
283
+ regex: !ruby/regexp /^(x|y)$/
284
+ good:
285
+ - x
286
+ - y
287
+ bad:
288
+ - abc
289
+ - abcx
290
+ - yabc
291
+ - xabc
292
+ - abcy
293
+ - !ruby/object:Program
294
+ description: Parens, concatenation, alternation
295
+ program: 'BOS ((maybe `0) `1-`9 | `1 D2) EOS'
296
+ regex: !ruby/regexp /^((0)?[1-9]|1[0-2])$/
297
+ good:
298
+ - '01'
299
+ - '09'
300
+ - '12'
301
+ bad:
302
+ - '0'
303
+ - '00'
304
+ - '13'
305
+ # - !ruby/object:Program
306
+ # description: My description
307
+ # program: 'string'
308
+ # regex: !ruby/regexp
309
+ # good: []
310
+ # bad: []
311
+ - !ruby/object:Program
312
+ description: Single backtick char
313
+ program: '``'
314
+ regex: !ruby/regexp /`/
315
+ good:
316
+ - "`"
317
+ - "this is a tick: `"
318
+ - "tock ` tock"
319
+ bad:
320
+ - ''
321
+ - 'abc'
322
+ - !ruby/object:Program
323
+ description: Single backslash char
324
+ program: '`\'
325
+ regex: !ruby/regexp /\\/
326
+ good:
327
+ - "\\"
328
+ - "trying \\n"
329
+ - "and \\b also"
330
+ bad:
331
+ - "\n"
332
+ - "\b"
333
+ - "neither \r nor \t"
334
+ - !ruby/object:Program
335
+ description: Empty string
336
+ program: '""'
337
+ regex: !ruby/regexp //
338
+ good:
339
+ - ""
340
+ - "abc"
341
+ bad: [] # Matches anything
342
+ - !ruby/object:Program
343
+ description: Simple char class
344
+ program: "'abcdef'"
345
+ regex: !ruby/regexp /[abcdef]/
346
+ good:
347
+ - "there's a cat here"
348
+ - "item c"
349
+ bad:
350
+ - ""
351
+ - "proton"
352
+ - !ruby/object:Program
353
+ description: Simple one-char class
354
+ program: "'x'"
355
+ regex: !ruby/regexp /[x]/
356
+ good:
357
+ - "x"
358
+ - "uvwxyz"
359
+ bad:
360
+ - ""
361
+ - "abc"
362
+ - !ruby/object:Program
363
+ description: Alternation of range and class
364
+ program: '`a-`f | ''xyz'''
365
+ regex: !ruby/regexp /([a-f]|[xyz])/
366
+ good:
367
+ - "a"
368
+ - "x"
369
+ - "z"
370
+ - "c"
371
+ bad:
372
+ - ""
373
+ - "jkl"
374
+ - "gw"
375
+ - !ruby/object:Program
376
+ description: Alternation of range and maybe-clause
377
+ program: '`1-`6| maybe "#"'
378
+ regex: !ruby/regexp /([1-6]|(\#)?)/
379
+ good:
380
+ - ""
381
+ - "1#"
382
+ - "1"
383
+ - " 2# abc"
384
+ bad: [] # Matches everything
385
+ - !ruby/object:Program
386
+ description: Four-way alternation
387
+ program: '`a | `b|`c|`d'
388
+ regex: !ruby/regexp /(a|b|c|d)/
389
+ good:
390
+ - "xyza"
391
+ - "xybz"
392
+ - "xcyz"
393
+ - "dxyz"
394
+ bad:
395
+ - ""
396
+ - "every"
397
+ - "ghijk"
398
+ - !ruby/object:Program
399
+ description: Concatenation of range and class
400
+ program: '`a-`f ''xyz'''
401
+ regex: !ruby/regexp /[a-f][xyz]/
402
+ good:
403
+ - "ax"
404
+ - "fz"
405
+ - "cy"
406
+ bad:
407
+ - "zf"
408
+ - "xa"
409
+ - "gz"
410
+ - "hp"
411
+ - "mx"
412
+ - !ruby/object:Program
413
+ description: Concat of strings and maybe-clause
414
+ program: '"this" "that" maybe "other"'
415
+ regex: !ruby/regexp /thisthat(other)?/
416
+ good:
417
+ - "thisthat"
418
+ - "thisthatother"
419
+ - "abc thisthat xyz"
420
+ - "abc thisthatother xyz"
421
+ bad:
422
+ - ""
423
+ - "abc"
424
+ - "this that"
425
+ - "this that other"
426
+ - !ruby/object:Program
427
+ description: Simple repetition of class
428
+ program: 3 * 'xyz'
429
+ regex: !ruby/regexp /([xyz]){3}/
430
+ good:
431
+ - "xyz"
432
+ - "xxx"
433
+ - "yzy"
434
+ - "xyzzy123"
435
+ bad:
436
+ - ""
437
+ - "abc"
438
+ - "xy"
439
+ - "axy"
440
+ - "xyb"
441
+ - "axyb"
442
+ - !ruby/object:Program
443
+ description: Simple repetition of range
444
+ program: 4 * `1-`6
445
+ regex: !ruby/regexp /([1-6]){4}/
446
+ good:
447
+ - "1111"
448
+ - "1234"
449
+ - "abc 6543 def"
450
+ bad:
451
+ - ""
452
+ - "abc"
453
+ - "123"
454
+ - "123 4"
455
+ - !ruby/object:Program
456
+ description: Complex repetition of char
457
+ program: 3,5 * (`a)
458
+ regex: !ruby/regexp /(a){3,5}/
459
+ good:
460
+ - "aaa"
461
+ - "aaaa"
462
+ - "aaaaa"
463
+ - "xaaay"
464
+ - "aaaaaaa"
465
+ bad:
466
+ - ""
467
+ - "abc"
468
+ - "aa"
469
+ - !ruby/object:Program
470
+ description: Complex repetition of parenthesized class
471
+ program: 4,7 * ('xyz')
472
+ regex: !ruby/regexp /([xyz]){4,7}/
473
+ good:
474
+ - "xxxx"
475
+ - "yyyy"
476
+ - "xyzy"
477
+ - "xyzzy"
478
+ - "zyzzyva"
479
+ - "xyzxyz"
480
+ - "xyzxyzx"
481
+ - "xyzxyzxyzxyz"
482
+ bad:
483
+ - ""
484
+ - "abc"
485
+ - "x"
486
+ - "xx"
487
+ - "xxx"
488
+ - "xyz xy"
489
+ - !ruby/object:Program
490
+ description: Complex repetition of parenthesized range
491
+ program: 0,3 * (`1-`6)
492
+ regex: !ruby/regexp /([1-6]){0,3}/
493
+ good:
494
+ - ""
495
+ - "1"
496
+ - "11"
497
+ - "111"
498
+ - "56"
499
+ - "654"
500
+ - "1111"
501
+ - "x123y"
502
+ - "x123456y"
503
+ bad: [] # Matches anything
504
+
505
+ #### Examples below are anchored with ^ and $
506
+
507
+ - !ruby/object:Program
508
+ description: Single char (anchored)
509
+ program: 'BOS `x EOS'
510
+ regex: !ruby/regexp /^x$/
511
+ good:
512
+ - "x"
513
+ bad:
514
+ - "yz"
515
+ - ''
516
+ - "ABC"
517
+ - !ruby/object:Program
518
+ description: Simple range (anchored)
519
+ program: 'BOS `a-`f EOS'
520
+ regex: !ruby/regexp /^[a-f]$/
521
+ good:
522
+ - a
523
+ - b
524
+ - c
525
+ - d
526
+ - e
527
+ - f
528
+ bad:
529
+ - "xyz"
530
+ - ''
531
+ - "ABC"
532
+ - !ruby/object:Program
533
+ description: Negated range (anchored)
534
+ program: 'BOS `c~`p EOS'
535
+ regex: !ruby/regexp /^[^c-p]$/
536
+ good:
537
+ - "a"
538
+ - "r"
539
+ bad:
540
+ - "def"
541
+ - "mno"
542
+ - ''
543
+ - !ruby/object:Program
544
+ description: Negated char (anchored)
545
+ program: 'BOS ~`d EOS'
546
+ regex: !ruby/regexp /^[^d]$/
547
+ good:
548
+ - "x"
549
+ - "1"
550
+ bad:
551
+ - "d"
552
+ - "dd"
553
+ - "abc"
554
+ - !ruby/object:Program
555
+ description: POSIX class (anchored)
556
+ program: 'BOS %alnum EOS'
557
+ regex: !ruby/regexp /^[[:alnum:]]$/
558
+ good:
559
+ - "c"
560
+ - "2"
561
+ bad:
562
+ - ""
563
+ - "abc"
564
+ - "123"
565
+ - "-"
566
+ - ":"
567
+ - ","
568
+ - "."
569
+ - !ruby/object:Program
570
+ description: Simple char class (anchored)
571
+ program: "BOS 'prstu' EOS"
572
+ regex: !ruby/regexp /^[prstu]$/
573
+ good:
574
+ - "u"
575
+ - "p"
576
+ - "s"
577
+ bad:
578
+ - ""
579
+ - "abc"
580
+ - "x"
581
+ - !ruby/object:Program
582
+ description: Negated char class (anchored)
583
+ program: "BOS ~'ilmnop' EOS"
584
+ regex: !ruby/regexp /^[^ilmnop]$/
585
+ good:
586
+ - "a"
587
+ - "e"
588
+ - "h"
589
+ bad:
590
+ - "o"
591
+ - "i"
592
+ - "l"
593
+ - !ruby/object:Program
594
+ description: Simple string (anchored)
595
+ program: 'BOS "xyz" EOS'
596
+ regex: !ruby/regexp /^xyz$/
597
+ good:
598
+ - "xyz"
599
+ bad:
600
+ - ""
601
+ - "abc"
602
+ - "abcxyzdef"
603
+ - "xydefz"
604
+ - !ruby/object:Program
605
+ description: Single-bounded repetition (anchored)
606
+ program: 'BOS 5 * "xyz" EOS'
607
+ regex: !ruby/regexp /^(xyz){5}$/
608
+ good:
609
+ - "xyzxyzxyzxyzxyz"
610
+ bad:
611
+ - "xyzxyzxyzxyz"
612
+ - "abcxyzxyzxyzxyz"
613
+ - "xyzxyzxyzxyzabc"
614
+ - !ruby/object:Program
615
+ description: Double-bounded repetition (anchored)
616
+ program: "BOS 3,4 * %alpha EOS"
617
+ regex: !ruby/regexp /^([[:alpha:]]){3,4}$/
618
+ good:
619
+ - "abc"
620
+ - "abcd"
621
+ bad:
622
+ - ""
623
+ - "ab"
624
+ - "x"
625
+ - "abcde"
626
+ - !ruby/object:Program
627
+ description: any-qualifier (anchored)
628
+ program: 'BOS any "abc" EOS'
629
+ regex: !ruby/regexp /^(abc)*$/
630
+ good:
631
+ - ""
632
+ - "abc"
633
+ - "abcabc"
634
+ - "abcabcabc"
635
+ bad:
636
+ - "ab"
637
+ - "abcab"
638
+ - "xyz"
639
+ - !ruby/object:Program
640
+ description: many-qualifier (anchored)
641
+ program: 'BOS many "def" EOS'
642
+ regex: !ruby/regexp /^(def)+$/
643
+ good:
644
+ - "def"
645
+ - "defdef"
646
+ - "defdefdef"
647
+ bad:
648
+ - ""
649
+ - "d"
650
+ - "de"
651
+ - "defd"
652
+ - "xyz"
653
+ - !ruby/object:Program
654
+ description: maybe-qualifier (anchored)
655
+ program: 'BOS maybe "ghi" EOS'
656
+ regex: !ruby/regexp /^(ghi)?$/
657
+ good:
658
+ - ""
659
+ - "ghi"
660
+ bad:
661
+ - "abghicd"
662
+ - "gh"
663
+ - !ruby/object:Program
664
+ description: Simple concatenation of two strings (anchored)
665
+ program: 'BOS "abc" "def" EOS'
666
+ regex: !ruby/regexp /^abcdef$/
667
+ good:
668
+ - "abcdef"
669
+ bad:
670
+ - ""
671
+ - "abcd"
672
+ - "xyzabcdef"
673
+ - "abcxyzdef"
674
+ - "abcdefxyz"
675
+ - !ruby/object:Program
676
+ description: Concat of string and char class (anchored)
677
+ program: "BOS \"abc\" 'def' EOS"
678
+ regex: !ruby/regexp /^abc[def]$/
679
+ good:
680
+ - "abcd"
681
+ - "abce"
682
+ - "abcf"
683
+ bad:
684
+ - ""
685
+ - "ab"
686
+ - "abc"
687
+ - "abcx"
688
+ - !ruby/object:Program
689
+ description: Simple alternation (anchored)
690
+ program: 'BOS ("abc" | "def") EOS'
691
+ regex: !ruby/regexp /^(abc|def)$/
692
+ good:
693
+ - "abc"
694
+ - "def"
695
+ bad:
696
+ - ""
697
+ - "abde"
698
+ - "ab c d ef"
699
+ - "xdefy"
700
+ - !ruby/object:Program
701
+ description: Alternation of concatenations (anchored)
702
+ program: 'BOS ("ab" "c" | "d" "ef") EOS'
703
+ regex: !ruby/regexp /^(abc|def)$/
704
+ good:
705
+ - "abc"
706
+ - "def"
707
+ bad:
708
+ - ""
709
+ - "abde"
710
+ - "ab c d ef"
711
+ - "xdefy"
712
+ - !ruby/object:Program
713
+ description: Precedence of concatenation over alternation (anchored)
714
+ program: 'BOS ("a" "b" | "c") EOS'
715
+ regex: !ruby/regexp /^(ab|c)$/
716
+ good:
717
+ - "ab"
718
+ - "c"
719
+ bad:
720
+ - ""
721
+ - "b"
722
+ - "a"
723
+ - "d"
724
+ - "abc"
725
+ - "abcde"
726
+ - !ruby/object:Program
727
+ description: Precedence of parens over concatenation (anchored)
728
+ program: 'BOS "a" ("b" | "c") EOS'
729
+ regex: !ruby/regexp /^a(b|c)$/
730
+ good:
731
+ - "ab"
732
+ - "ac"
733
+ bad:
734
+ - "a"
735
+ - "b"
736
+ - "c"
737
+ - "abc"
738
+ - "abx"
739
+ - "bac"
740
+ - !ruby/object:Program
741
+ description: Anchors and alternation (anchored)
742
+ program: 'BOS "x" | "y" EOS'
743
+ regex: !ruby/regexp /(^x|y$)/
744
+ good:
745
+ - xabc
746
+ - abcy
747
+ bad:
748
+ - abc
749
+ - abcx
750
+ - yabc
751
+ - axb
752
+ - ayb
753
+ - axyb
754
+ - !ruby/object:Program
755
+ description: Parens, concatenation, alternation (anchored)
756
+ program: 'BOS ((maybe `0) `1-`9 | `1 D2) EOS'
757
+ regex: !ruby/regexp /^((0)?[1-9]|1[0-2])$/
758
+ good:
759
+ - "01"
760
+ - "09"
761
+ - "12"
762
+ bad:
763
+ - "0"
764
+ - "00"
765
+ - "13"
766
+ - !ruby/object:Program
767
+ description: Single backtick char (anchored)
768
+ program: 'BOS `` EOS'
769
+ regex: !ruby/regexp /^`$/
770
+ good:
771
+ - "`"
772
+ bad:
773
+ - ''
774
+ - 'abc'
775
+ - "this is a tick: `"
776
+ - "tock ` tock"
777
+ - !ruby/object:Program
778
+ description: Single backslash char (anchored)
779
+ program: 'BOS `\ EOS'
780
+ regex: !ruby/regexp /^\\$/
781
+ good:
782
+ - "\\"
783
+ bad:
784
+ - "\n"
785
+ - "\b"
786
+ - "neither \r nor \t"
787
+ - "trying \\n"
788
+ - "and \\b also"
789
+ - !ruby/object:Program
790
+ description: Empty string (anchored)
791
+ program: 'BOS "" EOS'
792
+ regex: !ruby/regexp /^$/
793
+ good:
794
+ - ""
795
+ bad:
796
+ - "abc"
797
+ - !ruby/object:Program
798
+ description: Simple one-char class (anchored)
799
+ program: "BOS 'x' EOS"
800
+ regex: !ruby/regexp /^[x]$/
801
+ good:
802
+ - "x"
803
+ bad:
804
+ - ""
805
+ - "abc"
806
+ - "uvwxyz"
807
+ - !ruby/object:Program
808
+ description: Alternation of range and class (anchored)
809
+ program: "BOS (`a-`f | 'xyz') EOS"
810
+ regex: !ruby/regexp /^([a-f]|[xyz])$/
811
+ good:
812
+ - "a"
813
+ - "x"
814
+ - "z"
815
+ - "c"
816
+ bad:
817
+ - ""
818
+ - "ab"
819
+ - "abc"
820
+ - "xy"
821
+ - "jkl"
822
+ - "gw"
823
+ - !ruby/object:Program
824
+ description: Alternation of range and maybe-clause (anchored)
825
+ program: 'BOS (`1-`6| maybe "#") EOS'
826
+ regex: !ruby/regexp /^([1-6]|(\#)?)$/
827
+ good:
828
+ - ""
829
+ - "1"
830
+ - "#"
831
+ - "6"
832
+ bad:
833
+ - "55"
834
+ - "###"
835
+ - !ruby/object:Program
836
+ description: Four-way alternation (anchored)
837
+ program: 'BOS (`a | `b|`c|`d) EOS'
838
+ regex: !ruby/regexp /^(a|b|c|d)$/
839
+ good:
840
+ - "a"
841
+ - "b"
842
+ - "c"
843
+ - "d"
844
+ bad:
845
+ - ""
846
+ - "ab"
847
+ - "every"
848
+ - "ghijk"
849
+ - !ruby/object:Program
850
+ description: Concatenation of range and class (anchored)
851
+ program: "BOS `a-`f 'xyz' EOS"
852
+ regex: !ruby/regexp /^[a-f][xyz]$/
853
+ good:
854
+ - "ax"
855
+ - "fz"
856
+ - "cy"
857
+ bad:
858
+ - "axe"
859
+ - "fz123"
860
+ - "zf"
861
+ - "xa"
862
+ - "gz"
863
+ - "hp"
864
+ - "mx"
865
+ - !ruby/object:Program
866
+ description: Concat of strings and maybe-clause (anchored)
867
+ program: 'BOS "this" "that" maybe "other" EOS'
868
+ regex: !ruby/regexp /^thisthat(other)?$/
869
+ good:
870
+ - "thisthat"
871
+ - "thisthatother"
872
+ bad:
873
+ - ""
874
+ - "abc"
875
+ - "this that"
876
+ - "this that other"
877
+ - "abc thisthat xyz"
878
+ - "abc thisthatother xyz"
879
+ - !ruby/object:Program
880
+ description: Simple repetition of class (anchored)
881
+ program: "BOS 3 * 'xyz' EOS"
882
+ regex: !ruby/regexp /^([xyz]){3}$/
883
+ good:
884
+ - "xyz"
885
+ - "xxx"
886
+ - "yzy"
887
+ bad:
888
+ - ""
889
+ - "abc"
890
+ - "xy"
891
+ - "axy"
892
+ - "xyb"
893
+ - "axyb"
894
+ - "xyzzy123"
895
+ - !ruby/object:Program
896
+ description: Simple repetition of range (anchored)
897
+ program: "BOS 4 * `1-`6 EOS"
898
+ regex: !ruby/regexp /^([1-6]){4}$/
899
+ good:
900
+ - "1111"
901
+ - "1234"
902
+ bad:
903
+ - ""
904
+ - "abc"
905
+ - "123"
906
+ - "123 4"
907
+ - "abc 6543 def"
908
+ - !ruby/object:Program
909
+ description: Complex repetition of char (anchored)
910
+ program: "BOS 3,5 * (`a) EOS"
911
+ regex: !ruby/regexp /^(a){3,5}$/
912
+ good:
913
+ - "aaa"
914
+ - "aaaa"
915
+ - "aaaaa"
916
+ bad:
917
+ - ""
918
+ - "abc"
919
+ - "aa"
920
+ - "xaaay"
921
+ - "aaaaaaa"
922
+ - !ruby/object:Program
923
+ description: Complex repetition of parenthesized class (anchored)
924
+ program: "BOS 4,7 * ('xyz') EOS"
925
+ regex: !ruby/regexp /^([xyz]){4,7}$/
926
+ good:
927
+ - "xxxx"
928
+ - "yyyy"
929
+ - "xyzy"
930
+ - "xyzzy"
931
+ - "xyzxyz"
932
+ - "xyzxyzx"
933
+ bad:
934
+ - ""
935
+ - "abc"
936
+ - "x"
937
+ - "xx"
938
+ - "xxx"
939
+ - "xyz xy"
940
+ - "xyzxyzxyzxyz"
941
+ - "zyzzyva"
942
+ - !ruby/object:Program
943
+ description: Complex repetition of parenthesized range (anchored)
944
+ program: "BOS 0,3 * (`1-`6) EOS"
945
+ regex: !ruby/regexp /^([1-6]){0,3}$/
946
+ good:
947
+ - ""
948
+ - "1"
949
+ - "11"
950
+ - "111"
951
+ - "56"
952
+ - "654"
953
+ bad:
954
+ - "1111"
955
+ - "x123y"
956
+ - "x123456y"
957
+ #
958
+ # find X with Y # /(?=XY)X/ - pos lookahead
959
+ # find X without Y # /(?!XY)X/ - neg lookahead
960
+ # with X find Y # /(?<=X)Y/ - pos lookbehind
961
+ # without X find Y # /(?<!X)Y/ - neg lookbehind
962
+
963
+ - !ruby/object:Program
964
+ description: Simple lookaround (pos-ahead)
965
+ program: 'find "X" with "Y"'
966
+ regex: !ruby/regexp /(?=XY)X/
967
+ good:
968
+ - "XY"
969
+ bad:
970
+ - "X"
971
+ - "Y"
972
+ - "YX"
973
+ - !ruby/object:Program
974
+ description: Simple lookaround (neg-ahead)
975
+ program: 'find "X" without "Y"'
976
+ regex: !ruby/regexp /(?!XY)X/
977
+ good:
978
+ - "X"
979
+ - "YX"
980
+ bad:
981
+ - "XY"
982
+ - "Y"
983
+ - !ruby/object:Program
984
+ description: Simple lookaround (pos-behind)
985
+ program: 'with "X" find "Y"'
986
+ regex: !ruby/regexp /(?<=X)Y/
987
+ good:
988
+ - "XY"
989
+ bad:
990
+ - "YX"
991
+ - "Y"
992
+ - "X"
993
+ - !ruby/object:Program
994
+ description: Simple lookaround (neg-behind)
995
+ program: 'without "X" find "Y"'
996
+ regex: !ruby/regexp /(?<!X)Y/
997
+ good:
998
+ - "aY"
999
+ - "Y"
1000
+ bad:
1001
+ - "XY"
1002
+ - "X"
1003
+ #
1004
+ - !ruby/object:Program
1005
+ description: Positive lookahead
1006
+ program: "find (3*D \" dollars\") with 3*D"
1007
+ regex: !ruby/regexp /(?=\d{3} dollars)\d{3}/
1008
+ good:
1009
+ - "101 dollars"
1010
+ bad:
1011
+ - "102 pesos"
1012
+ - !ruby/object:Program
1013
+ description: Negative lookahead
1014
+ program: 'find 3*D without " pesos"'
1015
+ regex: !ruby/regexp /(?!\d{3} pesos)\d{3}/
1016
+ good:
1017
+ - "103 dollars"
1018
+ - "104 euros"
1019
+ bad:
1020
+ - "105 pesos"
1021
+ - !ruby/object:Program
1022
+ description: Positive lookbehind
1023
+ program: 'with "USD" find 3*D'
1024
+ regex: !ruby/regexp /(?<=USD)\d{3}/
1025
+ good:
1026
+ - "USD106"
1027
+ bad:
1028
+ - "EUR107"
1029
+ - !ruby/object:Program
1030
+ description: Negative lookbehind
1031
+ program: 'without "USD" find 3*D'
1032
+ regex: !ruby/regexp /(?<!USD)\d{3}/
1033
+ good:
1034
+ - "EUR108"
1035
+ bad:
1036
+ - "USD109"