regexador 0.4.5 → 0.4.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,85 @@
1
+ ---
2
+ - !ruby/object:Capture
3
+ description: US phone number
4
+ program: |
5
+ match
6
+ @area_code = 3 * D
7
+ `-
8
+ @prefix = 3 * D
9
+ `-
10
+ @last4 = 4 * D
11
+ end
12
+ regex: !ruby/regexp /(?<area_code>(\d){3})\-(?<prefix>(\d){3})\-(?<last4>(\d){4})/
13
+ examples:
14
+ - "512-555-2001":
15
+ :area_code: "512"
16
+ :prefix: "555"
17
+ :last4: "2001"
18
+ - !ruby/object:Capture
19
+ description: A simple backreference
20
+ program: |2
21
+ tag = many %alpha
22
+ match
23
+ `<
24
+ @tag = tag
25
+ `>
26
+ @cdata = any X
27
+ "</"
28
+ @tag `>
29
+ end
30
+ regex: !ruby/regexp /<(?<tag>([[:alpha:]])+)>(?<cdata>(.)*)<\/\k<tag>>/
31
+ examples:
32
+ - "<body>abcd</body>":
33
+ :tag: "body"
34
+ :cdata: "abcd"
35
+ - "<table>table</table>":
36
+ :tag: "table"
37
+ :cdata: "table"
38
+ - !ruby/object:Capture
39
+ description: A simple backreference again
40
+ program: |2
41
+ tag = many %alpha
42
+ match
43
+ `<
44
+ @tag = tag
45
+ `>
46
+ @cdata = any X
47
+ "</" @tag `> # Slightly different code
48
+ end
49
+ regex: !ruby/regexp /<(?<tag>([[:alpha:]])+)>(?<cdata>(.)*)<\/\k<tag>>/
50
+ examples:
51
+ - "<body>abcd</body>":
52
+ :tag: "body"
53
+ :cdata: "abcd"
54
+ - "<table>table</table>":
55
+ :tag: "table"
56
+ :cdata: "table"
57
+ - !ruby/object:Capture
58
+ description: A simple inline backreference with alternation
59
+ program: |2
60
+ match
61
+ BOS
62
+ (@var = "x") | (@var = "y")
63
+ @var
64
+ EOS
65
+ end
66
+ regex: !ruby/regexp /^((?<var>x)|(?<var>y))\k<var>$/
67
+ examples:
68
+ - "xx":
69
+ :var: "x"
70
+ - "yy":
71
+ :var: "y"
72
+ - !ruby/object:Capture
73
+ description: A simple inline capture
74
+ program: "match `a @var = `b `c end"
75
+ regex: !ruby/regexp /a(?<var>bc)/
76
+ examples:
77
+ - "abc":
78
+ :var: "bc"
79
+ - !ruby/object:Capture
80
+ description: A simple inline capture with parens
81
+ program: "match `a (@var = `b) `c end"
82
+ regex: !ruby/regexp /a(?<var>b)c/
83
+ examples:
84
+ - "abc":
85
+ :var: "b"
@@ -0,0 +1,122 @@
1
+ require 'yaml'
2
+
3
# Plain data holder for one spec entry. The YAML spec files create instances
# through their `!ruby/object:Program` tags, which fill in the attributes
# below by name.
class Program
  attr_accessor :description,
                :program,
                :regex,
                :good,
                :bad,
                :examples
end
6
+
7
# Returns +str+ in its String#inspect (escaped) form without the surrounding
# double quotes, with every escaped newline (backslash + "n") turned back into
# a real line break.
#
# str - the String to escape.
#
# Returns a new String.
def escape(str)
  # inspect wraps the text in double quotes; 1..-2 drops both of them.
  # (1..-1 only removed the opening quote and left a stray closing quote.)
  str.inspect[1..-2].gsub(/\\n/, "\n")
end
10
+
11
+
12
# Main...

# Reads one YAML spec file and returns the objects it describes.
#
# The spec files use `!ruby/object:Program` and `!ruby/regexp` tags. Newer
# Psych versions make YAML.load safe by default and reject those tags, so
# unsafe_load is used wherever it exists. That is acceptable only because
# these are the project's own spec files; never point this at untrusted YAML.
#
# path - String path of the YAML file.
def load_spec(path)
  # The specs contain non-ASCII characters, so do not depend on the locale.
  source = File.read(path, encoding: "UTF-8")
  YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(source) : YAML.load(source)
end

@oneliners = load_spec("spec/oneliners.yaml")
@programs = load_spec("spec/programs.yaml")

# Header of the generated test file; the test methods are appended to it later.
text = <<-RUBY
$LOAD_PATH << "." << "./lib"

require 'regexador'

require "minitest/autorun"


class TestRegexador < Minitest::Test

RUBY
28
+
29
# Builds the Ruby source fragment that checks the test data itself: every
# "good" string must match the expected regex and every "bad" string must not.
#
# good - list of strings expected to match, or nil.
# bad  - list of strings expected not to match, or nil.
#
# Returns the fragment as a String; it is empty when both lists are missing.
def sanity_check(good, bad)
  return "" if !good && !bad

  good_line = '    good.each {|str| assert regex =~ str, "Invalid test! No match for #{str.inspect}" }'
  bad_line = '    bad.each {|str| refute regex =~ str, "Invalid test! Unexpected match for #{str.inspect}" }'

  parts = ["    # Check sanity: Is test valid?\n"]
  parts << good_line + "\n" if good
  parts << bad_line + "\n" if bad
  parts.join
end
50
+
51
# Builds the Ruby source fragment for the "Is compiled result valid?" section
# of a generated test: one assertion loop over the "good" strings and one
# refutation loop over the "bad" strings.
#
# NOTE(review): the emitted assertions use `regex`, the same variable the
# sanity check uses, not the compiled `prog` - confirm this is intended.
#
# good - list of strings expected to match, or nil.
# bad  - list of strings expected not to match, or nil.
#
# Returns the fragment as a String; it is empty when both lists are missing.
def result_check(good, bad)
  return "" if !good && !bad

  good_line = '    good.each {|str| assert regex =~ str, "Did not match: #{str.inspect}" }'
  bad_line = '    bad.each {|str| refute regex =~ str, "Unexpected match: #{str.inspect}" }'

  parts = ["    # Is compiled result valid?\n"]
  parts << good_line + "\n" if good
  parts << bad_line + "\n" if bad
  parts.join
end
72
+
73
# One-liner specs hold only the pattern body; wrap each in "match ... end" so
# that it is a complete program like the multi-line specs.
@oneliners.each { |spec| spec.program = "match #{spec.program} end" }

# Emit one numbered test method per spec, one-liners first.
(@oneliners + @programs).each.with_index(1) do |spec, num|
  code = spec.program
  regex = spec.regex
  good = spec.good
  bad = spec.bad

  assign_code =
    if code.split("\n").size == 1
      "code = %q<#{code.chomp}>"
    else
      # Indent the continuation lines so the program sits inside the emitted
      # heredoc. This must match a real newline: the former "\\n" pattern
      # looked for a literal backslash followed by "n" and never matched.
      indented = code.gsub(/\n(?=.)/, "\n      ")
      "code = <<-'END'\n      #{indented}\n    END"
    end

  # Method-name suffix: lower-case, parentheses dropped, and space, comma,
  # slash and hyphen each replaced by an underscore.
  slug = spec.description.downcase.delete("()").tr(" ,/-", "_")
  number = format("%03d", num)

  text << <<-RUBY
  def test_#{number}_#{slug}
    #{assign_code}
    parser = Regexador::Parser.new
    pattern = parser.pattern

    assert parser.program.parse(code)

    prog = Regexador.new(code)

    good = #{good.inspect}
    bad = #{bad.inspect}
    regex = #{regex.inspect}

    assert regex.class == Regexp, "Not a regex! #{'#{regex.inspect}'}"

  RUBY

  text << sanity_check(good, bad) << result_check(good, bad) << "  end\n\n"
end

# Close the generated test class.
text << "\nend"

puts text
122
+
@@ -0,0 +1,1036 @@
1
+ ---
2
+ - !ruby/object:Program
3
+ description: Single char
4
+ program: '`x'
5
+ regex: !ruby/regexp /x/
6
+ good:
7
+ - abcx
8
+ - xyzb
9
+ - x
10
+ bad:
11
+ - yz
12
+ - ''
13
+ - ABC
14
+ - !ruby/object:Program
15
+ description: Unicode codepoint
16
+ program: '&20ac'
17
+ regex: !ruby/regexp /€/
18
+ good:
19
+ - "€"
20
+ - "xyz€"
21
+ - "x€yz"
22
+ bad:
23
+ - yz
24
+ - ''
25
+ - ABC
26
+ - !ruby/object:Program
27
+ description: Manual international characters
28
+ program: '"ö"'
29
+ regex: !ruby/regexp /ö/
30
+ good:
31
+ - "öffnen"
32
+ - "xyzö"
33
+ - "xöyz"
34
+ bad:
35
+ - "offnen"
36
+ - yz
37
+ - ''
38
+ - ABC
39
+ - !ruby/object:Program
40
+ description: Simple range
41
+ program: '`a-`f'
42
+ regex: !ruby/regexp /[a-f]/
43
+ good:
44
+ - alpha
45
+ - xyzb
46
+ - c
47
+ bad:
48
+ - xyz
49
+ - ''
50
+ - ABC
51
+ - !ruby/object:Program
52
+ description: Negated range
53
+ program: '`c~`p'
54
+ regex: !ruby/regexp /[^c-p]/
55
+ good:
56
+ - ab
57
+ - rst
58
+ bad:
59
+ - def
60
+ - mno
61
+ - ''
62
+ - !ruby/object:Program
63
+ description: Negated char
64
+ program: ~`d
65
+ regex: !ruby/regexp /[^d]/
66
+ good:
67
+ - xyz
68
+ - '123'
69
+ bad:
70
+ - d
71
+ - dd
72
+ - !ruby/object:Program
73
+ description: POSIX class
74
+ program: '%alnum'
75
+ regex: !ruby/regexp /[[:alnum:]]/
76
+ good:
77
+ - abc365
78
+ - '237'
79
+ - xyz
80
+ bad:
81
+ - '---'
82
+ - ':,.-'
83
+ - !ruby/object:Program
84
+ description: Simple char class
85
+ program: '''prstu'''
86
+ regex: !ruby/regexp /[prstu]/
87
+ good:
88
+ - du
89
+ - ppp
90
+ - sr
91
+ bad:
92
+ - abc
93
+ - xyz
94
+ - !ruby/object:Program
95
+ description: Negated char class
96
+ program: ~'ilmnop'
97
+ regex: !ruby/regexp /[^ilmnop]/
98
+ good:
99
+ - abacus
100
+ - peccata
101
+ - hydrogen
102
+ bad:
103
+ - oil
104
+ - pill
105
+ - !ruby/object:Program
106
+ description: Predef Beginning of string
107
+ program: BOS
108
+ regex: !ruby/regexp /^/
109
+ good:
110
+ - ''
111
+ bad: [] # Matches anything
112
+ - !ruby/object:Program
113
+ description: Predef End of string
114
+ program: EOS
115
+ regex: !ruby/regexp /$/
116
+ good:
117
+ - ''
118
+ bad: [] # Matches anything
119
+ - !ruby/object:Program
120
+ description: Predef Word boundary
121
+ program: WB
122
+ regex: !ruby/regexp /\b/
123
+ good:
124
+ - xyz
125
+ bad:
126
+ - ''
127
+ - '---'
128
+ - !ruby/object:Program
129
+ description: Simple string
130
+ program: '"xyz"'
131
+ regex: !ruby/regexp /xyz/
132
+ good:
133
+ - xyz
134
+ - abcxyzdef
135
+ bad:
136
+ - abc
137
+ - xydefz
138
+ - !ruby/object:Program
139
+ description: Single-bounded repetition
140
+ program: 5 * "xyz"
141
+ regex: !ruby/regexp /(xyz){5}/
142
+ good:
143
+ - xyzxyzxyzxyzxyz
144
+ bad:
145
+ - xyzxyzxyzxyz
146
+ - !ruby/object:Program
147
+ description: Double-bounded repetition
148
+ program: 3,4 * %alpha
149
+ regex: !ruby/regexp /([[:alpha:]]){3,4}/
150
+ good:
151
+ - abc
152
+ - abcd
153
+ bad:
154
+ - ab
155
+ - x
156
+ - !ruby/object:Program
157
+ description: any-qualifier
158
+ program: any "abc"
159
+ regex: !ruby/regexp /(abc)*/
160
+ good:
161
+ - ''
162
+ - abc
163
+ - abcabc
164
+ - xyz
165
+ bad: [] # Matches anything
166
+ - !ruby/object:Program
167
+ description: many-qualifier
168
+ program: many "def"
169
+ regex: !ruby/regexp /(def)+/
170
+ good:
171
+ - def
172
+ - defdef
173
+ - defdefdef
174
+ bad:
175
+ - ''
176
+ - de
177
+ - xyz
178
+ - !ruby/object:Program
179
+ description: nocase-qualifier
180
+ program: nocase "ghi"
181
+ regex: !ruby/regexp /((?i)ghi)/
182
+ good:
183
+ - ghi
184
+ - GHI
185
+ - abGhicd
186
+ bad:
187
+ - ''
188
+ - gh
189
+ - abc
190
+ - !ruby/object:Program
191
+ description: maybe-qualifier
192
+ program: maybe "ghi"
193
+ regex: !ruby/regexp /(ghi)?/
194
+ good:
195
+ - ''
196
+ - ghi
197
+ - abghicd
198
+ - gh
199
+ bad: [] # Matches anything
200
+ - !ruby/object:Program
201
+ description: Simple concatenation of two strings
202
+ program: '"abc" "def"'
203
+ regex: !ruby/regexp /abcdef/
204
+ good:
205
+ - abcdefghi
206
+ - xyzabcdef
207
+ bad:
208
+ - ''
209
+ - abcxyzdef
210
+ - !ruby/object:Program
211
+ description: Concat of string and char class
212
+ program: '"abc"''def'''
213
+ regex: !ruby/regexp /abc[def]/
214
+ good:
215
+ - abcd
216
+ - abce
217
+ bad:
218
+ - ''
219
+ - abcx
220
+ - !ruby/object:Program
221
+ description: Simple alternation
222
+ program: '"abc" | "def"'
223
+ regex: !ruby/regexp /(abc|def)/
224
+ good:
225
+ - abc
226
+ - xyzabc123
227
+ - xdefy
228
+ bad:
229
+ - ''
230
+ - abde
231
+ - ab c d ef
232
+ - !ruby/object:Program
233
+ description: Alternation of concatenations
234
+ program: '"ab" "c" | "d" "ef"'
235
+ regex: !ruby/regexp /(abc|def)/
236
+ good:
237
+ - abc
238
+ - xyzabc123
239
+ - xdefy
240
+ bad:
241
+ - ''
242
+ - abde
243
+ - ab c d ef
244
+ - !ruby/object:Program
245
+ description: Precedence of concatenation over alternation
246
+ program: '"a" "b" | "c"'
247
+ regex: !ruby/regexp /(ab|c)/
248
+ good:
249
+ - ab
250
+ - c
251
+ bad:
252
+ - b
253
+ - a
254
+ - d
255
+ - !ruby/object:Program
256
+ description: Precedence of parens over concatenation
257
+ program: '"a" ("b" | "c")'
258
+ regex: !ruby/regexp /a(b|c)/
259
+ good:
260
+ - ab
261
+ - ac
262
+ bad:
263
+ - a
264
+ - b
265
+ - c
266
+ - !ruby/object:Program
267
+ description: Anchors and alternation
268
+ program: 'BOS "x" | "y" EOS'
269
+ regex: !ruby/regexp /(^x|y$)/
270
+ good:
271
+ - xabc
272
+ - abcy
273
+ bad:
274
+ - abc
275
+ - abcx
276
+ - yabc
277
+ - axb
278
+ - ayb
279
+ - axyb
280
+ - !ruby/object:Program
281
+ description: Anchors, alternation, parens
282
+ program: 'BOS ("x" | "y") EOS'
283
+ regex: !ruby/regexp /^(x|y)$/
284
+ good:
285
+ - x
286
+ - y
287
+ bad:
288
+ - abc
289
+ - abcx
290
+ - yabc
291
+ - xabc
292
+ - abcy
293
+ - !ruby/object:Program
294
+ description: Parens, concatenation, alternation
295
+ program: 'BOS ((maybe `0) `1-`9 | `1 D2) EOS'
296
+ regex: !ruby/regexp /^((0)?[1-9]|1[0-2])$/
297
+ good:
298
+ - '01'
299
+ - '09'
300
+ - '12'
301
+ bad:
302
+ - '0'
303
+ - '00'
304
+ - '13'
305
+ # - !ruby/object:Program
306
+ # description: My description
307
+ # program: 'string'
308
+ # regex: !ruby/regexp
309
+ # good: []
310
+ # bad: []
311
+ - !ruby/object:Program
312
+ description: Single backtick char
313
+ program: '``'
314
+ regex: !ruby/regexp /`/
315
+ good:
316
+ - "`"
317
+ - "this is a tick: `"
318
+ - "tock ` tock"
319
+ bad:
320
+ - ''
321
+ - 'abc'
322
+ - !ruby/object:Program
323
+ description: Single backslash char
324
+ program: '`\'
325
+ regex: !ruby/regexp /\\/
326
+ good:
327
+ - "\\"
328
+ - "trying \\n"
329
+ - "and \\b also"
330
+ bad:
331
+ - "\n"
332
+ - "\b"
333
+ - "neither \r nor \t"
334
+ - !ruby/object:Program
335
+ description: Empty string
336
+ program: '""'
337
+ regex: !ruby/regexp //
338
+ good:
339
+ - ""
340
+ - "abc"
341
+ bad: [] # Matches anything
342
+ - !ruby/object:Program
343
+ description: Simple char class
344
+ program: "'abcdef'"
345
+ regex: !ruby/regexp /[abcdef]/
346
+ good:
347
+ - "there's a cat here"
348
+ - "item c"
349
+ bad:
350
+ - ""
351
+ - "proton"
352
+ - !ruby/object:Program
353
+ description: Simple one-char class
354
+ program: "'x'"
355
+ regex: !ruby/regexp /[x]/
356
+ good:
357
+ - "x"
358
+ - "uvwxyz"
359
+ bad:
360
+ - ""
361
+ - "abc"
362
+ - !ruby/object:Program
363
+ description: Alternation of range and class
364
+ program: '`a-`f | ''xyz'''
365
+ regex: !ruby/regexp /([a-f]|[xyz])/
366
+ good:
367
+ - "a"
368
+ - "x"
369
+ - "z"
370
+ - "c"
371
+ bad:
372
+ - ""
373
+ - "jkl"
374
+ - "gw"
375
+ - !ruby/object:Program
376
+ description: Alternation of range and maybe-clause
377
+ program: '`1-`6| maybe "#"'
378
+ regex: !ruby/regexp /([1-6]|(\#)?)/
379
+ good:
380
+ - ""
381
+ - "1#"
382
+ - "1"
383
+ - " 2# abc"
384
+ bad: [] # Matches everything
385
+ - !ruby/object:Program
386
+ description: Four-way alternation
387
+ program: '`a | `b|`c|`d'
388
+ regex: !ruby/regexp /(a|b|c|d)/
389
+ good:
390
+ - "xyza"
391
+ - "xybz"
392
+ - "xcyz"
393
+ - "dxyz"
394
+ bad:
395
+ - ""
396
+ - "every"
397
+ - "ghijk"
398
+ - !ruby/object:Program
399
+ description: Concatenation of range and class
400
+ program: '`a-`f ''xyz'''
401
+ regex: !ruby/regexp /[a-f][xyz]/
402
+ good:
403
+ - "ax"
404
+ - "fz"
405
+ - "cy"
406
+ bad:
407
+ - "zf"
408
+ - "xa"
409
+ - "gz"
410
+ - "hp"
411
+ - "mx"
412
+ - !ruby/object:Program
413
+ description: Concat of strings and maybe-clause
414
+ program: '"this" "that" maybe "other"'
415
+ regex: !ruby/regexp /thisthat(other)?/
416
+ good:
417
+ - "thisthat"
418
+ - "thisthatother"
419
+ - "abc thisthat xyz"
420
+ - "abc thisthatother xyz"
421
+ bad:
422
+ - ""
423
+ - "abc"
424
+ - "this that"
425
+ - "this that other"
426
+ - !ruby/object:Program
427
+ description: Simple repetition of class
428
+ program: 3 * 'xyz'
429
+ regex: !ruby/regexp /([xyz]){3}/
430
+ good:
431
+ - "xyz"
432
+ - "xxx"
433
+ - "yzy"
434
+ - "xyzzy123"
435
+ bad:
436
+ - ""
437
+ - "abc"
438
+ - "xy"
439
+ - "axy"
440
+ - "xyb"
441
+ - "axyb"
442
+ - !ruby/object:Program
443
+ description: Simple repetition of range
444
+ program: 4 * `1-`6
445
+ regex: !ruby/regexp /([1-6]){4}/
446
+ good:
447
+ - "1111"
448
+ - "1234"
449
+ - "abc 6543 def"
450
+ bad:
451
+ - ""
452
+ - "abc"
453
+ - "123"
454
+ - "123 4"
455
+ - !ruby/object:Program
456
+ description: Complex repetition of char
457
+ program: 3,5 * (`a)
458
+ regex: !ruby/regexp /(a){3,5}/
459
+ good:
460
+ - "aaa"
461
+ - "aaaa"
462
+ - "aaaaa"
463
+ - "xaaay"
464
+ - "aaaaaaa"
465
+ bad:
466
+ - ""
467
+ - "abc"
468
+ - "aa"
469
+ - !ruby/object:Program
470
+ description: Complex repetition of parenthesized class
471
+ program: 4,7 * ('xyz')
472
+ regex: !ruby/regexp /([xyz]){4,7}/
473
+ good:
474
+ - "xxxx"
475
+ - "yyyy"
476
+ - "xyzy"
477
+ - "xyzzy"
478
+ - "zyzzyva"
479
+ - "xyzxyz"
480
+ - "xyzxyzx"
481
+ - "xyzxyzxyzxyz"
482
+ bad:
483
+ - ""
484
+ - "abc"
485
+ - "x"
486
+ - "xx"
487
+ - "xxx"
488
+ - "xyz xy"
489
+ - !ruby/object:Program
490
+ description: Complex repetition of parenthesized range
491
+ program: 0,3 * (`1-`6)
492
+ regex: !ruby/regexp /([1-6]){0,3}/
493
+ good:
494
+ - ""
495
+ - "1"
496
+ - "11"
497
+ - "111"
498
+ - "56"
499
+ - "654"
500
+ - "1111"
501
+ - "x123y"
502
+ - "x123456y"
503
+ bad: [] # Matches anything
504
+
505
+ #### Examples below are anchored with ^ and $
506
+
507
+ - !ruby/object:Program
508
+ description: Single char (anchored)
509
+ program: 'BOS `x EOS'
510
+ regex: !ruby/regexp /^x$/
511
+ good:
512
+ - "x"
513
+ bad:
514
+ - "yz"
515
+ - ''
516
+ - "ABC"
517
+ - !ruby/object:Program
518
+ description: Simple range (anchored)
519
+ program: 'BOS `a-`f EOS'
520
+ regex: !ruby/regexp /^[a-f]$/
521
+ good:
522
+ - a
523
+ - b
524
+ - c
525
+ - d
526
+ - e
527
+ - f
528
+ bad:
529
+ - "xyz"
530
+ - ''
531
+ - "ABC"
532
+ - !ruby/object:Program
533
+ description: Negated range (anchored)
534
+ program: 'BOS `c~`p EOS'
535
+ regex: !ruby/regexp /^[^c-p]$/
536
+ good:
537
+ - "a"
538
+ - "r"
539
+ bad:
540
+ - "def"
541
+ - "mno"
542
+ - ''
543
+ - !ruby/object:Program
544
+ description: Negated char (anchored)
545
+ program: 'BOS ~`d EOS'
546
+ regex: !ruby/regexp /^[^d]$/
547
+ good:
548
+ - "x"
549
+ - "1"
550
+ bad:
551
+ - "d"
552
+ - "dd"
553
+ - "abc"
554
+ - !ruby/object:Program
555
+ description: POSIX class (anchored)
556
+ program: 'BOS %alnum EOS'
557
+ regex: !ruby/regexp /^[[:alnum:]]$/
558
+ good:
559
+ - "c"
560
+ - "2"
561
+ bad:
562
+ - ""
563
+ - "abc"
564
+ - "123"
565
+ - "-"
566
+ - ":"
567
+ - ","
568
+ - "."
569
+ - !ruby/object:Program
570
+ description: Simple char class (anchored)
571
+ program: "BOS 'prstu' EOS"
572
+ regex: !ruby/regexp /^[prstu]$/
573
+ good:
574
+ - "u"
575
+ - "p"
576
+ - "s"
577
+ bad:
578
+ - ""
579
+ - "abc"
580
+ - "x"
581
+ - !ruby/object:Program
582
+ description: Negated char class (anchored)
583
+ program: "BOS ~'ilmnop' EOS"
584
+ regex: !ruby/regexp /^[^ilmnop]$/
585
+ good:
586
+ - "a"
587
+ - "e"
588
+ - "h"
589
+ bad:
590
+ - "o"
591
+ - "i"
592
+ - "l"
593
+ - !ruby/object:Program
594
+ description: Simple string (anchored)
595
+ program: 'BOS "xyz" EOS'
596
+ regex: !ruby/regexp /^xyz$/
597
+ good:
598
+ - "xyz"
599
+ bad:
600
+ - ""
601
+ - "abc"
602
+ - "abcxyzdef"
603
+ - "xydefz"
604
+ - !ruby/object:Program
605
+ description: Single-bounded repetition (anchored)
606
+ program: 'BOS 5 * "xyz" EOS'
607
+ regex: !ruby/regexp /^(xyz){5}$/
608
+ good:
609
+ - "xyzxyzxyzxyzxyz"
610
+ bad:
611
+ - "xyzxyzxyzxyz"
612
+ - "abcxyzxyzxyzxyz"
613
+ - "xyzxyzxyzxyzabc"
614
+ - !ruby/object:Program
615
+ description: Double-bounded repetition (anchored)
616
+ program: "BOS 3,4 * %alpha EOS"
617
+ regex: !ruby/regexp /^([[:alpha:]]){3,4}$/
618
+ good:
619
+ - "abc"
620
+ - "abcd"
621
+ bad:
622
+ - ""
623
+ - "ab"
624
+ - "x"
625
+ - "abcde"
626
+ - !ruby/object:Program
627
+ description: any-qualifier (anchored)
628
+ program: 'BOS any "abc" EOS'
629
+ regex: !ruby/regexp /^(abc)*$/
630
+ good:
631
+ - ""
632
+ - "abc"
633
+ - "abcabc"
634
+ - "abcabcabc"
635
+ bad:
636
+ - "ab"
637
+ - "abcab"
638
+ - "xyz"
639
+ - !ruby/object:Program
640
+ description: many-qualifier (anchored)
641
+ program: 'BOS many "def" EOS'
642
+ regex: !ruby/regexp /^(def)+$/
643
+ good:
644
+ - "def"
645
+ - "defdef"
646
+ - "defdefdef"
647
+ bad:
648
+ - ""
649
+ - "d"
650
+ - "de"
651
+ - "defd"
652
+ - "xyz"
653
+ - !ruby/object:Program
654
+ description: maybe-qualifier (anchored)
655
+ program: 'BOS maybe "ghi" EOS'
656
+ regex: !ruby/regexp /^(ghi)?$/
657
+ good:
658
+ - ""
659
+ - "ghi"
660
+ bad:
661
+ - "abghicd"
662
+ - "gh"
663
+ - !ruby/object:Program
664
+ description: Simple concatenation of two strings (anchored)
665
+ program: 'BOS "abc" "def" EOS'
666
+ regex: !ruby/regexp /^abcdef$/
667
+ good:
668
+ - "abcdef"
669
+ bad:
670
+ - ""
671
+ - "abcd"
672
+ - "xyzabcdef"
673
+ - "abcxyzdef"
674
+ - "abcdefxyz"
675
+ - !ruby/object:Program
676
+ description: Concat of string and char class (anchored)
677
+ program: "BOS \"abc\" 'def' EOS"
678
+ regex: !ruby/regexp /^abc[def]$/
679
+ good:
680
+ - "abcd"
681
+ - "abce"
682
+ - "abcf"
683
+ bad:
684
+ - ""
685
+ - "ab"
686
+ - "abc"
687
+ - "abcx"
688
+ - !ruby/object:Program
689
+ description: Simple alternation (anchored)
690
+ program: 'BOS ("abc" | "def") EOS'
691
+ regex: !ruby/regexp /^(abc|def)$/
692
+ good:
693
+ - "abc"
694
+ - "def"
695
+ bad:
696
+ - ""
697
+ - "abde"
698
+ - "ab c d ef"
699
+ - "xdefy"
700
+ - !ruby/object:Program
701
+ description: Alternation of concatenations (anchored)
702
+ program: 'BOS ("ab" "c" | "d" "ef") EOS'
703
+ regex: !ruby/regexp /^(abc|def)$/
704
+ good:
705
+ - "abc"
706
+ - "def"
707
+ bad:
708
+ - ""
709
+ - "abde"
710
+ - "ab c d ef"
711
+ - "xdefy"
712
+ - !ruby/object:Program
713
+ description: Precedence of concatenation over alternation (anchored)
714
+ program: 'BOS ("a" "b" | "c") EOS'
715
+ regex: !ruby/regexp /^(ab|c)$/
716
+ good:
717
+ - "ab"
718
+ - "c"
719
+ bad:
720
+ - ""
721
+ - "b"
722
+ - "a"
723
+ - "d"
724
+ - "abc"
725
+ - "abcde"
726
+ - !ruby/object:Program
727
+ description: Precedence of parens over concatenation (anchored)
728
+ program: 'BOS "a" ("b" | "c") EOS'
729
+ regex: !ruby/regexp /^a(b|c)$/
730
+ good:
731
+ - "ab"
732
+ - "ac"
733
+ bad:
734
+ - "a"
735
+ - "b"
736
+ - "c"
737
+ - "abc"
738
+ - "abx"
739
+ - "bac"
740
+ - !ruby/object:Program
741
+ description: Anchors and alternation (anchored)
742
+ program: 'BOS "x" | "y" EOS'
743
+ regex: !ruby/regexp /(^x|y$)/
744
+ good:
745
+ - xabc
746
+ - abcy
747
+ bad:
748
+ - abc
749
+ - abcx
750
+ - yabc
751
+ - axb
752
+ - ayb
753
+ - axyb
754
+ - !ruby/object:Program
755
+ description: Parens, concatenation, alternation (anchored)
756
+ program: 'BOS ((maybe `0) `1-`9 | `1 D2) EOS'
757
+ regex: !ruby/regexp /^((0)?[1-9]|1[0-2])$/
758
+ good:
759
+ - "01"
760
+ - "09"
761
+ - "12"
762
+ bad:
763
+ - "0"
764
+ - "00"
765
+ - "13"
766
+ - !ruby/object:Program
767
+ description: Single backtick char (anchored)
768
+ program: 'BOS `` EOS'
769
+ regex: !ruby/regexp /^`$/
770
+ good:
771
+ - "`"
772
+ bad:
773
+ - ''
774
+ - 'abc'
775
+ - "this is a tick: `"
776
+ - "tock ` tock"
777
+ - !ruby/object:Program
778
+ description: Single backslash char (anchored)
779
+ program: 'BOS `\ EOS'
780
+ regex: !ruby/regexp /^\\$/
781
+ good:
782
+ - "\\"
783
+ bad:
784
+ - "\n"
785
+ - "\b"
786
+ - "neither \r nor \t"
787
+ - "trying \\n"
788
+ - "and \\b also"
789
+ - !ruby/object:Program
790
+ description: Empty string (anchored)
791
+ program: 'BOS "" EOS'
792
+ regex: !ruby/regexp /^$/
793
+ good:
794
+ - ""
795
+ bad:
796
+ - "abc"
797
+ - !ruby/object:Program
798
+ description: Simple one-char class (anchored)
799
+ program: "BOS 'x' EOS"
800
+ regex: !ruby/regexp /^[x]$/
801
+ good:
802
+ - "x"
803
+ bad:
804
+ - ""
805
+ - "abc"
806
+ - "uvwxyz"
807
+ - !ruby/object:Program
808
+ description: Alternation of range and class (anchored)
809
+ program: "BOS (`a-`f | 'xyz') EOS"
810
+ regex: !ruby/regexp /^([a-f]|[xyz])$/
811
+ good:
812
+ - "a"
813
+ - "x"
814
+ - "z"
815
+ - "c"
816
+ bad:
817
+ - ""
818
+ - "ab"
819
+ - "abc"
820
+ - "xy"
821
+ - "jkl"
822
+ - "gw"
823
+ - !ruby/object:Program
824
+ description: Alternation of range and maybe-clause (anchored)
825
+ program: 'BOS (`1-`6| maybe "#") EOS'
826
+ regex: !ruby/regexp /^([1-6]|(\#)?)$/
827
+ good:
828
+ - ""
829
+ - "1"
830
+ - "#"
831
+ - "6"
832
+ bad:
833
+ - "55"
834
+ - "###"
835
+ - !ruby/object:Program
836
+ description: Four-way alternation (anchored)
837
+ program: 'BOS (`a | `b|`c|`d) EOS'
838
+ regex: !ruby/regexp /^(a|b|c|d)$/
839
+ good:
840
+ - "a"
841
+ - "b"
842
+ - "c"
843
+ - "d"
844
+ bad:
845
+ - ""
846
+ - "ab"
847
+ - "every"
848
+ - "ghijk"
849
+ - !ruby/object:Program
850
+ description: Concatenation of range and class (anchored)
851
+ program: "BOS `a-`f 'xyz' EOS"
852
+ regex: !ruby/regexp /^[a-f][xyz]$/
853
+ good:
854
+ - "ax"
855
+ - "fz"
856
+ - "cy"
857
+ bad:
858
+ - "axe"
859
+ - "fz123"
860
+ - "zf"
861
+ - "xa"
862
+ - "gz"
863
+ - "hp"
864
+ - "mx"
865
+ - !ruby/object:Program
866
+ description: Concat of strings and maybe-clause (anchored)
867
+ program: 'BOS "this" "that" maybe "other" EOS'
868
+ regex: !ruby/regexp /^thisthat(other)?$/
869
+ good:
870
+ - "thisthat"
871
+ - "thisthatother"
872
+ bad:
873
+ - ""
874
+ - "abc"
875
+ - "this that"
876
+ - "this that other"
877
+ - "abc thisthat xyz"
878
+ - "abc thisthatother xyz"
879
+ - !ruby/object:Program
880
+ description: Simple repetition of class (anchored)
881
+ program: "BOS 3 * 'xyz' EOS"
882
+ regex: !ruby/regexp /^([xyz]){3}$/
883
+ good:
884
+ - "xyz"
885
+ - "xxx"
886
+ - "yzy"
887
+ bad:
888
+ - ""
889
+ - "abc"
890
+ - "xy"
891
+ - "axy"
892
+ - "xyb"
893
+ - "axyb"
894
+ - "xyzzy123"
895
+ - !ruby/object:Program
896
+ description: Simple repetition of range (anchored)
897
+ program: "BOS 4 * `1-`6 EOS"
898
+ regex: !ruby/regexp /^([1-6]){4}$/
899
+ good:
900
+ - "1111"
901
+ - "1234"
902
+ bad:
903
+ - ""
904
+ - "abc"
905
+ - "123"
906
+ - "123 4"
907
+ - "abc 6543 def"
908
+ - !ruby/object:Program
909
+ description: Complex repetition of char (anchored)
910
+ program: "BOS 3,5 * (`a) EOS"
911
+ regex: !ruby/regexp /^(a){3,5}$/
912
+ good:
913
+ - "aaa"
914
+ - "aaaa"
915
+ - "aaaaa"
916
+ bad:
917
+ - ""
918
+ - "abc"
919
+ - "aa"
920
+ - "xaaay"
921
+ - "aaaaaaa"
922
+ - !ruby/object:Program
923
+ description: Complex repetition of parenthesized class (anchored)
924
+ program: "BOS 4,7 * ('xyz') EOS"
925
+ regex: !ruby/regexp /^([xyz]){4,7}$/
926
+ good:
927
+ - "xxxx"
928
+ - "yyyy"
929
+ - "xyzy"
930
+ - "xyzzy"
931
+ - "xyzxyz"
932
+ - "xyzxyzx"
933
+ bad:
934
+ - ""
935
+ - "abc"
936
+ - "x"
937
+ - "xx"
938
+ - "xxx"
939
+ - "xyz xy"
940
+ - "xyzxyzxyzxyz"
941
+ - "zyzzyva"
942
+ - !ruby/object:Program
943
+ description: Complex repetition of parenthesized range (anchored)
944
+ program: "BOS 0,3 * (`1-`6) EOS"
945
+ regex: !ruby/regexp /^([1-6]){0,3}$/
946
+ good:
947
+ - ""
948
+ - "1"
949
+ - "11"
950
+ - "111"
951
+ - "56"
952
+ - "654"
953
+ bad:
954
+ - "1111"
955
+ - "x123y"
956
+ - "x123456y"
957
+ #
958
+ # find X with Y # /(?=XY)X/ - pos lookahead
959
+ # find X without Y # /(?!XY)X/ - neg lookahead
960
+ # with X find Y # /(?<=X)Y/ - pos lookbehind
961
+ # without X find Y # /(?<!X)Y/ - neg lookbehind
962
+
963
+ - !ruby/object:Program
964
+ description: Simple lookaround (pos-ahead)
965
+ program: 'find "X" with "Y"'
966
+ regex: !ruby/regexp /(?=XY)X/
967
+ good:
968
+ - "XY"
969
+ bad:
970
+ - "X"
971
+ - "Y"
972
+ - "YX"
973
+ - !ruby/object:Program
974
+ description: Simple lookaround (neg-ahead)
975
+ program: 'find "X" without "Y"'
976
+ regex: !ruby/regexp /(?!XY)X/
977
+ good:
978
+ - "X"
979
+ - "YX"
980
+ bad:
981
+ - "XY"
982
+ - "Y"
983
+ - !ruby/object:Program
984
+ description: Simple lookaround (pos-behind)
985
+ program: 'with "X" find "Y"'
986
+ regex: !ruby/regexp /(?<=X)Y/
987
+ good:
988
+ - "XY"
989
+ bad:
990
+ - "YX"
991
+ - "Y"
992
+ - "X"
993
+ - !ruby/object:Program
994
+ description: Simple lookaround (neg-behind)
995
+ program: 'without "X" find "Y"'
996
+ regex: !ruby/regexp /(?<!X)Y/
997
+ good:
998
+ - "aY"
999
+ - "Y"
1000
+ bad:
1001
+ - "XY"
1002
+ - "X"
1003
+ #
1004
+ - !ruby/object:Program
1005
+ description: Positive lookahead
1006
+ program: "find (3*D \" dollars\") with 3*D"
1007
+ regex: !ruby/regexp /(?=\d{3} dollars)\d{3}/
1008
+ good:
1009
+ - "101 dollars"
1010
+ bad:
1011
+ - "102 pesos"
1012
+ - !ruby/object:Program
1013
+ description: Negative lookahead
1014
+ program: 'find 3*D without " pesos"'
1015
+ regex: !ruby/regexp /(?!\d{3} pesos)\d{3}/
1016
+ good:
1017
+ - "103 dollars"
1018
+ - "104 euros"
1019
+ bad:
1020
+ - "105 pesos"
1021
+ - !ruby/object:Program
1022
+ description: Positive lookbehind
1023
+ program: 'with "USD" find 3*D'
1024
+ regex: !ruby/regexp /(?<=USD)\d{3}/
1025
+ good:
1026
+ - "USD106"
1027
+ bad:
1028
+ - "EUR107"
1029
+ - !ruby/object:Program
1030
+ description: Negative lookbehind
1031
+ program: 'without "USD" find 3*D'
1032
+ regex: !ruby/regexp /(?<!USD)\d{3}/
1033
+ good:
1034
+ - "EUR108"
1035
+ bad:
1036
+ - "USD109"