regexador 0.4.6 → 0.4.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +46 -54
- metadata +12 -32
- data/spec/GENERATED_spec.rb +0 -3347
- data/spec/captures.yaml +0 -85
- data/spec/old_mkcode.rb +0 -116
- data/spec/oneliners.yaml +0 -1036
- data/spec/programs.yaml +0 -201
- data/spec/regexador_spec.rb +0 -348
- data/test/captures.yaml +0 -85
- data/test/mkcode.rb +0 -122
- data/test/oneliners.yaml +0 -1036
- data/test/programs.yaml +0 -201
- data/test/test.rb +0 -170
data/test/programs.yaml
DELETED
@@ -1,201 +0,0 @@
|
|
1
|
-
---
|
2
|
-
- !ruby/object:Program
|
3
|
-
description: Simple use of two vars
|
4
|
-
program: |
|
5
|
-
var1 = "abc"
|
6
|
-
var2 = "def"
|
7
|
-
match var1 var2 end
|
8
|
-
regex: !ruby/regexp /abcdef/
|
9
|
-
good:
|
10
|
-
- abcdefghi
|
11
|
-
- xyzabcdef
|
12
|
-
bad:
|
13
|
-
- ''
|
14
|
-
- abcxyzdef
|
15
|
-
- !ruby/object:Program
|
16
|
-
description: Multiline match with two vars
|
17
|
-
program: " var1 = \"abc\"\n var2 = \"def\"\n \n # Test a blank line
|
18
|
-
and comment as well.\n \n match # multiline match with comment\n var1\n
|
19
|
-
\ var2\n end\n"
|
20
|
-
regex: !ruby/regexp /abcdef/
|
21
|
-
good:
|
22
|
-
- abcdefghi
|
23
|
-
- xyzabcdef
|
24
|
-
bad:
|
25
|
-
- ''
|
26
|
-
- abcxyzdef
|
27
|
-
- !ruby/object:Program
|
28
|
-
description: IPv4 address
|
29
|
-
program: |
|
30
|
-
dot = "."
|
31
|
-
num = "25" D5 | `2 D4 D | maybe D1 1,2*D
|
32
|
-
match BOS num dot num dot num dot num EOS end
|
33
|
-
regex: !ruby/regexp /^(25[0-5]|2[0-4]\d|([01])?(\d){1,2})\.(25[0-5]|2[0-4]\d|([01])?(\d){1,2})\.(25[0-5]|2[0-4]\d|([01])?(\d){1,2})\.(25[0-5]|2[0-4]\d|([01])?(\d){1,2})$/
|
34
|
-
good:
|
35
|
-
- "127.0.0.1"
|
36
|
-
- "255.254.93.22"
|
37
|
-
- "255.254.93.22"
|
38
|
-
bad:
|
39
|
-
- ''
|
40
|
-
- "7.8.9"
|
41
|
-
- "3.4.5.6.7"
|
42
|
-
- "1.2.3.256"
|
43
|
-
- !ruby/object:Program
|
44
|
-
description: Identifying credit cards
|
45
|
-
program: |
|
46
|
-
# Warning: This one likely has errors!
|
47
|
-
|
48
|
-
visa = `4 12*D maybe 3*D
|
49
|
-
mc = `5 D5 14*D
|
50
|
-
discover = `6 ("011" | `5 2*D) 12*D
|
51
|
-
amex = `3 '47' 13*D
|
52
|
-
diners = `3 (`0 D5 | '68' D) 11*D
|
53
|
-
jcb = ("2131"|"1800"|"35" 3*D) 11*D
|
54
|
-
|
55
|
-
match visa | mc | discover | amex | diners | jcb end
|
56
|
-
regex: !ruby/regexp /(4(\d){12}((\d){3})?|5[0-5](\d){14}|6(011|5(\d){2})(\d){12}|3[47](\d){13}|3(0[0-5]|[68]\d)(\d){11}|(2131|1800|35(\d){3})(\d){11})/
|
57
|
-
|
58
|
-
|
59
|
-
good: []
|
60
|
-
bad: []
|
61
|
-
- !ruby/object:Program
|
62
|
-
description: Matching US phone num (with captures)
|
63
|
-
program: |
|
64
|
-
match
|
65
|
-
@area_code = 3 * D
|
66
|
-
`-
|
67
|
-
@prefix = 3*D
|
68
|
-
`-
|
69
|
-
@last4 = 4*D
|
70
|
-
end
|
71
|
-
# regex: !ruby/regexp /(?<area_code>\d{3}){0}(?<prefix>\d{3}){0}(?<last4>\d{4}){0}\g<area_code>-\g<prefix>-\g<last4>/
|
72
|
-
regex: !ruby/regexp /(?<area_code>(\d){3})\-(?<prefix>(\d){3})\-(?<last4>(\d){4})/
|
73
|
-
good:
|
74
|
-
- '601-555-2345'
|
75
|
-
- 'call me at 888-425-9000'
|
76
|
-
bad:
|
77
|
-
- '888-HAL-9000'
|
78
|
-
- '800.237.1234'
|
79
|
-
- !ruby/object:Program
|
80
|
-
description: KNOWNFAIL Matching a clock time, 12/24 hrs
|
81
|
-
program: |
|
82
|
-
hr12 = (maybe `0) `1-`9 | `1 D2
|
83
|
-
hr24 = (maybe `0) D | `1 D | `2 D3
|
84
|
-
sep = `: | `.
|
85
|
-
min = D5 D9
|
86
|
-
sec = D5 D9
|
87
|
-
ampm = (maybe SPACE) ("am" | "pm")
|
88
|
-
time12 = hr12 sep min maybe (sep sec) maybe ampm
|
89
|
-
time24 = hr24 sep min maybe (sep sec)
|
90
|
-
match BOS (time12 | time24) EOS end
|
91
|
-
regex: !ruby/regexp /^(((0)?[1-9]|1[0-2])(:|\.)[0-5]\d((:|\.)[0-5]\d)?(( )?(am|pm))?|((0)?\d|1\d|2[0-3])(:|\.)[0-5]\d((:|\.)[0-5]\d)?)$/
|
92
|
-
good:
|
93
|
-
- '12:34'
|
94
|
-
- '1:23'
|
95
|
-
- '5:14pm'
|
96
|
-
- '19:43'
|
97
|
-
- '1:23:45'
|
98
|
-
- '1:23:45 pm'
|
99
|
-
- '7:43 pm'
|
100
|
-
- '8:32:45'
|
101
|
-
- '8.34'
|
102
|
-
- '8.34 pm'
|
103
|
-
- '8.34.45'
|
104
|
-
bad:
|
105
|
-
- ''
|
106
|
-
- abc
|
107
|
-
- '24:30'
|
108
|
-
- '25:30'
|
109
|
-
- '19:43 pm'
|
110
|
-
- '5:14 pm'
|
111
|
-
- !ruby/object:Program
|
112
|
-
description: Using nocase
|
113
|
-
program: 'match BOS "abc" nocase "def" "ghi" EOS end'
|
114
|
-
regex: !ruby/regexp /^abc((?i)def)ghi$/
|
115
|
-
good:
|
116
|
-
- "abcdefghi"
|
117
|
-
- "abcDEFghi"
|
118
|
-
- "abcdEfghi"
|
119
|
-
bad:
|
120
|
-
- ""
|
121
|
-
- "x"
|
122
|
-
- "xabcdefghi"
|
123
|
-
- "abcdefghix"
|
124
|
-
- "aBcdefghi"
|
125
|
-
- "abcdefGhi"
|
126
|
-
- "abCdefghI"
|
127
|
-
- "abCdEfghI"
|
128
|
-
# - !ruby/object:Program
|
129
|
-
# description: Simple use of two vars
|
130
|
-
# program: |
|
131
|
-
# regex: !ruby/regexp //
|
132
|
-
# good:
|
133
|
-
# bad:
|
134
|
-
- !ruby/object:Program
|
135
|
-
description: Var used in simple repetition
|
136
|
-
program: |
|
137
|
-
n = 3
|
138
|
-
match BOS n * `x EOS end
|
139
|
-
regex: !ruby/regexp /^(x){3}$/
|
140
|
-
good:
|
141
|
-
- "xxx"
|
142
|
-
bad:
|
143
|
-
- ""
|
144
|
-
- "x"
|
145
|
-
- "xx x"
|
146
|
-
- "xxxx"
|
147
|
-
- !ruby/object:Program
|
148
|
-
description: Var used in complex repetition
|
149
|
-
program: |
|
150
|
-
m = 4
|
151
|
-
n = 6
|
152
|
-
match BOS m,n * `x EOS end
|
153
|
-
regex: !ruby/regexp /^(x){4,6}$/
|
154
|
-
good:
|
155
|
-
- "xxxx"
|
156
|
-
- "xxxxx"
|
157
|
-
- "xxxxxx"
|
158
|
-
bad:
|
159
|
-
- ""
|
160
|
-
- "x"
|
161
|
-
- "xx x"
|
162
|
-
- "xxx"
|
163
|
-
- "xxxxxxx"
|
164
|
-
- !ruby/object:Program
|
165
|
-
description: Using Unicode codepoint again
|
166
|
-
program: |
|
167
|
-
euro = &20ac
|
168
|
-
price = (euro | "$") SPACE many D maybe ("." 2*D)
|
169
|
-
match BOS price EOS end
|
170
|
-
regex: !ruby/regexp /^(€|\$) (\d)+(\.(\d){2})?$/
|
171
|
-
good:
|
172
|
-
- "€ 237"
|
173
|
-
- "$ 237"
|
174
|
-
- "€ 23.45"
|
175
|
-
- "€ 0.25"
|
176
|
-
bad:
|
177
|
-
- ""
|
178
|
-
- "x"
|
179
|
-
- "€"
|
180
|
-
- "€ "
|
181
|
-
- "€ 237"
|
182
|
-
- "$ 237"
|
183
|
-
- "€ 23.456"
|
184
|
-
- !ruby/object:Program
|
185
|
-
description: Using within (1)
|
186
|
-
program: |
|
187
|
-
match within `/ end
|
188
|
-
regex: !ruby/regexp /(\/.*?\/)/
|
189
|
-
good:
|
190
|
-
- "There is a /slash-delimited string/ here."
|
191
|
-
bad:
|
192
|
-
- "No such string here."
|
193
|
-
- !ruby/object:Program
|
194
|
-
description: Using escaping (1)
|
195
|
-
program: |
|
196
|
-
match escaping `/ end
|
197
|
-
regex: !ruby/regexp /\/|[^\/]*?\//
|
198
|
-
good:
|
199
|
-
- "This is /slash-delimited but \\/with embedded slashes \\/ also /."
|
200
|
-
bad:
|
201
|
-
- "No such string here."
|
data/test/test.rb
DELETED
@@ -1,170 +0,0 @@
|
|
1
|
-
$LOAD_PATH << "." << "./lib"
|
2
|
-
|
3
|
-
require 'regexador'
|
4
|
-
|
5
|
-
require "minitest/autorun"
|
6
|
-
|
7
|
-
|
8
|
-
class TestRegexador < Minitest::Test
|
9
|
-
|
10
|
-
def test_001_special_chars
|
11
|
-
parser = Regexador::Parser.new
|
12
|
-
assert parser.cSQUOTE.parse("'")
|
13
|
-
assert parser.cHASH.parse('#')
|
14
|
-
assert parser.cNEWLINE.parse("\n")
|
15
|
-
assert parser.cEQUAL.parse('=')
|
16
|
-
end
|
17
|
-
|
18
|
-
def test_002_intl_chars
|
19
|
-
parser = Regexador::Parser.new
|
20
|
-
assert parser.char.parse_with_debug("`æ")
|
21
|
-
assert parser.char.parse("`ß")
|
22
|
-
assert parser.char.parse("`ç")
|
23
|
-
assert parser.char.parse("`ö")
|
24
|
-
assert parser.char.parse("`ñ")
|
25
|
-
end
|
26
|
-
|
27
|
-
def test_003_codepoints
|
28
|
-
parser = Regexador::Parser.new
|
29
|
-
assert parser.codepoint.parse_with_debug("&1234")
|
30
|
-
assert parser.codepoint.parse('&beef')
|
31
|
-
end
|
32
|
-
|
33
|
-
def test_004_predef_tokens
|
34
|
-
parser = Regexador::Parser.new
|
35
|
-
%w(BOS EOS START END).each do |token|
|
36
|
-
assert parser.pattern.parse_with_debug(token)
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
def test_005_assignment
|
41
|
-
parser = Regexador::Parser.new
|
42
|
-
assert parser.assignment.parse("a = 5")
|
43
|
-
assert parser.assignment.parse("a= 5")
|
44
|
-
assert parser.assignment.parse("a =5")
|
45
|
-
assert parser.assignment.parse("a=5")
|
46
|
-
assert parser.assignment.parse("myvar = 'xyz'")
|
47
|
-
assert parser.assignment.parse('var2 = "hello"')
|
48
|
-
assert parser.assignment.parse('this_var = `x-`z')
|
49
|
-
assert parser.assignment.parse('pat = maybe many `x-`z')
|
50
|
-
end
|
51
|
-
|
52
|
-
def test_006_keyword_as_var
|
53
|
-
parser = Regexador::Parser.new
|
54
|
-
assert_raises { parser.assignment.parse("end = 'hello'") }
|
55
|
-
parser = Regexador::Parser.new
|
56
|
-
assert parser.assignment.parse_with_debug("endx = 'hello'")
|
57
|
-
assert parser.assignment.parse_with_debug("end5 = 'hello'")
|
58
|
-
assert parser.assignment.parse_with_debug("end_ = 'hello'")
|
59
|
-
assert parser.assignment.parse_with_debug("anyx = 'hello'")
|
60
|
-
end
|
61
|
-
|
62
|
-
def test_007_def_section
|
63
|
-
parser = Regexador::Parser.new
|
64
|
-
defs1 = <<-EOS
|
65
|
-
a = 5
|
66
|
-
str = "hello"
|
67
|
-
EOS
|
68
|
-
assert parser.definitions.parse(defs1), "assertion 1"
|
69
|
-
defs2 = <<-EOF
|
70
|
-
a = 5
|
71
|
-
pat = maybe many `a-`c
|
72
|
-
# empty line follows:
|
73
|
-
|
74
|
-
str = "hello"
|
75
|
-
# another comment...
|
76
|
-
EOF
|
77
|
-
assert parser.definitions.parse(defs2), "assertion 2"
|
78
|
-
end
|
79
|
-
|
80
|
-
def test_008_capture_var
|
81
|
-
parser = Regexador::Parser.new
|
82
|
-
str1 = "@myvar"
|
83
|
-
assert parser.capture_var.parse(str1)
|
84
|
-
end
|
85
|
-
|
86
|
-
def test_009_captured_pattern
|
87
|
-
parser = Regexador::Parser.new
|
88
|
-
prog = "@myvar = maybe 'abc'"
|
89
|
-
assert parser.capture.parse(prog)
|
90
|
-
assert parser.parse("match #{prog} end")
|
91
|
-
end
|
92
|
-
|
93
|
-
def test_010_back_ref
|
94
|
-
parser = Regexador::Parser.new
|
95
|
-
prog = '@myvar'
|
96
|
-
parser.capture.parse(prog)
|
97
|
-
assert parser.parse("match #{prog} end")
|
98
|
-
end
|
99
|
-
|
100
|
-
def test_011_one_line_match_clause
|
101
|
-
parser = Regexador::Parser.new
|
102
|
-
mc1 = "match `a~`x end"
|
103
|
-
assert parser.match_clause.parse(mc1)
|
104
|
-
end
|
105
|
-
|
106
|
-
def test_012_multiline_match_clause
|
107
|
-
parser = Regexador::Parser.new
|
108
|
-
mc2 = <<-EOF
|
109
|
-
match
|
110
|
-
`< "tag" WB
|
111
|
-
any ~`>
|
112
|
-
# blah blah blah
|
113
|
-
"</" "tag" `>
|
114
|
-
end
|
115
|
-
EOF
|
116
|
-
assert parser.multiline_clause.parse(mc2)
|
117
|
-
end
|
118
|
-
|
119
|
-
def test_013_oneline_program
|
120
|
-
parser = Regexador::Parser.new
|
121
|
-
prog = "match `a-`f end"
|
122
|
-
assert parser.parse_with_debug(prog)
|
123
|
-
end
|
124
|
-
|
125
|
-
def test_014_multiline_program
|
126
|
-
parser = Regexador::Parser.new
|
127
|
-
prog1 = <<-EOF
|
128
|
-
dot = "."
|
129
|
-
num = "25" D5 | `2 D4 D | maybe D1 1,2*D
|
130
|
-
match WB num dot num dot num dot num WB end
|
131
|
-
EOF
|
132
|
-
assert parser.program.parse(prog1)
|
133
|
-
|
134
|
-
prog2 = <<-EOF
|
135
|
-
# Warning: This one likely has errors!
|
136
|
-
|
137
|
-
visa = `4 12*D maybe 3*D
|
138
|
-
mc = `5 D5 14*D
|
139
|
-
amex = `3 '47' 13*D
|
140
|
-
diners = `3 (`0 D5 | '68' D) 11*D
|
141
|
-
discover = `6 ("011" | `5 2*D) 12*D
|
142
|
-
jcb = ("2131"|"1800"|"35" 3*D) 11*D
|
143
|
-
|
144
|
-
match visa | mc | amex | diners | discover | jcb end
|
145
|
-
EOF
|
146
|
-
assert parser.program.parse(prog2)
|
147
|
-
end
|
148
|
-
|
149
|
-
def test_015_neg_lookbehind
|
150
|
-
parser = Regexador::Parser.new
|
151
|
-
prog = ' match without "USD" find 3*D end'
|
152
|
-
|
153
|
-
assert parser.program.parse(prog)
|
154
|
-
rx = Regexador.new(prog)
|
155
|
-
assert rx.regexp == /(?<!USD)(\d){3}/
|
156
|
-
end
|
157
|
-
|
158
|
-
def test_010_neg_lookahead
|
159
|
-
end
|
160
|
-
|
161
|
-
def test_010_
|
162
|
-
end
|
163
|
-
|
164
|
-
def test_010_
|
165
|
-
end
|
166
|
-
|
167
|
-
def test_010_
|
168
|
-
end
|
169
|
-
|
170
|
-
end
|