regexador 0.4.6 → 0.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +46 -54
- metadata +12 -32
- data/spec/GENERATED_spec.rb +0 -3347
- data/spec/captures.yaml +0 -85
- data/spec/old_mkcode.rb +0 -116
- data/spec/oneliners.yaml +0 -1036
- data/spec/programs.yaml +0 -201
- data/spec/regexador_spec.rb +0 -348
- data/test/captures.yaml +0 -85
- data/test/mkcode.rb +0 -122
- data/test/oneliners.yaml +0 -1036
- data/test/programs.yaml +0 -201
- data/test/test.rb +0 -170
data/test/programs.yaml
DELETED
@@ -1,201 +0,0 @@
|
|
1
|
-
---
|
2
|
-
- !ruby/object:Program
|
3
|
-
description: Simple use of two vars
|
4
|
-
program: |
|
5
|
-
var1 = "abc"
|
6
|
-
var2 = "def"
|
7
|
-
match var1 var2 end
|
8
|
-
regex: !ruby/regexp /abcdef/
|
9
|
-
good:
|
10
|
-
- abcdefghi
|
11
|
-
- xyzabcdef
|
12
|
-
bad:
|
13
|
-
- ''
|
14
|
-
- abcxyzdef
|
15
|
-
- !ruby/object:Program
|
16
|
-
description: Multiline match with two vars
|
17
|
-
program: " var1 = \"abc\"\n var2 = \"def\"\n \n # Test a blank line
|
18
|
-
and comment as well.\n \n match # multiline match with comment\n var1\n
|
19
|
-
\ var2\n end\n"
|
20
|
-
regex: !ruby/regexp /abcdef/
|
21
|
-
good:
|
22
|
-
- abcdefghi
|
23
|
-
- xyzabcdef
|
24
|
-
bad:
|
25
|
-
- ''
|
26
|
-
- abcxyzdef
|
27
|
-
- !ruby/object:Program
|
28
|
-
description: IPv4 address
|
29
|
-
program: |
|
30
|
-
dot = "."
|
31
|
-
num = "25" D5 | `2 D4 D | maybe D1 1,2*D
|
32
|
-
match BOS num dot num dot num dot num EOS end
|
33
|
-
regex: !ruby/regexp /^(25[0-5]|2[0-4]\d|([01])?(\d){1,2})\.(25[0-5]|2[0-4]\d|([01])?(\d){1,2})\.(25[0-5]|2[0-4]\d|([01])?(\d){1,2})\.(25[0-5]|2[0-4]\d|([01])?(\d){1,2})$/
|
34
|
-
good:
|
35
|
-
- "127.0.0.1"
|
36
|
-
- "255.254.93.22"
|
37
|
-
- "255.254.93.22"
|
38
|
-
bad:
|
39
|
-
- ''
|
40
|
-
- "7.8.9"
|
41
|
-
- "3.4.5.6.7"
|
42
|
-
- "1.2.3.256"
|
43
|
-
- !ruby/object:Program
|
44
|
-
description: Identifying credit cards
|
45
|
-
program: |
|
46
|
-
# Warning: This one likely has errors!
|
47
|
-
|
48
|
-
visa = `4 12*D maybe 3*D
|
49
|
-
mc = `5 D5 14*D
|
50
|
-
discover = `6 ("011" | `5 2*D) 12*D
|
51
|
-
amex = `3 '47' 13*D
|
52
|
-
diners = `3 (`0 D5 | '68' D) 11*D
|
53
|
-
jcb = ("2131"|"1800"|"35" 3*D) 11*D
|
54
|
-
|
55
|
-
match visa | mc | discover | amex | diners | jcb end
|
56
|
-
regex: !ruby/regexp /(4(\d){12}((\d){3})?|5[0-5](\d){14}|6(011|5(\d){2})(\d){12}|3[47](\d){13}|3(0[0-5]|[68]\d)(\d){11}|(2131|1800|35(\d){3})(\d){11})/
|
57
|
-
|
58
|
-
|
59
|
-
good: []
|
60
|
-
bad: []
|
61
|
-
- !ruby/object:Program
|
62
|
-
description: Matching US phone num (with captures)
|
63
|
-
program: |
|
64
|
-
match
|
65
|
-
@area_code = 3 * D
|
66
|
-
`-
|
67
|
-
@prefix = 3*D
|
68
|
-
`-
|
69
|
-
@last4 = 4*D
|
70
|
-
end
|
71
|
-
# regex: !ruby/regexp /(?<area_code>\d{3}){0}(?<prefix>\d{3}){0}(?<last4>\d{4}){0}\g<area_code>-\g<prefix>-\g<last4>/
|
72
|
-
regex: !ruby/regexp /(?<area_code>(\d){3})\-(?<prefix>(\d){3})\-(?<last4>(\d){4})/
|
73
|
-
good:
|
74
|
-
- '601-555-2345'
|
75
|
-
- 'call me at 888-425-9000'
|
76
|
-
bad:
|
77
|
-
- '888-HAL-9000'
|
78
|
-
- '800.237.1234'
|
79
|
-
- !ruby/object:Program
|
80
|
-
description: KNOWNFAIL Matching a clock time, 12/24 hrs
|
81
|
-
program: |
|
82
|
-
hr12 = (maybe `0) `1-`9 | `1 D2
|
83
|
-
hr24 = (maybe `0) D | `1 D | `2 D3
|
84
|
-
sep = `: | `.
|
85
|
-
min = D5 D9
|
86
|
-
sec = D5 D9
|
87
|
-
ampm = (maybe SPACE) ("am" | "pm")
|
88
|
-
time12 = hr12 sep min maybe (sep sec) maybe ampm
|
89
|
-
time24 = hr24 sep min maybe (sep sec)
|
90
|
-
match BOS (time12 | time24) EOS end
|
91
|
-
regex: !ruby/regexp /^(((0)?[1-9]|1[0-2])(:|\.)[0-5]\d((:|\.)[0-5]\d)?(( )?(am|pm))?|((0)?\d|1\d|2[0-3])(:|\.)[0-5]\d((:|\.)[0-5]\d)?)$/
|
92
|
-
good:
|
93
|
-
- '12:34'
|
94
|
-
- '1:23'
|
95
|
-
- '5:14pm'
|
96
|
-
- '19:43'
|
97
|
-
- '1:23:45'
|
98
|
-
- '1:23:45 pm'
|
99
|
-
- '7:43 pm'
|
100
|
-
- '8:32:45'
|
101
|
-
- '8.34'
|
102
|
-
- '8.34 pm'
|
103
|
-
- '8.34.45'
|
104
|
-
bad:
|
105
|
-
- ''
|
106
|
-
- abc
|
107
|
-
- '24:30'
|
108
|
-
- '25:30'
|
109
|
-
- '19:43 pm'
|
110
|
-
- '5:14 pm'
|
111
|
-
- !ruby/object:Program
|
112
|
-
description: Using nocase
|
113
|
-
program: 'match BOS "abc" nocase "def" "ghi" EOS end'
|
114
|
-
regex: !ruby/regexp /^abc((?i)def)ghi$/
|
115
|
-
good:
|
116
|
-
- "abcdefghi"
|
117
|
-
- "abcDEFghi"
|
118
|
-
- "abcdEfghi"
|
119
|
-
bad:
|
120
|
-
- ""
|
121
|
-
- "x"
|
122
|
-
- "xabcdefghi"
|
123
|
-
- "abcdefghix"
|
124
|
-
- "aBcdefghi"
|
125
|
-
- "abcdefGhi"
|
126
|
-
- "abCdefghI"
|
127
|
-
- "abCdEfghI"
|
128
|
-
# - !ruby/object:Program
|
129
|
-
# description: Simple use of two vars
|
130
|
-
# program: |
|
131
|
-
# regex: !ruby/regexp //
|
132
|
-
# good:
|
133
|
-
# bad:
|
134
|
-
- !ruby/object:Program
|
135
|
-
description: Var used in simple repetition
|
136
|
-
program: |
|
137
|
-
n = 3
|
138
|
-
match BOS n * `x EOS end
|
139
|
-
regex: !ruby/regexp /^(x){3}$/
|
140
|
-
good:
|
141
|
-
- "xxx"
|
142
|
-
bad:
|
143
|
-
- ""
|
144
|
-
- "x"
|
145
|
-
- "xx x"
|
146
|
-
- "xxxx"
|
147
|
-
- !ruby/object:Program
|
148
|
-
description: Var used in complex repetition
|
149
|
-
program: |
|
150
|
-
m = 4
|
151
|
-
n = 6
|
152
|
-
match BOS m,n * `x EOS end
|
153
|
-
regex: !ruby/regexp /^(x){4,6}$/
|
154
|
-
good:
|
155
|
-
- "xxxx"
|
156
|
-
- "xxxxx"
|
157
|
-
- "xxxxxx"
|
158
|
-
bad:
|
159
|
-
- ""
|
160
|
-
- "x"
|
161
|
-
- "xx x"
|
162
|
-
- "xxx"
|
163
|
-
- "xxxxxxx"
|
164
|
-
- !ruby/object:Program
|
165
|
-
description: Using Unicode codepoint again
|
166
|
-
program: |
|
167
|
-
euro = &20ac
|
168
|
-
price = (euro | "$") SPACE many D maybe ("." 2*D)
|
169
|
-
match BOS price EOS end
|
170
|
-
regex: !ruby/regexp /^(€|\$) (\d)+(\.(\d){2})?$/
|
171
|
-
good:
|
172
|
-
- "€ 237"
|
173
|
-
- "$ 237"
|
174
|
-
- "€ 23.45"
|
175
|
-
- "€ 0.25"
|
176
|
-
bad:
|
177
|
-
- ""
|
178
|
-
- "x"
|
179
|
-
- "€"
|
180
|
-
- "€ "
|
181
|
-
- "€  237"
|
182
|
-
- "$  237"
|
183
|
-
- "€ 23.456"
|
184
|
-
- !ruby/object:Program
|
185
|
-
description: Using within (1)
|
186
|
-
program: |
|
187
|
-
match within `/ end
|
188
|
-
regex: !ruby/regexp /(\/.*?\/)/
|
189
|
-
good:
|
190
|
-
- "There is a /slash-delimited string/ here."
|
191
|
-
bad:
|
192
|
-
- "No such string here."
|
193
|
-
- !ruby/object:Program
|
194
|
-
description: Using escaping (1)
|
195
|
-
program: |
|
196
|
-
match escaping `/ end
|
197
|
-
regex: !ruby/regexp /\/|[^\/]*?\//
|
198
|
-
good:
|
199
|
-
- "This is /slash-delimited but \\/with embedded slashes \\/ also /."
|
200
|
-
bad:
|
201
|
-
- "No such string here."
|
data/test/test.rb
DELETED
@@ -1,170 +0,0 @@
|
|
1
|
-
$LOAD_PATH << "." << "./lib"
|
2
|
-
|
3
|
-
require 'regexador'
|
4
|
-
|
5
|
-
require "minitest/autorun"
|
6
|
-
|
7
|
-
|
8
|
-
# Parser-level tests for Regexador.
#
# Each test hands a small fragment of Regexador source to a single rule of
# Regexador::Parser (or to the whole parser) and checks that it is accepted.
# Rules are invoked with either +parse+ or +parse_with_debug+, exactly as in
# the original tests.
class TestRegexador < Minitest::Test
  # Minitest instantiates the test case once per test method, so every test
  # starts with its own parser.
  def setup
    @parser = Regexador::Parser.new
  end

  # Single-character rules for quote, hash, newline and equals sign.
  def test_001_special_chars
    assert @parser.cSQUOTE.parse("'")
    assert @parser.cHASH.parse('#')
    assert @parser.cNEWLINE.parse("\n")
    assert @parser.cEQUAL.parse('=')
  end

  # Backtick character literals must accept non-ASCII characters.
  def test_002_intl_chars
    assert @parser.char.parse_with_debug("`æ")
    assert @parser.char.parse("`ß")
    assert @parser.char.parse("`ç")
    assert @parser.char.parse("`ö")
    assert @parser.char.parse("`ñ")
  end

  # Codepoint literals: ampersand followed by four hex digits.
  def test_003_codepoints
    assert @parser.codepoint.parse_with_debug("&1234")
    assert @parser.codepoint.parse('&beef')
  end

  # Predefined anchor tokens are valid patterns on their own.
  def test_004_predef_tokens
    %w[BOS EOS START END].each do |token|
      assert @parser.pattern.parse_with_debug(token), "predefined token #{token}"
    end
  end

  # Assignments with every spacing variant around "=" and several kinds of
  # right-hand side.
  def test_005_assignment
    assert @parser.assignment.parse("a = 5")
    assert @parser.assignment.parse("a= 5")
    assert @parser.assignment.parse("a =5")
    assert @parser.assignment.parse("a=5")
    assert @parser.assignment.parse("myvar = 'xyz'")
    assert @parser.assignment.parse('var2 = "hello"')
    assert @parser.assignment.parse('this_var = `x-`z')
    assert @parser.assignment.parse('pat = maybe many `x-`z')
  end

  # A keyword cannot be used as a variable name, but names that merely start
  # with a keyword can.
  def test_006_keyword_as_var
    assert_raises { @parser.assignment.parse("end = 'hello'") }

    # The original test switched to a brand-new parser after the expected
    # failure; that is preserved here.
    fresh_parser = Regexador::Parser.new
    assert fresh_parser.assignment.parse_with_debug("endx = 'hello'")
    assert fresh_parser.assignment.parse_with_debug("end5 = 'hello'")
    assert fresh_parser.assignment.parse_with_debug("end_ = 'hello'")
    assert fresh_parser.assignment.parse_with_debug("anyx = 'hello'")
  end

  # A definitions section may contain assignments, comments and blank lines.
  def test_007_def_section
    defs1 = <<-EOS
      a = 5
      str = "hello"
    EOS
    assert @parser.definitions.parse(defs1), "assertion 1"

    defs2 = <<-EOF
      a = 5
      pat = maybe many `a-`c
      # empty line follows:

      str = "hello"
      # another comment...
    EOF
    assert @parser.definitions.parse(defs2), "assertion 2"
  end

  # A capture variable is an identifier prefixed with "@".
  def test_008_capture_var
    str1 = "@myvar"
    assert @parser.capture_var.parse(str1)
  end

  # A capture with a pattern parses both as a rule and inside a full program.
  def test_009_captured_pattern
    prog = "@myvar = maybe 'abc'"
    assert @parser.capture.parse(prog)
    assert @parser.parse("match #{prog} end")
  end

  # A bare capture variable (a back reference) parses both as a rule and
  # inside a full program.
  def test_010_back_ref
    prog = '@myvar'
    # The original discarded this result; assert it like the sibling tests do.
    assert @parser.capture.parse(prog)
    assert @parser.parse("match #{prog} end")
  end

  # A match clause written on a single line.
  def test_011_one_line_match_clause
    mc1 = "match `a~`x end"
    assert @parser.match_clause.parse(mc1)
  end

  # A match clause spread over several lines, including a comment line.
  def test_012_multiline_match_clause
    mc2 = <<-EOF
      match
        `< "tag" WB
        any ~`>
        # blah blah blah
        "</" "tag" `>
      end
    EOF
    assert @parser.multiline_clause.parse(mc2)
  end

  # A complete program consisting of only a one-line match clause.
  def test_013_oneline_program
    prog = "match `a-`f end"
    assert @parser.parse_with_debug(prog)
  end

  # Complete programs with a definitions section followed by a match clause.
  def test_014_multiline_program
    prog1 = <<-EOF
      dot = "."
      num = "25" D5 | `2 D4 D | maybe D1 1,2*D
      match WB num dot num dot num dot num WB end
    EOF
    assert @parser.program.parse(prog1)

    prog2 = <<-EOF
      # Warning: This one likely has errors!

      visa = `4 12*D maybe 3*D
      mc = `5 D5 14*D
      amex = `3 '47' 13*D
      diners = `3 (`0 D5 | '68' D) 11*D
      discover = `6 ("011" | `5 2*D) 12*D
      jcb = ("2131"|"1800"|"35" 3*D) 11*D

      match visa | mc | amex | diners | discover | jcb end
    EOF
    assert @parser.program.parse(prog2)
  end

  # "without ... find ..." compiles to a negative lookbehind.
  def test_015_neg_lookbehind
    prog = ' match without "USD" find 3*D end'

    assert @parser.program.parse(prog)
    rx = Regexador.new(prog)
    # assert_equal reports both regexps when they differ.
    assert_equal(/(?<!USD)(\d){3}/, rx.regexp)
  end

  # Placeholder: reported as skipped rather than passing vacuously.
  def test_016_neg_lookahead
    skip "negative lookahead test not written yet"
  end
end
|