regexador 0.4.5 → 0.4.6
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
- checksums.yaml +5 -5
- data/README.md +54 -46
- data/lib/chars.rb +1 -0
- data/lib/keywords.rb +13 -11
- data/lib/regexador_parser.rb +3 -2
- data/spec/GENERATED_spec.rb +3347 -0
- data/spec/captures.yaml +85 -0
- data/spec/old_mkcode.rb +116 -0
- data/spec/oneliners.yaml +1036 -0
- data/spec/programs.yaml +201 -0
- data/spec/regexador_spec.rb +348 -0
- data/test/captures.yaml +85 -0
- data/test/mkcode.rb +122 -0
- data/test/oneliners.yaml +1036 -0
- data/test/programs.yaml +201 -0
- data/test/test.rb +147 -16
- metadata +30 -11
data/test/programs.yaml
ADDED
@@ -0,0 +1,201 @@
|
|
1
|
+
---
|
2
|
+
- !ruby/object:Program
|
3
|
+
description: Simple use of two vars
|
4
|
+
program: |
|
5
|
+
var1 = "abc"
|
6
|
+
var2 = "def"
|
7
|
+
match var1 var2 end
|
8
|
+
regex: !ruby/regexp /abcdef/
|
9
|
+
good:
|
10
|
+
- abcdefghi
|
11
|
+
- xyzabcdef
|
12
|
+
bad:
|
13
|
+
- ''
|
14
|
+
- abcxyzdef
|
15
|
+
- !ruby/object:Program
|
16
|
+
description: Multiline match with two vars
|
17
|
+
program: " var1 = \"abc\"\n var2 = \"def\"\n \n # Test a blank line
|
18
|
+
and comment as well.\n \n match # multiline match with comment\n var1\n
|
19
|
+
\ var2\n end\n"
|
20
|
+
regex: !ruby/regexp /abcdef/
|
21
|
+
good:
|
22
|
+
- abcdefghi
|
23
|
+
- xyzabcdef
|
24
|
+
bad:
|
25
|
+
- ''
|
26
|
+
- abcxyzdef
|
27
|
+
- !ruby/object:Program
|
28
|
+
description: IPv4 address
|
29
|
+
program: |
|
30
|
+
dot = "."
|
31
|
+
num = "25" D5 | `2 D4 D | maybe D1 1,2*D
|
32
|
+
match BOS num dot num dot num dot num EOS end
|
33
|
+
regex: !ruby/regexp /^(25[0-5]|2[0-4]\d|([01])?(\d){1,2})\.(25[0-5]|2[0-4]\d|([01])?(\d){1,2})\.(25[0-5]|2[0-4]\d|([01])?(\d){1,2})\.(25[0-5]|2[0-4]\d|([01])?(\d){1,2})$/
|
34
|
+
good:
|
35
|
+
- "127.0.0.1"
|
36
|
+
- "255.254.93.22"
|
37
|
+
- "255.254.93.22"
|
38
|
+
bad:
|
39
|
+
- ''
|
40
|
+
- "7.8.9"
|
41
|
+
- "3.4.5.6.7"
|
42
|
+
- "1.2.3.256"
|
43
|
+
- !ruby/object:Program
|
44
|
+
description: Identifying credit cards
|
45
|
+
program: |
|
46
|
+
# Warning: This one likely has errors!
|
47
|
+
|
48
|
+
visa = `4 12*D maybe 3*D
|
49
|
+
mc = `5 D5 14*D
|
50
|
+
discover = `6 ("011" | `5 2*D) 12*D
|
51
|
+
amex = `3 '47' 13*D
|
52
|
+
diners = `3 (`0 D5 | '68' D) 11*D
|
53
|
+
jcb = ("2131"|"1800"|"35" 3*D) 11*D
|
54
|
+
|
55
|
+
match visa | mc | discover | amex | diners | jcb end
|
56
|
+
regex: !ruby/regexp /(4(\d){12}((\d){3})?|5[0-5](\d){14}|6(011|5(\d){2})(\d){12}|3[47](\d){13}|3(0[0-5]|[68]\d)(\d){11}|(2131|1800|35(\d){3})(\d){11})/
|
57
|
+
|
58
|
+
|
59
|
+
good: []
|
60
|
+
bad: []
|
61
|
+
- !ruby/object:Program
|
62
|
+
description: Matching US phone num (with captures)
|
63
|
+
program: |
|
64
|
+
match
|
65
|
+
@area_code = 3 * D
|
66
|
+
`-
|
67
|
+
@prefix = 3*D
|
68
|
+
`-
|
69
|
+
@last4 = 4*D
|
70
|
+
end
|
71
|
+
# regex: !ruby/regexp /(?<area_code>\d{3}){0}(?<prefix>\d{3}){0}(?<last4>\d{4}){0}\g<area_code>-\g<prefix>-\g<last4>/
|
72
|
+
regex: !ruby/regexp /(?<area_code>(\d){3})\-(?<prefix>(\d){3})\-(?<last4>(\d){4})/
|
73
|
+
good:
|
74
|
+
- '601-555-2345'
|
75
|
+
- 'call me at 888-425-9000'
|
76
|
+
bad:
|
77
|
+
- '888-HAL-9000'
|
78
|
+
- '800.237.1234'
|
79
|
+
- !ruby/object:Program
|
80
|
+
description: KNOWNFAIL Matching a clock time, 12/24 hrs
|
81
|
+
program: |
|
82
|
+
hr12 = (maybe `0) `1-`9 | `1 D2
|
83
|
+
hr24 = (maybe `0) D | `1 D | `2 D3
|
84
|
+
sep = `: | `.
|
85
|
+
min = D5 D9
|
86
|
+
sec = D5 D9
|
87
|
+
ampm = (maybe SPACE) ("am" | "pm")
|
88
|
+
time12 = hr12 sep min maybe (sep sec) maybe ampm
|
89
|
+
time24 = hr24 sep min maybe (sep sec)
|
90
|
+
match BOS (time12 | time24) EOS end
|
91
|
+
regex: !ruby/regexp /^(((0)?[1-9]|1[0-2])(:|\.)[0-5]\d((:|\.)[0-5]\d)?(( )?(am|pm))?|((0)?\d|1\d|2[0-3])(:|\.)[0-5]\d((:|\.)[0-5]\d)?)$/
|
92
|
+
good:
|
93
|
+
- '12:34'
|
94
|
+
- '1:23'
|
95
|
+
- '5:14pm'
|
96
|
+
- '19:43'
|
97
|
+
- '1:23:45'
|
98
|
+
- '1:23:45 pm'
|
99
|
+
- '7:43 pm'
|
100
|
+
- '8:32:45'
|
101
|
+
- '8.34'
|
102
|
+
- '8.34 pm'
|
103
|
+
- '8.34.45'
|
104
|
+
bad:
|
105
|
+
- ''
|
106
|
+
- abc
|
107
|
+
- '24:30'
|
108
|
+
- '25:30'
|
109
|
+
- '19:43 pm'
|
110
|
+
- '5:14 pm'
|
111
|
+
- !ruby/object:Program
|
112
|
+
description: Using nocase
|
113
|
+
program: 'match BOS "abc" nocase "def" "ghi" EOS end'
|
114
|
+
regex: !ruby/regexp /^abc((?i)def)ghi$/
|
115
|
+
good:
|
116
|
+
- "abcdefghi"
|
117
|
+
- "abcDEFghi"
|
118
|
+
- "abcdEfghi"
|
119
|
+
bad:
|
120
|
+
- ""
|
121
|
+
- "x"
|
122
|
+
- "xabcdefghi"
|
123
|
+
- "abcdefghix"
|
124
|
+
- "aBcdefghi"
|
125
|
+
- "abcdefGhi"
|
126
|
+
- "abCdefghI"
|
127
|
+
- "abCdEfghI"
|
128
|
+
# - !ruby/object:Program
|
129
|
+
# description: Simple use of two vars
|
130
|
+
# program: |
|
131
|
+
# regex: !ruby/regexp //
|
132
|
+
# good:
|
133
|
+
# bad:
|
134
|
+
- !ruby/object:Program
|
135
|
+
description: Var used in simple repetition
|
136
|
+
program: |
|
137
|
+
n = 3
|
138
|
+
match BOS n * `x EOS end
|
139
|
+
regex: !ruby/regexp /^(x){3}$/
|
140
|
+
good:
|
141
|
+
- "xxx"
|
142
|
+
bad:
|
143
|
+
- ""
|
144
|
+
- "x"
|
145
|
+
- "xx x"
|
146
|
+
- "xxxx"
|
147
|
+
- !ruby/object:Program
|
148
|
+
description: Var used in complex repetition
|
149
|
+
program: |
|
150
|
+
m = 4
|
151
|
+
n = 6
|
152
|
+
match BOS m,n * `x EOS end
|
153
|
+
regex: !ruby/regexp /^(x){4,6}$/
|
154
|
+
good:
|
155
|
+
- "xxxx"
|
156
|
+
- "xxxxx"
|
157
|
+
- "xxxxxx"
|
158
|
+
bad:
|
159
|
+
- ""
|
160
|
+
- "x"
|
161
|
+
- "xx x"
|
162
|
+
- "xxx"
|
163
|
+
- "xxxxxxx"
|
164
|
+
- !ruby/object:Program
|
165
|
+
description: Using Unicode codepoint again
|
166
|
+
program: |
|
167
|
+
euro = &20ac
|
168
|
+
price = (euro | "$") SPACE many D maybe ("." 2*D)
|
169
|
+
match BOS price EOS end
|
170
|
+
regex: !ruby/regexp /^(€|\$) (\d)+(\.(\d){2})?$/
|
171
|
+
good:
|
172
|
+
- "€ 237"
|
173
|
+
- "$ 237"
|
174
|
+
- "€ 23.45"
|
175
|
+
- "€ 0.25"
|
176
|
+
bad:
|
177
|
+
- ""
|
178
|
+
- "x"
|
179
|
+
- "€"
|
180
|
+
- "€ "
|
181
|
+
- "€  237"
|
182
|
+
- "$  237"
|
183
|
+
- "€ 23.456"
|
184
|
+
- !ruby/object:Program
|
185
|
+
description: Using within (1)
|
186
|
+
program: |
|
187
|
+
match within `/ end
|
188
|
+
regex: !ruby/regexp /(\/.*?\/)/
|
189
|
+
good:
|
190
|
+
- "There is a /slash-delimited string/ here."
|
191
|
+
bad:
|
192
|
+
- "No such string here."
|
193
|
+
- !ruby/object:Program
|
194
|
+
description: Using escaping (1)
|
195
|
+
program: |
|
196
|
+
match escaping `/ end
|
197
|
+
regex: !ruby/regexp /\/|[^\/]*?\//
|
198
|
+
good:
|
199
|
+
- "This is /slash-delimited but \\/with embedded slashes \\/ also /."
|
200
|
+
bad:
|
201
|
+
- "No such string here."
|
data/test/test.rb
CHANGED
@@ -8,32 +8,163 @@ require "minitest/autorun"
|
|
8
8
|
class TestRegexador < Minitest::Test
|
9
9
|
|
10
10
|
def test_001_special_chars
|
11
|
-
|
12
|
-
assert
|
13
|
-
assert
|
14
|
-
assert
|
15
|
-
assert
|
11
|
+
parser = Regexador::Parser.new
|
12
|
+
assert parser.cSQUOTE.parse("'")
|
13
|
+
assert parser.cHASH.parse('#')
|
14
|
+
assert parser.cNEWLINE.parse("\n")
|
15
|
+
assert parser.cEQUAL.parse('=')
|
16
16
|
end
|
17
17
|
|
18
18
|
def test_002_intl_chars
|
19
|
-
|
20
|
-
assert
|
21
|
-
assert
|
22
|
-
assert
|
23
|
-
assert
|
24
|
-
assert
|
19
|
+
parser = Regexador::Parser.new
|
20
|
+
assert parser.char.parse_with_debug("`æ")
|
21
|
+
assert parser.char.parse("`ß")
|
22
|
+
assert parser.char.parse("`ç")
|
23
|
+
assert parser.char.parse("`ö")
|
24
|
+
assert parser.char.parse("`ñ")
|
25
25
|
end
|
26
26
|
|
27
27
|
def test_003_codepoints
|
28
|
-
|
29
|
-
assert
|
30
|
-
assert
|
28
|
+
parser = Regexador::Parser.new
|
29
|
+
assert parser.codepoint.parse_with_debug("&1234")
|
30
|
+
assert parser.codepoint.parse('&beef')
|
31
31
|
end
|
32
32
|
|
33
33
|
def test_004_predef_tokens
|
34
|
-
|
34
|
+
parser = Regexador::Parser.new
|
35
35
|
%w(BOS EOS START END).each do |token|
|
36
|
-
assert
|
36
|
+
assert parser.pattern.parse_with_debug(token)
|
37
37
|
end
|
38
38
|
end
|
39
|
+
|
40
|
+
def test_005_assignment
|
41
|
+
parser = Regexador::Parser.new
|
42
|
+
assert parser.assignment.parse("a = 5")
|
43
|
+
assert parser.assignment.parse("a= 5")
|
44
|
+
assert parser.assignment.parse("a =5")
|
45
|
+
assert parser.assignment.parse("a=5")
|
46
|
+
assert parser.assignment.parse("myvar = 'xyz'")
|
47
|
+
assert parser.assignment.parse('var2 = "hello"')
|
48
|
+
assert parser.assignment.parse('this_var = `x-`z')
|
49
|
+
assert parser.assignment.parse('pat = maybe many `x-`z')
|
50
|
+
end
|
51
|
+
|
52
|
+
def test_006_keyword_as_var
|
53
|
+
parser = Regexador::Parser.new
|
54
|
+
assert_raises { parser.assignment.parse("end = 'hello'") }
|
55
|
+
parser = Regexador::Parser.new
|
56
|
+
assert parser.assignment.parse_with_debug("endx = 'hello'")
|
57
|
+
assert parser.assignment.parse_with_debug("end5 = 'hello'")
|
58
|
+
assert parser.assignment.parse_with_debug("end_ = 'hello'")
|
59
|
+
assert parser.assignment.parse_with_debug("anyx = 'hello'")
|
60
|
+
end
|
61
|
+
|
62
|
+
def test_007_def_section
|
63
|
+
parser = Regexador::Parser.new
|
64
|
+
defs1 = <<-EOS
|
65
|
+
a = 5
|
66
|
+
str = "hello"
|
67
|
+
EOS
|
68
|
+
assert parser.definitions.parse(defs1), "assertion 1"
|
69
|
+
defs2 = <<-EOF
|
70
|
+
a = 5
|
71
|
+
pat = maybe many `a-`c
|
72
|
+
# empty line follows:
|
73
|
+
|
74
|
+
str = "hello"
|
75
|
+
# another comment...
|
76
|
+
EOF
|
77
|
+
assert parser.definitions.parse(defs2), "assertion 2"
|
78
|
+
end
|
79
|
+
|
80
|
+
def test_008_capture_var
|
81
|
+
parser = Regexador::Parser.new
|
82
|
+
str1 = "@myvar"
|
83
|
+
assert parser.capture_var.parse(str1)
|
84
|
+
end
|
85
|
+
|
86
|
+
def test_009_captured_pattern
|
87
|
+
parser = Regexador::Parser.new
|
88
|
+
prog = "@myvar = maybe 'abc'"
|
89
|
+
assert parser.capture.parse(prog)
|
90
|
+
assert parser.parse("match #{prog} end")
|
91
|
+
end
|
92
|
+
|
93
|
+
def test_010_back_ref
|
94
|
+
parser = Regexador::Parser.new
|
95
|
+
prog = '@myvar'
|
96
|
+
parser.capture.parse(prog)
|
97
|
+
assert parser.parse("match #{prog} end")
|
98
|
+
end
|
99
|
+
|
100
|
+
def test_011_one_line_match_clause
|
101
|
+
parser = Regexador::Parser.new
|
102
|
+
mc1 = "match `a~`x end"
|
103
|
+
assert parser.match_clause.parse(mc1)
|
104
|
+
end
|
105
|
+
|
106
|
+
def test_012_multiline_match_clause
|
107
|
+
parser = Regexador::Parser.new
|
108
|
+
mc2 = <<-EOF
|
109
|
+
match
|
110
|
+
`< "tag" WB
|
111
|
+
any ~`>
|
112
|
+
# blah blah blah
|
113
|
+
"</" "tag" `>
|
114
|
+
end
|
115
|
+
EOF
|
116
|
+
assert parser.multiline_clause.parse(mc2)
|
117
|
+
end
|
118
|
+
|
119
|
+
def test_013_oneline_program
|
120
|
+
parser = Regexador::Parser.new
|
121
|
+
prog = "match `a-`f end"
|
122
|
+
assert parser.parse_with_debug(prog)
|
123
|
+
end
|
124
|
+
|
125
|
+
def test_014_multiline_program
|
126
|
+
parser = Regexador::Parser.new
|
127
|
+
prog1 = <<-EOF
|
128
|
+
dot = "."
|
129
|
+
num = "25" D5 | `2 D4 D | maybe D1 1,2*D
|
130
|
+
match WB num dot num dot num dot num WB end
|
131
|
+
EOF
|
132
|
+
assert parser.program.parse(prog1)
|
133
|
+
|
134
|
+
prog2 = <<-EOF
|
135
|
+
# Warning: This one likely has errors!
|
136
|
+
|
137
|
+
visa = `4 12*D maybe 3*D
|
138
|
+
mc = `5 D5 14*D
|
139
|
+
amex = `3 '47' 13*D
|
140
|
+
diners = `3 (`0 D5 | '68' D) 11*D
|
141
|
+
discover = `6 ("011" | `5 2*D) 12*D
|
142
|
+
jcb = ("2131"|"1800"|"35" 3*D) 11*D
|
143
|
+
|
144
|
+
match visa | mc | amex | diners | discover | jcb end
|
145
|
+
EOF
|
146
|
+
assert parser.program.parse(prog2)
|
147
|
+
end
|
148
|
+
|
149
|
+
def test_015_neg_lookbehind
|
150
|
+
parser = Regexador::Parser.new
|
151
|
+
prog = ' match without "USD" find 3*D end'
|
152
|
+
|
153
|
+
assert parser.program.parse(prog)
|
154
|
+
rx = Regexador.new(prog)
|
155
|
+
assert rx.regexp == /(?<!USD)(\d){3}/
|
156
|
+
end
|
157
|
+
|
158
|
+
def test_010_neg_lookahead
|
159
|
+
end
|
160
|
+
|
161
|
+
def test_010_
|
162
|
+
end
|
163
|
+
|
164
|
+
def test_010_
|
165
|
+
end
|
166
|
+
|
167
|
+
def test_010_
|
168
|
+
end
|
169
|
+
|
39
170
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexador
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.5
|
4
|
+
version: 0.4.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hal Fulton
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2019-
|
12
|
+
date: 2019-12-29 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: parslet
|
@@ -56,11 +56,11 @@ dependencies:
|
|
56
56
|
description: "This is implemented as an \"external DSL\" in Ruby; that is (like SQL
|
57
57
|
for example), \na \"program\" in a Ruby string is passed into some kind of parser/interpreter
|
58
58
|
method.\nIn this case, it is possible to use the result \"as is\" or to convert
|
59
|
-
to an ordinary \nRuby regular expression.\n\
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
59
|
+
to an ordinary \nRuby regular expression.\n\nThis project was originally implemented
|
60
|
+
\"for Ruby, using Ruby.\" Tentative efforts\nare being made for ports to Elixir
|
61
|
+
and Python.\n\nDevelopment on this project resumed in 2019 after being untouched
|
62
|
+
since 2015. As such, \nit is not 100% mature. Syntax and semantics may change. Feel
|
63
|
+
free to offer comments \nor suggestions.\n"
|
64
64
|
email:
|
65
65
|
- rubyhacker@gmail.com
|
66
66
|
executables: []
|
@@ -74,13 +74,23 @@ files:
|
|
74
74
|
- lib/regexador.rb
|
75
75
|
- lib/regexador_parser.rb
|
76
76
|
- lib/regexador_xform.rb
|
77
|
+
- spec/GENERATED_spec.rb
|
78
|
+
- spec/captures.yaml
|
79
|
+
- spec/old_mkcode.rb
|
80
|
+
- spec/oneliners.yaml
|
77
81
|
- spec/parsing_spec.rb
|
82
|
+
- spec/programs.yaml
|
78
83
|
- spec/programs_spec.rb
|
84
|
+
- spec/regexador_spec.rb
|
79
85
|
- spec/testing.rb
|
86
|
+
- test/captures.yaml
|
87
|
+
- test/mkcode.rb
|
88
|
+
- test/oneliners.yaml
|
89
|
+
- test/programs.yaml
|
80
90
|
- test/test.rb
|
81
91
|
homepage: http://github.com/hal9000/regexador
|
82
92
|
licenses:
|
83
|
-
-
|
93
|
+
- Ruby
|
84
94
|
metadata: {}
|
85
95
|
post_install_message:
|
86
96
|
rdoc_options: []
|
@@ -97,13 +107,22 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
97
107
|
- !ruby/object:Gem::Version
|
98
108
|
version: '0'
|
99
109
|
requirements: []
|
100
|
-
|
101
|
-
rubygems_version: 2.2.2
|
110
|
+
rubygems_version: 3.0.4
|
102
111
|
signing_key:
|
103
112
|
specification_version: 4
|
104
113
|
summary: A mini-language to make regular expressions more readable.
|
105
114
|
test_files:
|
115
|
+
- test/captures.yaml
|
116
|
+
- test/mkcode.rb
|
117
|
+
- test/oneliners.yaml
|
118
|
+
- test/programs.yaml
|
106
119
|
- test/test.rb
|
107
|
-
- spec/
|
120
|
+
- spec/GENERATED_spec.rb
|
121
|
+
- spec/captures.yaml
|
122
|
+
- spec/old_mkcode.rb
|
123
|
+
- spec/oneliners.yaml
|
108
124
|
- spec/parsing_spec.rb
|
125
|
+
- spec/programs.yaml
|
109
126
|
- spec/programs_spec.rb
|
127
|
+
- spec/regexador_spec.rb
|
128
|
+
- spec/testing.rb
|