arugula 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/Gemfile.lock +1 -1
- data/README.md +3 -0
- data/lib/arugula.rb +12 -0
- data/lib/arugula/match_data.rb +46 -3
- data/lib/arugula/parser.rb +26 -3
- data/lib/arugula/parts.rb +91 -19
- data/lib/arugula/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9b9342abba4049dbc32ce57a0d0b6326f0290d36
|
4
|
+
data.tar.gz: 2ec7ae8575d2267627a7b9bfb7e8da5e296f7567
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 48655a01f947db1e92761c07373eb7590770de16fd88c6a89ae4a0a0c4ec3df0e611a5e43f25796e2ad56e3279922fc01fb95678fcd8d522ef41dc9207e1d265
|
7
|
+
data.tar.gz: 6c35efef0d7555482792a794ac625958520abf76f2c7eb96b81d1499d033c06de297311953fa27a9d221ad0e3b7d6114c36c79de210fd1962caf983c8658da37
|
data/.rubocop.yml
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -25,6 +25,9 @@ Or install it yourself as:
|
|
25
25
|
regexp = Arugula.new('[A-Z][a-z]+')
|
26
26
|
regexp.match?('ahoy! my name is Samuel') # => 17
|
27
27
|
regexp.match?('foobar') # => nil
|
28
|
+
|
29
|
+
regexp = Arugula.new('Hello ([a-z]+)!')
|
30
|
+
regexp.match('Hello world!') # => #<MatchData "Hello world!" 1:"world">
|
28
31
|
```
|
29
32
|
|
30
33
|
## Development
|
data/lib/arugula.rb
CHANGED
@@ -19,6 +19,7 @@ class Arugula
|
|
19
19
|
def match(str, index = 0)
|
20
20
|
match_data = MatchData.new(self, str)
|
21
21
|
loop do
|
22
|
+
match_data.reset_captures!
|
22
23
|
match, end_index = @root.match(str, index, match_data)
|
23
24
|
if match
|
24
25
|
match_data.start_index = index
|
@@ -33,4 +34,15 @@ class Arugula
|
|
33
34
|
def to_s
|
34
35
|
"/#{@root}/"
|
35
36
|
end
|
37
|
+
|
38
|
+
alias inspect to_s
|
39
|
+
|
40
|
+
def hash
|
41
|
+
to_s.hash
|
42
|
+
end
|
43
|
+
|
44
|
+
def ==(other)
|
45
|
+
return false unless other.is_a?(Arugula) || other.is_a?(Regexp)
|
46
|
+
inspect == other.inspect
|
47
|
+
end
|
36
48
|
end
|
data/lib/arugula/match_data.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
class Arugula
|
2
2
|
class MatchData
|
3
|
+
attr_reader :string, :regexp
|
3
4
|
def initialize(regexp, string)
|
4
5
|
# require "awesome_print"
|
5
6
|
# ap regexp, raw: true
|
@@ -12,6 +13,12 @@ class Arugula
|
|
12
13
|
@captures[name] = start_index...end_index
|
13
14
|
end
|
14
15
|
|
16
|
+
def reset_captures!
|
17
|
+
@captures.keys.each do |key|
|
18
|
+
@captures[key] = nil
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
15
22
|
attr_accessor :start_index
|
16
23
|
attr_accessor :end_index
|
17
24
|
|
@@ -21,18 +28,54 @@ class Arugula
|
|
21
28
|
|
22
29
|
def inspect
|
23
30
|
captures_part = @captures.map do |name, range|
|
24
|
-
" #{name}:#{@string[range]
|
31
|
+
" #{name}:#{dump_str(range && @string[range])}"
|
25
32
|
end.join
|
26
|
-
"#<MatchData #{to_s
|
33
|
+
"#<MatchData #{dump_str(to_s)}#{captures_part}>"
|
27
34
|
end
|
28
35
|
|
29
36
|
def to_a
|
30
|
-
|
37
|
+
captures.unshift(to_s)
|
38
|
+
end
|
39
|
+
|
40
|
+
def size
|
41
|
+
@captures.size + 1
|
42
|
+
end
|
43
|
+
alias length size
|
44
|
+
|
45
|
+
def captures
|
46
|
+
@captures.map { |_name, range| range && @string[range] }
|
47
|
+
end
|
48
|
+
|
49
|
+
def pre_match
|
50
|
+
return '' if start_index == 0
|
51
|
+
@string[0...start_index]
|
52
|
+
end
|
53
|
+
|
54
|
+
def post_match
|
55
|
+
return '' if end_index == string.size
|
56
|
+
@string[end_index..-1]
|
31
57
|
end
|
32
58
|
|
33
59
|
def freeze
|
34
60
|
@captures.freeze
|
35
61
|
super
|
36
62
|
end
|
63
|
+
|
64
|
+
def hash
|
65
|
+
@string.hash ^ @regexp.hash ^ @captures.hash
|
66
|
+
end
|
67
|
+
|
68
|
+
def ==(other)
|
69
|
+
return false unless other.is_a?(MatchData) || other.is_a?(::MatchData)
|
70
|
+
string == other.string &&
|
71
|
+
regexp == other.regexp &&
|
72
|
+
captures == other.captures
|
73
|
+
end
|
74
|
+
|
75
|
+
private
|
76
|
+
|
77
|
+
def dump_str(str)
|
78
|
+
str.nil? ? 'nil' : str.dump
|
79
|
+
end
|
37
80
|
end
|
38
81
|
end
|
data/lib/arugula/parser.rb
CHANGED
@@ -30,7 +30,7 @@ class Arugula
|
|
30
30
|
peek = pattern.chr
|
31
31
|
if tok.nil?
|
32
32
|
fail 'shouldnt happen'
|
33
|
-
elsif tok == '['
|
33
|
+
elsif tok == '[' && !characterclass_type?
|
34
34
|
push_part(:characterclass)
|
35
35
|
elsif tok == '-' &&
|
36
36
|
characterclass_type? &&
|
@@ -41,6 +41,10 @@ class Arugula
|
|
41
41
|
pattern.slice!(0)
|
42
42
|
elsif tok == ']' && characterclass_type?
|
43
43
|
pop_part
|
44
|
+
elsif tok == '^' && characterclass_type? && state.parts.empty?
|
45
|
+
characterclass_part = pop_part
|
46
|
+
wrap_state(:not)
|
47
|
+
@states << characterclass_part
|
44
48
|
elsif tok == '$'
|
45
49
|
push_part(:eol)
|
46
50
|
elsif tok == '^'
|
@@ -72,6 +76,24 @@ class Arugula
|
|
72
76
|
wrap_state(:star)
|
73
77
|
elsif tok == '+'
|
74
78
|
wrap_state(:plus)
|
79
|
+
elsif tok == '?'
|
80
|
+
wrap_state(:question)
|
81
|
+
elsif tok == '{'
|
82
|
+
before_comma = ''
|
83
|
+
after_comma = ''
|
84
|
+
until pattern.chr == ',' || pattern.chr == '}'
|
85
|
+
before_comma << pattern.slice!(0)
|
86
|
+
end
|
87
|
+
if pattern.chr == ','
|
88
|
+
pattern.slice!(0)
|
89
|
+
else
|
90
|
+
after_comma = before_comma
|
91
|
+
end
|
92
|
+
after_comma << pattern.slice!(0) until pattern.chr == '}'
|
93
|
+
pattern.slice!(0) if pattern.chr == '}'
|
94
|
+
before = before_comma.empty? ? 0 : before_comma.to_i
|
95
|
+
after = after_comma.empty? ? Float::INFINITY : after_comma.to_i
|
96
|
+
wrap_state(:quantifier, before, after)
|
75
97
|
else
|
76
98
|
push_part(:literal, tok)
|
77
99
|
end
|
@@ -85,8 +107,9 @@ class Arugula
|
|
85
107
|
@states << part unless name == :literal
|
86
108
|
end
|
87
109
|
|
88
|
-
def wrap_state(name)
|
89
|
-
wrapped = Part.all.find { |p| p.type == name }
|
110
|
+
def wrap_state(name, *content)
|
111
|
+
wrapped = Part.all.find { |p| p.type == name }
|
112
|
+
.new(*content, state.parts.pop)
|
90
113
|
state.parts << wrapped
|
91
114
|
@states << wrapped
|
92
115
|
end
|
data/lib/arugula/parts.rb
CHANGED
@@ -21,7 +21,7 @@ class Arugula
|
|
21
21
|
end
|
22
22
|
|
23
23
|
def to_s
|
24
|
-
literal.gsub('\\', '\\\\')
|
24
|
+
literal.gsub('\\', '\\\\').gsub(/[.]/) { |m| "\\#{m}" }
|
25
25
|
end
|
26
26
|
|
27
27
|
def match(str, index, _match_data)
|
@@ -61,8 +61,8 @@ class Arugula
|
|
61
61
|
|
62
62
|
def match(str, index, match_data)
|
63
63
|
parts.each do |part|
|
64
|
-
|
65
|
-
return true, match_index if
|
64
|
+
matches, match_index = part.match(str, index, match_data)
|
65
|
+
return true, match_index if matches
|
66
66
|
end
|
67
67
|
[false, index]
|
68
68
|
end
|
@@ -83,7 +83,14 @@ class Arugula
|
|
83
83
|
class CharacterClassPart < Part
|
84
84
|
include MatchAny
|
85
85
|
def to_s
|
86
|
-
|
86
|
+
parts_string = parts.map do |part|
|
87
|
+
next part unless part.class.type == :literal
|
88
|
+
lit = part.literal
|
89
|
+
lit = '\\]' if lit == ']'
|
90
|
+
lit = '\\[' if lit == '['
|
91
|
+
lit
|
92
|
+
end.join
|
93
|
+
"[#{parts_string}]"
|
87
94
|
end
|
88
95
|
end
|
89
96
|
|
@@ -108,13 +115,17 @@ class Arugula
|
|
108
115
|
d: ->(str, index) { ('0'..'9').member?(str[index]) },
|
109
116
|
s: ->(str, index) { [' ', "\t"].include?(str[index]) },
|
110
117
|
S: ->(str, index) { ![' ', "\t"].include?(str[index]) },
|
118
|
+
z: ->(str, index) { index == str.size },
|
119
|
+
Z: ->(str, index) { str[index..-1] == "\n" || index == str.size },
|
111
120
|
}.freeze
|
112
121
|
|
113
122
|
OFFSETS = begin
|
114
123
|
offsets = {
|
115
|
-
A: 0,
|
124
|
+
A: ->(_str, _index) { 0 },
|
125
|
+
Z: ->(_str, _index) { 0 },
|
126
|
+
z: ->(_str, _index) { 0 },
|
116
127
|
}
|
117
|
-
offsets.default = 1
|
128
|
+
offsets.default = ->(_str, _index) { 1 }
|
118
129
|
offsets.freeze
|
119
130
|
end
|
120
131
|
|
@@ -124,7 +135,7 @@ class Arugula
|
|
124
135
|
|
125
136
|
def match(str, index, _match_data)
|
126
137
|
matches = MATCHERS[@metachar][str, index]
|
127
|
-
[matches, index + (matches ? OFFSETS[@metachar] : 0)]
|
138
|
+
[matches, index + (matches ? OFFSETS[@metachar][str, index] : 0)]
|
128
139
|
end
|
129
140
|
|
130
141
|
def to_s
|
@@ -181,33 +192,93 @@ class Arugula
|
|
181
192
|
end
|
182
193
|
end
|
183
194
|
|
184
|
-
class
|
195
|
+
class NotPart < Part
|
185
196
|
include Wrapping
|
197
|
+
|
186
198
|
def to_s
|
187
|
-
|
199
|
+
@wrapped.to_s.dup.insert(1, '^')
|
200
|
+
end
|
201
|
+
|
202
|
+
def match(str, index, match_data)
|
203
|
+
matches, end_index = wrapped.match(str, index, match_data)
|
204
|
+
[!matches, matches ? index : end_index + 1]
|
205
|
+
end
|
206
|
+
end
|
207
|
+
|
208
|
+
module MatchNTimes
|
209
|
+
include Wrapping
|
210
|
+
def initialize(*args, times: 1..1)
|
211
|
+
@times = times
|
212
|
+
super(*args)
|
188
213
|
end
|
189
214
|
|
190
215
|
def match(str, index, match_data)
|
216
|
+
match_count = 0
|
217
|
+
end_index = index
|
218
|
+
|
191
219
|
loop do
|
192
220
|
matches, index = wrapped.match(str, index, match_data)
|
193
|
-
|
221
|
+
if matches
|
222
|
+
end_index = index
|
223
|
+
match_count += 1
|
224
|
+
end
|
225
|
+
break if !matches || match_count > @times.end
|
194
226
|
end
|
227
|
+
|
228
|
+
matches = @times.member?(match_count)
|
229
|
+
[matches, matches ? end_index : index]
|
230
|
+
end
|
231
|
+
end
|
232
|
+
|
233
|
+
class StarPart < Part
|
234
|
+
include MatchNTimes
|
235
|
+
def initialize(*args)
|
236
|
+
super(*args, times: 0..Float::INFINITY)
|
237
|
+
end
|
238
|
+
|
239
|
+
def to_s
|
240
|
+
"#{wrapped}*"
|
195
241
|
end
|
196
242
|
end
|
197
243
|
|
198
244
|
class PlusPart < Part
|
199
|
-
include
|
245
|
+
include MatchNTimes
|
246
|
+
def initialize(*args)
|
247
|
+
super(*args, times: 1..Float::INFINITY)
|
248
|
+
end
|
249
|
+
|
200
250
|
def to_s
|
201
251
|
"#{wrapped}+"
|
202
252
|
end
|
253
|
+
end
|
203
254
|
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
255
|
+
class QuestionPart < Part
|
256
|
+
include MatchNTimes
|
257
|
+
def initialize(*args)
|
258
|
+
super(*args, times: 0..1)
|
259
|
+
end
|
260
|
+
|
261
|
+
def to_s
|
262
|
+
"#{wrapped}?"
|
263
|
+
end
|
264
|
+
end
|
265
|
+
|
266
|
+
class QuantifierPart < Part
|
267
|
+
include MatchNTimes
|
268
|
+
def initialize(before, after, *args)
|
269
|
+
super(*args, times: before..after)
|
270
|
+
end
|
271
|
+
|
272
|
+
def to_s
|
273
|
+
before = @times.begin
|
274
|
+
after = @times.end
|
275
|
+
quantifier_part = '{'.dup
|
276
|
+
quantifier_part << before.to_s unless before == 0
|
277
|
+
quantifier_part << ',' unless before == after
|
278
|
+
quantifier_part << after.to_s unless before == after ||
|
279
|
+
after == Float::INFINITY
|
280
|
+
quantifier_part << '}'
|
281
|
+
"#{wrapped}#{quantifier_part}"
|
211
282
|
end
|
212
283
|
end
|
213
284
|
|
@@ -217,7 +288,8 @@ class Arugula
|
|
217
288
|
end
|
218
289
|
|
219
290
|
def match(str, index, _match_data)
|
220
|
-
|
291
|
+
char = str[index]
|
292
|
+
matches = char && char != "\n"
|
221
293
|
[matches, index + (matches ? 1 : 0)]
|
222
294
|
end
|
223
295
|
end
|
data/lib/arugula/version.rb
CHANGED