arugula 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/Gemfile.lock +1 -1
- data/README.md +3 -0
- data/lib/arugula.rb +12 -0
- data/lib/arugula/match_data.rb +46 -3
- data/lib/arugula/parser.rb +26 -3
- data/lib/arugula/parts.rb +91 -19
- data/lib/arugula/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9b9342abba4049dbc32ce57a0d0b6326f0290d36
|
4
|
+
data.tar.gz: 2ec7ae8575d2267627a7b9bfb7e8da5e296f7567
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 48655a01f947db1e92761c07373eb7590770de16fd88c6a89ae4a0a0c4ec3df0e611a5e43f25796e2ad56e3279922fc01fb95678fcd8d522ef41dc9207e1d265
|
7
|
+
data.tar.gz: 6c35efef0d7555482792a794ac625958520abf76f2c7eb96b81d1499d033c06de297311953fa27a9d221ad0e3b7d6114c36c79de210fd1962caf983c8658da37
|
data/.rubocop.yml
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -25,6 +25,9 @@ Or install it yourself as:
|
|
25
25
|
regexp = Arugula.new('[A-Z][a-z]+')
|
26
26
|
regexp.match?('ahoy! my name is Samuel') # => 17
|
27
27
|
regexp.match?('foobar') # => null
|
28
|
+
|
29
|
+
regexp = Arugula.new('Hello ([a-z]+)!')
|
30
|
+
regexp.match('Hello world!') # => #<MatchData "Hello world!" 1:"world">
|
28
31
|
```
|
29
32
|
|
30
33
|
## Development
|
data/lib/arugula.rb
CHANGED
@@ -19,6 +19,7 @@ class Arugula
|
|
19
19
|
def match(str, index = 0)
|
20
20
|
match_data = MatchData.new(self, str)
|
21
21
|
loop do
|
22
|
+
match_data.reset_captures!
|
22
23
|
match, end_index = @root.match(str, index, match_data)
|
23
24
|
if match
|
24
25
|
match_data.start_index = index
|
@@ -33,4 +34,15 @@ class Arugula
|
|
33
34
|
def to_s
|
34
35
|
"/#{@root}/"
|
35
36
|
end
|
37
|
+
|
38
|
+
alias inspect to_s
|
39
|
+
|
40
|
+
def hash
|
41
|
+
to_s.hash
|
42
|
+
end
|
43
|
+
|
44
|
+
def ==(other)
|
45
|
+
return false unless other.is_a?(Arugula) || other.is_a?(Regexp)
|
46
|
+
inspect == other.inspect
|
47
|
+
end
|
36
48
|
end
|
data/lib/arugula/match_data.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
class Arugula
|
2
2
|
class MatchData
|
3
|
+
attr_reader :string, :regexp
|
3
4
|
def initialize(regexp, string)
|
4
5
|
# require "awesome_print"
|
5
6
|
# ap regexp, raw: true
|
@@ -12,6 +13,12 @@ class Arugula
|
|
12
13
|
@captures[name] = start_index...end_index
|
13
14
|
end
|
14
15
|
|
16
|
+
def reset_captures!
|
17
|
+
@captures.keys.each do |key|
|
18
|
+
@captures[key] = nil
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
15
22
|
attr_accessor :start_index
|
16
23
|
attr_accessor :end_index
|
17
24
|
|
@@ -21,18 +28,54 @@ class Arugula
|
|
21
28
|
|
22
29
|
def inspect
|
23
30
|
captures_part = @captures.map do |name, range|
|
24
|
-
" #{name}:#{@string[range]
|
31
|
+
" #{name}:#{dump_str(range && @string[range])}"
|
25
32
|
end.join
|
26
|
-
"#<MatchData #{to_s
|
33
|
+
"#<MatchData #{dump_str(to_s)}#{captures_part}>"
|
27
34
|
end
|
28
35
|
|
29
36
|
def to_a
|
30
|
-
|
37
|
+
captures.unshift(to_s)
|
38
|
+
end
|
39
|
+
|
40
|
+
def size
|
41
|
+
@captures.size + 1
|
42
|
+
end
|
43
|
+
alias length size
|
44
|
+
|
45
|
+
def captures
|
46
|
+
@captures.map { |_name, range| range && @string[range] }
|
47
|
+
end
|
48
|
+
|
49
|
+
def pre_match
|
50
|
+
return '' if start_index == 0
|
51
|
+
@string[0...start_index]
|
52
|
+
end
|
53
|
+
|
54
|
+
def post_match
|
55
|
+
return '' if end_index == string.size
|
56
|
+
@string[end_index..-1]
|
31
57
|
end
|
32
58
|
|
33
59
|
def freeze
|
34
60
|
@captures.freeze
|
35
61
|
super
|
36
62
|
end
|
63
|
+
|
64
|
+
def hash
|
65
|
+
@string.hash ^ @regexp.hash ^ @captures.hash
|
66
|
+
end
|
67
|
+
|
68
|
+
def ==(other)
|
69
|
+
return false unless other.is_a?(MatchData) || other.is_a?(::MatchData)
|
70
|
+
string == other.string &&
|
71
|
+
regexp == other.regexp &&
|
72
|
+
captures == other.captures
|
73
|
+
end
|
74
|
+
|
75
|
+
private
|
76
|
+
|
77
|
+
def dump_str(str)
|
78
|
+
str.nil? ? 'nil' : str.dump
|
79
|
+
end
|
37
80
|
end
|
38
81
|
end
|
data/lib/arugula/parser.rb
CHANGED
@@ -30,7 +30,7 @@ class Arugula
|
|
30
30
|
peek = pattern.chr
|
31
31
|
if tok.nil?
|
32
32
|
fail 'shouldnt happen'
|
33
|
-
elsif tok == '['
|
33
|
+
elsif tok == '[' && !characterclass_type?
|
34
34
|
push_part(:characterclass)
|
35
35
|
elsif tok == '-' &&
|
36
36
|
characterclass_type? &&
|
@@ -41,6 +41,10 @@ class Arugula
|
|
41
41
|
pattern.slice!(0)
|
42
42
|
elsif tok == ']' && characterclass_type?
|
43
43
|
pop_part
|
44
|
+
elsif tok == '^' && characterclass_type? && state.parts.empty?
|
45
|
+
characterclass_part = pop_part
|
46
|
+
wrap_state(:not)
|
47
|
+
@states << characterclass_part
|
44
48
|
elsif tok == '$'
|
45
49
|
push_part(:eol)
|
46
50
|
elsif tok == '^'
|
@@ -72,6 +76,24 @@ class Arugula
|
|
72
76
|
wrap_state(:star)
|
73
77
|
elsif tok == '+'
|
74
78
|
wrap_state(:plus)
|
79
|
+
elsif tok == '?'
|
80
|
+
wrap_state(:question)
|
81
|
+
elsif tok == '{'
|
82
|
+
before_comma = ''
|
83
|
+
after_comma = ''
|
84
|
+
until pattern.chr == ',' || pattern.chr == '}'
|
85
|
+
before_comma << pattern.slice!(0)
|
86
|
+
end
|
87
|
+
if pattern.chr == ','
|
88
|
+
pattern.slice!(0)
|
89
|
+
else
|
90
|
+
after_comma = before_comma
|
91
|
+
end
|
92
|
+
after_comma << pattern.slice!(0) until pattern.chr == '}'
|
93
|
+
pattern.slice!(0) if pattern.chr == '}'
|
94
|
+
before = before_comma.empty? ? 0 : before_comma.to_i
|
95
|
+
after = after_comma.empty? ? Float::INFINITY : after_comma.to_i
|
96
|
+
wrap_state(:quantifier, before, after)
|
75
97
|
else
|
76
98
|
push_part(:literal, tok)
|
77
99
|
end
|
@@ -85,8 +107,9 @@ class Arugula
|
|
85
107
|
@states << part unless name == :literal
|
86
108
|
end
|
87
109
|
|
88
|
-
def wrap_state(name)
|
89
|
-
wrapped = Part.all.find { |p| p.type == name }
|
110
|
+
def wrap_state(name, *content)
|
111
|
+
wrapped = Part.all.find { |p| p.type == name }
|
112
|
+
.new(*content, state.parts.pop)
|
90
113
|
state.parts << wrapped
|
91
114
|
@states << wrapped
|
92
115
|
end
|
data/lib/arugula/parts.rb
CHANGED
@@ -21,7 +21,7 @@ class Arugula
|
|
21
21
|
end
|
22
22
|
|
23
23
|
def to_s
|
24
|
-
literal.gsub('\\', '\\\\')
|
24
|
+
literal.gsub('\\', '\\\\').gsub(/[.]/) { |m| "\\#{m}" }
|
25
25
|
end
|
26
26
|
|
27
27
|
def match(str, index, _match_data)
|
@@ -61,8 +61,8 @@ class Arugula
|
|
61
61
|
|
62
62
|
def match(str, index, match_data)
|
63
63
|
parts.each do |part|
|
64
|
-
|
65
|
-
return true, match_index if
|
64
|
+
matches, match_index = part.match(str, index, match_data)
|
65
|
+
return true, match_index if matches
|
66
66
|
end
|
67
67
|
[false, index]
|
68
68
|
end
|
@@ -83,7 +83,14 @@ class Arugula
|
|
83
83
|
class CharacterClassPart < Part
|
84
84
|
include MatchAny
|
85
85
|
def to_s
|
86
|
-
|
86
|
+
parts_string = parts.map do |part|
|
87
|
+
next part unless part.class.type == :literal
|
88
|
+
lit = part.literal
|
89
|
+
lit = '\\]' if lit == ']'
|
90
|
+
lit = '\\[' if lit == '['
|
91
|
+
lit
|
92
|
+
end.join
|
93
|
+
"[#{parts_string}]"
|
87
94
|
end
|
88
95
|
end
|
89
96
|
|
@@ -108,13 +115,17 @@ class Arugula
|
|
108
115
|
d: ->(str, index) { ('0'..'9').member?(str[index]) },
|
109
116
|
s: ->(str, index) { [' ', "\t"].include?(str[index]) },
|
110
117
|
S: ->(str, index) { ![' ', "\t"].include?(str[index]) },
|
118
|
+
z: ->(str, index) { index == str.size },
|
119
|
+
Z: ->(str, index) { str[index..-1] == "\n" || index == str.size },
|
111
120
|
}.freeze
|
112
121
|
|
113
122
|
OFFSETS = begin
|
114
123
|
offsets = {
|
115
|
-
A: 0,
|
124
|
+
A: ->(_str, _index) { 0 },
|
125
|
+
Z: ->(_str, _index) { 0 },
|
126
|
+
z: ->(_str, _index) { 0 },
|
116
127
|
}
|
117
|
-
offsets.default = 1
|
128
|
+
offsets.default = ->(_str, _index) { 1 }
|
118
129
|
offsets.freeze
|
119
130
|
end
|
120
131
|
|
@@ -124,7 +135,7 @@ class Arugula
|
|
124
135
|
|
125
136
|
def match(str, index, _match_data)
|
126
137
|
matches = MATCHERS[@metachar][str, index]
|
127
|
-
[matches, index + (matches ? OFFSETS[@metachar] : 0)]
|
138
|
+
[matches, index + (matches ? OFFSETS[@metachar][str, index] : 0)]
|
128
139
|
end
|
129
140
|
|
130
141
|
def to_s
|
@@ -181,33 +192,93 @@ class Arugula
|
|
181
192
|
end
|
182
193
|
end
|
183
194
|
|
184
|
-
class
|
195
|
+
class NotPart < Part
|
185
196
|
include Wrapping
|
197
|
+
|
186
198
|
def to_s
|
187
|
-
|
199
|
+
@wrapped.to_s.dup.insert(1, '^')
|
200
|
+
end
|
201
|
+
|
202
|
+
def match(str, index, match_data)
|
203
|
+
matches, end_index = wrapped.match(str, index, match_data)
|
204
|
+
[!matches, matches ? index : end_index + 1]
|
205
|
+
end
|
206
|
+
end
|
207
|
+
|
208
|
+
module MatchNTimes
|
209
|
+
include Wrapping
|
210
|
+
def initialize(*args, times: 1..1)
|
211
|
+
@times = times
|
212
|
+
super(*args)
|
188
213
|
end
|
189
214
|
|
190
215
|
def match(str, index, match_data)
|
216
|
+
match_count = 0
|
217
|
+
end_index = index
|
218
|
+
|
191
219
|
loop do
|
192
220
|
matches, index = wrapped.match(str, index, match_data)
|
193
|
-
|
221
|
+
if matches
|
222
|
+
end_index = index
|
223
|
+
match_count += 1
|
224
|
+
end
|
225
|
+
break if !matches || match_count > @times.end
|
194
226
|
end
|
227
|
+
|
228
|
+
matches = @times.member?(match_count)
|
229
|
+
[matches, matches ? end_index : index]
|
230
|
+
end
|
231
|
+
end
|
232
|
+
|
233
|
+
class StarPart < Part
|
234
|
+
include MatchNTimes
|
235
|
+
def initialize(*args)
|
236
|
+
super(*args, times: 0..Float::INFINITY)
|
237
|
+
end
|
238
|
+
|
239
|
+
def to_s
|
240
|
+
"#{wrapped}*"
|
195
241
|
end
|
196
242
|
end
|
197
243
|
|
198
244
|
class PlusPart < Part
|
199
|
-
include
|
245
|
+
include MatchNTimes
|
246
|
+
def initialize(*args)
|
247
|
+
super(*args, times: 1..Float::INFINITY)
|
248
|
+
end
|
249
|
+
|
200
250
|
def to_s
|
201
251
|
"#{wrapped}+"
|
202
252
|
end
|
253
|
+
end
|
203
254
|
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
255
|
+
class QuestionPart < Part
|
256
|
+
include MatchNTimes
|
257
|
+
def initialize(*args)
|
258
|
+
super(*args, times: 0..1)
|
259
|
+
end
|
260
|
+
|
261
|
+
def to_s
|
262
|
+
"#{wrapped}?"
|
263
|
+
end
|
264
|
+
end
|
265
|
+
|
266
|
+
class QuantifierPart < Part
|
267
|
+
include MatchNTimes
|
268
|
+
def initialize(before, after, *args)
|
269
|
+
super(*args, times: before..after)
|
270
|
+
end
|
271
|
+
|
272
|
+
def to_s
|
273
|
+
before = @times.begin
|
274
|
+
after = @times.end
|
275
|
+
quantifier_part = '{'.dup
|
276
|
+
quantifier_part << before.to_s unless before == 0
|
277
|
+
quantifier_part << ',' unless before == after
|
278
|
+
quantifier_part << after.to_s unless before == after ||
|
279
|
+
after == Float::INFINITY
|
280
|
+
quantifier_part << '}'
|
281
|
+
"#{wrapped}#{quantifier_part}"
|
211
282
|
end
|
212
283
|
end
|
213
284
|
|
@@ -217,7 +288,8 @@ class Arugula
|
|
217
288
|
end
|
218
289
|
|
219
290
|
def match(str, index, _match_data)
|
220
|
-
|
291
|
+
char = str[index]
|
292
|
+
matches = char && char != "\n"
|
221
293
|
[matches, index + (matches ? 1 : 0)]
|
222
294
|
end
|
223
295
|
end
|
data/lib/arugula/version.rb
CHANGED