arugula 0.3.0 → 0.4.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f59b134599b696a71d41c30d0bfc84fbab54e2c0
4
- data.tar.gz: d3d49f304685d6f458284a712780a3274bc43b97
3
+ metadata.gz: 9b9342abba4049dbc32ce57a0d0b6326f0290d36
4
+ data.tar.gz: 2ec7ae8575d2267627a7b9bfb7e8da5e296f7567
5
5
  SHA512:
6
- metadata.gz: 6cf01040b54906eaf6c9b4b4e6eff08843ad11fd6c4eb7da26865e068fb627ae9b9defd4382295120874e58bf68c48753f7c8dc80c2c2faf4bff7e1da542f883
7
- data.tar.gz: 83b14f9ecf7a4cf2a8feef267d5599c999575db458bd35f3e08302414d4c16580d5234aa92c1d6e1edf09b544f49fc226fb0fc0b4626f650eaaf419b92a349a6
6
+ metadata.gz: 48655a01f947db1e92761c07373eb7590770de16fd88c6a89ae4a0a0c4ec3df0e611a5e43f25796e2ad56e3279922fc01fb95678fcd8d522ef41dc9207e1d265
7
+ data.tar.gz: 6c35efef0d7555482792a794ac625958520abf76f2c7eb96b81d1499d033c06de297311953fa27a9d221ad0e3b7d6114c36c79de210fd1962caf983c8658da37
@@ -10,6 +10,8 @@ Style/TrailingCommaInLiteral:
10
10
 
11
11
  Metrics/AbcSize:
12
12
  Enabled: false
13
+ Metrics/ClassLength:
14
+ Enabled: false
13
15
  Metrics/CyclomaticComplexity:
14
16
  Enabled: false
15
17
  Metrics/MethodLength:
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- arugula (0.3.0)
4
+ arugula (0.4.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -25,6 +25,9 @@ Or install it yourself as:
25
25
  regexp = Arugula.new('[A-Z][a-z]+')
26
26
  regexp.match?('ahoy! my name is Samuel') # => 17
27
27
  regexp.match?('foobar') # => null
28
+
29
+ regexp = Arugula.new('Hello ([a-z]+)!')
30
+ regexp.match('Hello world!') # => #<MatchData "Hello world!" 1:"world">
28
31
  ```
29
32
 
30
33
  ## Development
@@ -19,6 +19,7 @@ class Arugula
19
19
  def match(str, index = 0)
20
20
  match_data = MatchData.new(self, str)
21
21
  loop do
22
+ match_data.reset_captures!
22
23
  match, end_index = @root.match(str, index, match_data)
23
24
  if match
24
25
  match_data.start_index = index
@@ -33,4 +34,15 @@ class Arugula
33
34
  def to_s
34
35
  "/#{@root}/"
35
36
  end
37
+
38
+ alias inspect to_s
39
+
40
+ def hash
41
+ to_s.hash
42
+ end
43
+
44
+ def ==(other)
45
+ return false unless other.is_a?(Arugula) || other.is_a?(Regexp)
46
+ inspect == other.inspect
47
+ end
36
48
  end
@@ -1,5 +1,6 @@
1
1
  class Arugula
2
2
  class MatchData
3
+ attr_reader :string, :regexp
3
4
  def initialize(regexp, string)
4
5
  # require "awesome_print"
5
6
  # ap regexp, raw: true
@@ -12,6 +13,12 @@ class Arugula
12
13
  @captures[name] = start_index...end_index
13
14
  end
14
15
 
16
+ def reset_captures!
17
+ @captures.keys.each do |key|
18
+ @captures[key] = nil
19
+ end
20
+ end
21
+
15
22
  attr_accessor :start_index
16
23
  attr_accessor :end_index
17
24
 
@@ -21,18 +28,54 @@ class Arugula
21
28
 
22
29
  def inspect
23
30
  captures_part = @captures.map do |name, range|
24
- " #{name}:#{@string[range].dump}"
31
+ " #{name}:#{dump_str(range && @string[range])}"
25
32
  end.join
26
- "#<MatchData #{to_s.dump}#{captures_part}>"
33
+ "#<MatchData #{dump_str(to_s)}#{captures_part}>"
27
34
  end
28
35
 
29
36
  def to_a
30
- @captures.map { |_name, range| @string[range] }.unshift(to_s)
37
+ captures.unshift(to_s)
38
+ end
39
+
40
+ def size
41
+ @captures.size + 1
42
+ end
43
+ alias length size
44
+
45
+ def captures
46
+ @captures.map { |_name, range| range && @string[range] }
47
+ end
48
+
49
+ def pre_match
50
+ return '' if start_index == 0
51
+ @string[0...start_index]
52
+ end
53
+
54
+ def post_match
55
+ return '' if end_index == string.size
56
+ @string[end_index..-1]
31
57
  end
32
58
 
33
59
  def freeze
34
60
  @captures.freeze
35
61
  super
36
62
  end
63
+
64
+ def hash
65
+ @string.hash ^ @regexp.hash ^ @captures.hash
66
+ end
67
+
68
+ def ==(other)
69
+ return false unless other.is_a?(MatchData) || other.is_a?(::MatchData)
70
+ string == other.string &&
71
+ regexp == other.regexp &&
72
+ captures == other.captures
73
+ end
74
+
75
+ private
76
+
77
+ def dump_str(str)
78
+ str.nil? ? 'nil' : str.dump
79
+ end
37
80
  end
38
81
  end
@@ -30,7 +30,7 @@ class Arugula
30
30
  peek = pattern.chr
31
31
  if tok.nil?
32
32
  fail 'shouldnt happen'
33
- elsif tok == '['
33
+ elsif tok == '[' && !characterclass_type?
34
34
  push_part(:characterclass)
35
35
  elsif tok == '-' &&
36
36
  characterclass_type? &&
@@ -41,6 +41,10 @@ class Arugula
41
41
  pattern.slice!(0)
42
42
  elsif tok == ']' && characterclass_type?
43
43
  pop_part
44
+ elsif tok == '^' && characterclass_type? && state.parts.empty?
45
+ characterclass_part = pop_part
46
+ wrap_state(:not)
47
+ @states << characterclass_part
44
48
  elsif tok == '$'
45
49
  push_part(:eol)
46
50
  elsif tok == '^'
@@ -72,6 +76,24 @@ class Arugula
72
76
  wrap_state(:star)
73
77
  elsif tok == '+'
74
78
  wrap_state(:plus)
79
+ elsif tok == '?'
80
+ wrap_state(:question)
81
+ elsif tok == '{'
82
+ before_comma = ''
83
+ after_comma = ''
84
+ until pattern.chr == ',' || pattern.chr == '}'
85
+ before_comma << pattern.slice!(0)
86
+ end
87
+ if pattern.chr == ','
88
+ pattern.slice!(0)
89
+ else
90
+ after_comma = before_comma
91
+ end
92
+ after_comma << pattern.slice!(0) until pattern.chr == '}'
93
+ pattern.slice!(0) if pattern.chr == '}'
94
+ before = before_comma.empty? ? 0 : before_comma.to_i
95
+ after = after_comma.empty? ? Float::INFINITY : after_comma.to_i
96
+ wrap_state(:quantifier, before, after)
75
97
  else
76
98
  push_part(:literal, tok)
77
99
  end
@@ -85,8 +107,9 @@ class Arugula
85
107
  @states << part unless name == :literal
86
108
  end
87
109
 
88
- def wrap_state(name)
89
- wrapped = Part.all.find { |p| p.type == name }.new(state.parts.pop)
110
+ def wrap_state(name, *content)
111
+ wrapped = Part.all.find { |p| p.type == name }
112
+ .new(*content, state.parts.pop)
90
113
  state.parts << wrapped
91
114
  @states << wrapped
92
115
  end
@@ -21,7 +21,7 @@ class Arugula
21
21
  end
22
22
 
23
23
  def to_s
24
- literal.gsub('\\', '\\\\')
24
+ literal.gsub('\\', '\\\\').gsub(/[.]/) { |m| "\\#{m}" }
25
25
  end
26
26
 
27
27
  def match(str, index, _match_data)
@@ -61,8 +61,8 @@ class Arugula
61
61
 
62
62
  def match(str, index, match_data)
63
63
  parts.each do |part|
64
- match, match_index = part.match(str, index, match_data)
65
- return true, match_index if match
64
+ matches, match_index = part.match(str, index, match_data)
65
+ return true, match_index if matches
66
66
  end
67
67
  [false, index]
68
68
  end
@@ -83,7 +83,14 @@ class Arugula
83
83
  class CharacterClassPart < Part
84
84
  include MatchAny
85
85
  def to_s
86
- "[#{parts.join}]"
86
+ parts_string = parts.map do |part|
87
+ next part unless part.class.type == :literal
88
+ lit = part.literal
89
+ lit = '\\]' if lit == ']'
90
+ lit = '\\[' if lit == '['
91
+ lit
92
+ end.join
93
+ "[#{parts_string}]"
87
94
  end
88
95
  end
89
96
 
@@ -108,13 +115,17 @@ class Arugula
108
115
  d: ->(str, index) { ('0'..'9').member?(str[index]) },
109
116
  s: ->(str, index) { [' ', "\t"].include?(str[index]) },
110
117
  S: ->(str, index) { ![' ', "\t"].include?(str[index]) },
118
+ z: ->(str, index) { index == str.size },
119
+ Z: ->(str, index) { str[index..-1] == "\n" || index == str.size },
111
120
  }.freeze
112
121
 
113
122
  OFFSETS = begin
114
123
  offsets = {
115
- A: 0,
124
+ A: ->(_str, _index) { 0 },
125
+ Z: ->(_str, _index) { 0 },
126
+ z: ->(_str, _index) { 0 },
116
127
  }
117
- offsets.default = 1
128
+ offsets.default = ->(_str, _index) { 1 }
118
129
  offsets.freeze
119
130
  end
120
131
 
@@ -124,7 +135,7 @@ class Arugula
124
135
 
125
136
  def match(str, index, _match_data)
126
137
  matches = MATCHERS[@metachar][str, index]
127
- [matches, index + (matches ? OFFSETS[@metachar] : 0)]
138
+ [matches, index + (matches ? OFFSETS[@metachar][str, index] : 0)]
128
139
  end
129
140
 
130
141
  def to_s
@@ -181,33 +192,93 @@ class Arugula
181
192
  end
182
193
  end
183
194
 
184
- class StarPart < Part
195
+ class NotPart < Part
185
196
  include Wrapping
197
+
186
198
  def to_s
187
- "#{wrapped}*"
199
+ @wrapped.to_s.dup.insert(1, '^')
200
+ end
201
+
202
+ def match(str, index, match_data)
203
+ matches, end_index = wrapped.match(str, index, match_data)
204
+ [!matches, matches ? index : end_index + 1]
205
+ end
206
+ end
207
+
208
+ module MatchNTimes
209
+ include Wrapping
210
+ def initialize(*args, times: 1..1)
211
+ @times = times
212
+ super(*args)
188
213
  end
189
214
 
190
215
  def match(str, index, match_data)
216
+ match_count = 0
217
+ end_index = index
218
+
191
219
  loop do
192
220
  matches, index = wrapped.match(str, index, match_data)
193
- return true, index unless matches
221
+ if matches
222
+ end_index = index
223
+ match_count += 1
224
+ end
225
+ break if !matches || match_count > @times.end
194
226
  end
227
+
228
+ matches = @times.member?(match_count)
229
+ [matches, matches ? end_index : index]
230
+ end
231
+ end
232
+
233
+ class StarPart < Part
234
+ include MatchNTimes
235
+ def initialize(*args)
236
+ super(*args, times: 0..Float::INFINITY)
237
+ end
238
+
239
+ def to_s
240
+ "#{wrapped}*"
195
241
  end
196
242
  end
197
243
 
198
244
  class PlusPart < Part
199
- include Wrapping
245
+ include MatchNTimes
246
+ def initialize(*args)
247
+ super(*args, times: 1..Float::INFINITY)
248
+ end
249
+
200
250
  def to_s
201
251
  "#{wrapped}+"
202
252
  end
253
+ end
203
254
 
204
- def match(str, index, match_data)
205
- has_matched = false
206
- loop do
207
- matches, index = wrapped.match(str, index, match_data)
208
- has_matched = true if matches
209
- return has_matched, index unless matches
210
- end
255
+ class QuestionPart < Part
256
+ include MatchNTimes
257
+ def initialize(*args)
258
+ super(*args, times: 0..1)
259
+ end
260
+
261
+ def to_s
262
+ "#{wrapped}?"
263
+ end
264
+ end
265
+
266
+ class QuantifierPart < Part
267
+ include MatchNTimes
268
+ def initialize(before, after, *args)
269
+ super(*args, times: before..after)
270
+ end
271
+
272
+ def to_s
273
+ before = @times.begin
274
+ after = @times.end
275
+ quantifier_part = '{'.dup
276
+ quantifier_part << before.to_s unless before == 0
277
+ quantifier_part << ',' unless before == after
278
+ quantifier_part << after.to_s unless before == after ||
279
+ after == Float::INFINITY
280
+ quantifier_part << '}'
281
+ "#{wrapped}#{quantifier_part}"
211
282
  end
212
283
  end
213
284
 
@@ -217,7 +288,8 @@ class Arugula
217
288
  end
218
289
 
219
290
  def match(str, index, _match_data)
220
- matches = index < str.size
291
+ char = str[index]
292
+ matches = char && char != "\n"
221
293
  [matches, index + (matches ? 1 : 0)]
222
294
  end
223
295
  end
@@ -1,4 +1,4 @@
1
1
  # frozen_string_literal: true
2
2
  class Arugula
3
- VERSION = '0.3.0'.freeze
3
+ VERSION = '0.4.0'.freeze
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arugula
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Samuel Giddins