regexp_parser 2.3.1 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -39,6 +39,9 @@ class Regexp::Parser
39
39
  parse_token(token)
40
40
  end
41
41
 
42
+ # Trigger recursive setting of #nesting_level, which reflects how deep
43
+ # a node is in the tree. Do this at the end to account for tree rewrites.
44
+ root.nesting_level = 0
42
45
  assign_referenced_expressions
43
46
 
44
47
  if block_given?
@@ -286,17 +289,9 @@ class Regexp::Parser
286
289
  def nest(exp)
287
290
  nesting.push(exp)
288
291
  node << exp
289
- update_transplanted_subtree(exp, node)
290
292
  self.node = exp
291
293
  end
292
294
 
293
- # subtrees are transplanted to build Alternations, Intersections, Ranges
294
- def update_transplanted_subtree(exp, new_parent)
295
- exp.nesting_level = new_parent.nesting_level + 1
296
- exp.respond_to?(:each) &&
297
- exp.each { |subexp| update_transplanted_subtree(subexp, exp) }
298
- end
299
-
300
295
  def escape(token)
301
296
  case token.token
302
297
 
@@ -483,7 +478,7 @@ class Regexp::Parser
483
478
  new_token = Regexp::Token.new(
484
479
  :group,
485
480
  :passive,
486
- '', # text
481
+ '', # text (none because this group is implicit)
487
482
  target_node.ts,
488
483
  nil, # te (unused)
489
484
  target_node.level,
@@ -493,66 +488,23 @@ class Regexp::Parser
493
488
  new_group = Group::Passive.new(new_token, active_opts)
494
489
  new_group.implicit = true
495
490
  new_group << target_node
496
- increase_level(target_node)
491
+ increase_group_level(target_node)
497
492
  node.expressions[node.expressions.index(target_node)] = new_group
498
493
  target_node = new_group
499
494
  end
500
495
 
501
- case token.token
502
- when :zero_or_one
503
- target_node.quantify(:zero_or_one, token.text, 0, 1, :greedy)
504
- when :zero_or_one_reluctant
505
- target_node.quantify(:zero_or_one, token.text, 0, 1, :reluctant)
506
- when :zero_or_one_possessive
507
- target_node.quantify(:zero_or_one, token.text, 0, 1, :possessive)
508
-
509
- when :zero_or_more
510
- target_node.quantify(:zero_or_more, token.text, 0, -1, :greedy)
511
- when :zero_or_more_reluctant
512
- target_node.quantify(:zero_or_more, token.text, 0, -1, :reluctant)
513
- when :zero_or_more_possessive
514
- target_node.quantify(:zero_or_more, token.text, 0, -1, :possessive)
515
-
516
- when :one_or_more
517
- target_node.quantify(:one_or_more, token.text, 1, -1, :greedy)
518
- when :one_or_more_reluctant
519
- target_node.quantify(:one_or_more, token.text, 1, -1, :reluctant)
520
- when :one_or_more_possessive
521
- target_node.quantify(:one_or_more, token.text, 1, -1, :possessive)
522
-
523
- when :interval
524
- interval(target_node, token)
525
-
526
- else
496
+ unless token.token =~ /\A(?:zero_or_one|zero_or_more|one_or_more|interval)
497
+ (?:_greedy|_reluctant|_possessive)?\z/x
527
498
  raise UnknownTokenError.new('Quantifier', token)
528
499
  end
500
+
501
+ target_node.quantify(token, active_opts)
529
502
  end
530
503
 
531
- def increase_level(exp)
504
+ def increase_group_level(exp)
532
505
  exp.level += 1
533
- exp.respond_to?(:each) && exp.each { |subexp| increase_level(subexp) }
534
- end
535
-
536
- def interval(target_node, token)
537
- text = token.text
538
- mchr = text[text.length-1].chr =~ /[?+]/ ? text[text.length-1].chr : nil
539
- case mchr
540
- when '?'
541
- range_text = text[0...-1]
542
- mode = :reluctant
543
- when '+'
544
- range_text = text[0...-1]
545
- mode = :possessive
546
- else
547
- range_text = text
548
- mode = :greedy
549
- end
550
-
551
- range = range_text.gsub(/\{|\}/, '').split(',', 2)
552
- min = range[0].empty? ? 0 : range[0]
553
- max = range[1] ? (range[1].empty? ? -1 : range[1]) : min
554
-
555
- target_node.quantify(:interval, text, min.to_i, max.to_i, mode)
506
+ exp.quantifier.level += 1 if exp.quantifier
507
+ exp.terminal? || exp.each { |subexp| increase_group_level(subexp) }
556
508
  end
557
509
 
558
510
  def set(token)
@@ -20,7 +20,7 @@
20
20
  name = data[ts+2..te-2].pack('c*').gsub(/[\^\s_\-]/, '').downcase
21
21
 
22
22
  token = self.class.short_prop_map[name] || self.class.long_prop_map[name]
23
- raise UnknownUnicodePropertyError.new(name) unless token
23
+ validation_error(:property, name) unless token
24
24
 
25
25
  self.emit(type, token.to_sym, text)
26
26
 
@@ -28,13 +28,7 @@
28
28
 
29
29
  comment = ('#' . [^\n]* . '\n'?);
30
30
 
31
- class_name_posix = 'alnum' | 'alpha' | 'blank' |
32
- 'cntrl' | 'digit' | 'graph' |
33
- 'lower' | 'print' | 'punct' |
34
- 'space' | 'upper' | 'xdigit' |
35
- 'word' | 'ascii';
36
-
37
- class_posix = ('[:' . '^'? . class_name_posix . ':]');
31
+ class_posix = ('[:' . '^'? . [^\[\]]* . ':]');
38
32
 
39
33
 
40
34
  # these are not supported in ruby at the moment
@@ -74,8 +68,7 @@
74
68
  quantity_maximum = ',' . (digit+);
75
69
  quantity_range = (digit+) . ',' . (digit+);
76
70
  quantifier_interval = range_open . ( quantity_exact | quantity_minimum |
77
- quantity_maximum | quantity_range ) . range_close .
78
- quantifier_mode?;
71
+ quantity_maximum | quantity_range ) . range_close;
79
72
 
80
73
  quantifiers = quantifier_greedy | quantifier_reluctant |
81
74
  quantifier_possessive | quantifier_interval;
@@ -223,24 +216,28 @@
223
216
  fcall character_set;
224
217
  };
225
218
 
226
- class_posix >(open_bracket, 1) @set_closed @eof(premature_end_error) {
219
+ class_posix >(open_bracket, 1) @set_closed @eof(premature_end_error) {
227
220
  text = copy(data, ts, te)
228
221
 
229
222
  type = :posixclass
230
223
  class_name = text[2..-3]
231
- if class_name[0].chr == '^'
224
+ if class_name[0] == '^'
232
225
  class_name = class_name[1..-1]
233
226
  type = :nonposixclass
234
227
  end
235
228
 
229
+ unless self.class.posix_classes.include?(class_name)
230
+ validation_error(:posix_class, text)
231
+ end
232
+
236
233
  emit(type, class_name.to_sym, text)
237
234
  };
238
235
 
239
236
  # These are not supported in ruby at the moment. Enable them if they are.
240
- # collating_sequence >(open_bracket, 1) @set_closed @eof(premature_end_error) {
237
+ # collating_sequence >(open_bracket, 1) @set_closed @eof(premature_end_error) {
241
238
  # emit(:set, :collation, copy(data, ts, te))
242
239
  # };
243
- # character_equivalent >(open_bracket, 1) @set_closed @eof(premature_end_error) {
240
+ # character_equivalent >(open_bracket, 1) @set_closed @eof(premature_end_error) {
244
241
  # emit(:set, :equivalent, copy(data, ts, te))
245
242
  # };
246
243
 
@@ -323,7 +320,7 @@
323
320
 
324
321
  codepoint_sequence > (escaped_alpha, 6) $eof(premature_end_error) {
325
322
  text = copy(data, ts-1, te)
326
- if text[2].chr == '{'
323
+ if text[2] == '{'
327
324
  emit(:escape, :codepoint_list, text)
328
325
  else
329
326
  emit(:escape, :codepoint, text)
@@ -419,12 +416,12 @@
419
416
 
420
417
  backslash . anchor_char > (backslashed, 3) {
421
418
  case text = copy(data, ts, te)
422
- when '\\A'; emit(:anchor, :bos, text)
423
- when '\\z'; emit(:anchor, :eos, text)
424
- when '\\Z'; emit(:anchor, :eos_ob_eol, text)
425
- when '\\b'; emit(:anchor, :word_boundary, text)
426
- when '\\B'; emit(:anchor, :nonword_boundary, text)
427
- when '\\G'; emit(:anchor, :match_start, text)
419
+ when '\A'; emit(:anchor, :bos, text)
420
+ when '\z'; emit(:anchor, :eos, text)
421
+ when '\Z'; emit(:anchor, :eos_ob_eol, text)
422
+ when '\b'; emit(:anchor, :word_boundary, text)
423
+ when '\B'; emit(:anchor, :nonword_boundary, text)
424
+ when '\G'; emit(:anchor, :match_start, text)
428
425
  end
429
426
  };
430
427
 
@@ -477,7 +474,7 @@
477
474
  group_open . group_options >group_opened {
478
475
  text = copy(data, ts, te)
479
476
  if text[2..-1] =~ /([^\-mixdau:]|^$)|-.*([dau])/
480
- raise InvalidGroupOption.new($1 || "-#{$2}", text)
477
+ validation_error(:group_option, $1 || "-#{$2}", text)
481
478
  end
482
479
  emit_options(text)
483
480
  };
@@ -605,7 +602,7 @@
605
602
  end
606
603
  };
607
604
 
608
- quantifier_interval {
605
+ quantifier_interval {
609
606
  emit(:quantifier, :interval, copy(data, ts, te))
610
607
  };
611
608
 
@@ -686,6 +683,7 @@ class Regexp::Scanner
686
683
  end
687
684
 
688
685
  # Invalid groupOption. Used for inline options.
686
+ # TODO: should become InvalidGroupOptionError in v3.0.0 for consistency
689
687
  class InvalidGroupOption < ValidationError
690
688
  def initialize(option, text)
691
689
  super "Invalid group option #{option} in #{text}"
@@ -706,6 +704,13 @@ class Regexp::Scanner
706
704
  end
707
705
  end
708
706
 
707
+ # The POSIX class name was not recognized by the scanner.
708
+ class UnknownPosixClassError < ValidationError
709
+ def initialize(text)
710
+ super "Unknown POSIX class #{text}"
711
+ end
712
+ end
713
+
709
714
  # Scans the given regular expression text, or Regexp object and collects the
710
715
  # emitted token into an array that gets returned at the end. If a block is
711
716
  # given, it gets called for each emitted token.
@@ -771,6 +776,11 @@ class Regexp::Scanner
771
776
  File.read("#{__dir__}/scanner/properties/#{name}.csv").scan(/(.+),(.+)/).to_h
772
777
  end
773
778
 
779
+ def self.posix_classes
780
+ %w[alnum alpha ascii blank cntrl digit graph
781
+ lower print punct space upper word xdigit]
782
+ end
783
+
774
784
  # Emits an array with the details of the scanned pattern
775
785
  def emit(type, token, text)
776
786
  #puts "EMIT: type: #{type}, token: #{token}, text: #{text}, ts: #{ts}, te: #{te}"
@@ -873,15 +883,16 @@ class Regexp::Scanner
873
883
 
874
884
  # Centralizes and unifies the handling of validation related
875
885
  # errors.
876
- def validation_error(type, what, reason)
877
- case type
878
- when :group
879
- error = InvalidGroupError.new(what, reason)
880
- when :backref
881
- error = InvalidBackrefError.new(what, reason)
882
- when :sequence
883
- error = InvalidSequenceError.new(what, reason)
884
- end
886
+ def validation_error(type, what, reason = nil)
887
+ error =
888
+ case type
889
+ when :backref then InvalidBackrefError.new(what, reason)
890
+ when :group then InvalidGroupError.new(what, reason)
891
+ when :group_option then InvalidGroupOption.new(what, reason)
892
+ when :posix_class then UnknownPosixClassError.new(what)
893
+ when :property then UnknownUnicodePropertyError.new(what)
894
+ when :sequence then InvalidSequenceError.new(what, reason)
895
+ end
885
896
 
886
897
  raise error # unless @@config.validation_ignore
887
898
  end