regexp_parser 2.1.1 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +94 -6
  3. data/Gemfile +2 -1
  4. data/LICENSE +1 -1
  5. data/README.md +40 -30
  6. data/Rakefile +6 -70
  7. data/lib/regexp_parser/error.rb +1 -1
  8. data/lib/regexp_parser/expression/base.rb +75 -0
  9. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  10. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +1 -0
  11. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
  12. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -2
  13. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +2 -2
  14. data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
  15. data/lib/regexp_parser/expression/classes/conditional.rb +2 -2
  16. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +13 -7
  17. data/lib/regexp_parser/expression/classes/free_space.rb +1 -3
  18. data/lib/regexp_parser/expression/classes/group.rb +6 -6
  19. data/lib/regexp_parser/expression/classes/keep.rb +2 -0
  20. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  21. data/lib/regexp_parser/expression/classes/root.rb +3 -6
  22. data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +1 -2
  23. data/lib/regexp_parser/expression/methods/construct.rb +43 -0
  24. data/lib/regexp_parser/expression/methods/match_length.rb +1 -1
  25. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  26. data/lib/regexp_parser/expression/methods/tests.rb +10 -1
  27. data/lib/regexp_parser/expression/quantifier.rb +41 -23
  28. data/lib/regexp_parser/expression/sequence.rb +9 -24
  29. data/lib/regexp_parser/expression/sequence_operation.rb +2 -2
  30. data/lib/regexp_parser/expression/shared.rb +85 -0
  31. data/lib/regexp_parser/expression/subexpression.rb +11 -8
  32. data/lib/regexp_parser/expression.rb +10 -132
  33. data/lib/regexp_parser/lexer.rb +8 -6
  34. data/lib/regexp_parser/parser.rb +21 -72
  35. data/lib/regexp_parser/scanner/properties/long.csv +622 -0
  36. data/lib/regexp_parser/scanner/properties/short.csv +246 -0
  37. data/lib/regexp_parser/scanner/property.rl +1 -1
  38. data/lib/regexp_parser/scanner/scanner.rl +48 -35
  39. data/lib/regexp_parser/scanner.rb +735 -801
  40. data/lib/regexp_parser/syntax/any.rb +2 -7
  41. data/lib/regexp_parser/syntax/base.rb +91 -66
  42. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  43. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  44. data/lib/regexp_parser/syntax/token/backreference.rb +30 -0
  45. data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
  46. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  47. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  48. data/lib/regexp_parser/syntax/token/escape.rb +31 -0
  49. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  50. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  51. data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
  52. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  53. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  54. data/lib/regexp_parser/syntax/token/unicode_property.rb +717 -0
  55. data/lib/regexp_parser/syntax/token.rb +45 -0
  56. data/lib/regexp_parser/syntax/version_lookup.rb +20 -29
  57. data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
  58. data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
  59. data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
  60. data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
  61. data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
  62. data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
  63. data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
  64. data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
  65. data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
  66. data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
  67. data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
  68. data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
  69. data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
  70. data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
  71. data/lib/regexp_parser/syntax/versions.rb +1 -1
  72. data/lib/regexp_parser/syntax.rb +1 -1
  73. data/lib/regexp_parser/token.rb +9 -20
  74. data/lib/regexp_parser/version.rb +1 -1
  75. data/lib/regexp_parser.rb +0 -2
  76. data/regexp_parser.gemspec +20 -22
  77. metadata +37 -166
  78. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  79. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  80. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  81. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  82. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  83. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  84. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  85. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  86. data/spec/expression/base_spec.rb +0 -104
  87. data/spec/expression/clone_spec.rb +0 -152
  88. data/spec/expression/conditional_spec.rb +0 -89
  89. data/spec/expression/free_space_spec.rb +0 -27
  90. data/spec/expression/methods/match_length_spec.rb +0 -161
  91. data/spec/expression/methods/match_spec.rb +0 -25
  92. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  93. data/spec/expression/methods/tests_spec.rb +0 -99
  94. data/spec/expression/methods/traverse_spec.rb +0 -161
  95. data/spec/expression/options_spec.rb +0 -128
  96. data/spec/expression/subexpression_spec.rb +0 -50
  97. data/spec/expression/to_h_spec.rb +0 -26
  98. data/spec/expression/to_s_spec.rb +0 -108
  99. data/spec/lexer/all_spec.rb +0 -22
  100. data/spec/lexer/conditionals_spec.rb +0 -53
  101. data/spec/lexer/delimiters_spec.rb +0 -68
  102. data/spec/lexer/escapes_spec.rb +0 -14
  103. data/spec/lexer/keep_spec.rb +0 -10
  104. data/spec/lexer/literals_spec.rb +0 -64
  105. data/spec/lexer/nesting_spec.rb +0 -99
  106. data/spec/lexer/refcalls_spec.rb +0 -60
  107. data/spec/parser/all_spec.rb +0 -43
  108. data/spec/parser/alternation_spec.rb +0 -88
  109. data/spec/parser/anchors_spec.rb +0 -17
  110. data/spec/parser/conditionals_spec.rb +0 -179
  111. data/spec/parser/errors_spec.rb +0 -30
  112. data/spec/parser/escapes_spec.rb +0 -121
  113. data/spec/parser/free_space_spec.rb +0 -130
  114. data/spec/parser/groups_spec.rb +0 -108
  115. data/spec/parser/keep_spec.rb +0 -6
  116. data/spec/parser/options_spec.rb +0 -28
  117. data/spec/parser/posix_classes_spec.rb +0 -8
  118. data/spec/parser/properties_spec.rb +0 -115
  119. data/spec/parser/quantifiers_spec.rb +0 -68
  120. data/spec/parser/refcalls_spec.rb +0 -117
  121. data/spec/parser/set/intersections_spec.rb +0 -127
  122. data/spec/parser/set/ranges_spec.rb +0 -111
  123. data/spec/parser/sets_spec.rb +0 -178
  124. data/spec/parser/types_spec.rb +0 -18
  125. data/spec/scanner/all_spec.rb +0 -18
  126. data/spec/scanner/anchors_spec.rb +0 -21
  127. data/spec/scanner/conditionals_spec.rb +0 -128
  128. data/spec/scanner/delimiters_spec.rb +0 -52
  129. data/spec/scanner/errors_spec.rb +0 -67
  130. data/spec/scanner/escapes_spec.rb +0 -64
  131. data/spec/scanner/free_space_spec.rb +0 -165
  132. data/spec/scanner/groups_spec.rb +0 -61
  133. data/spec/scanner/keep_spec.rb +0 -10
  134. data/spec/scanner/literals_spec.rb +0 -39
  135. data/spec/scanner/meta_spec.rb +0 -18
  136. data/spec/scanner/options_spec.rb +0 -36
  137. data/spec/scanner/properties_spec.rb +0 -64
  138. data/spec/scanner/quantifiers_spec.rb +0 -25
  139. data/spec/scanner/refcalls_spec.rb +0 -55
  140. data/spec/scanner/sets_spec.rb +0 -151
  141. data/spec/scanner/types_spec.rb +0 -14
  142. data/spec/spec_helper.rb +0 -16
  143. data/spec/support/runner.rb +0 -42
  144. data/spec/support/shared_examples.rb +0 -77
  145. data/spec/support/warning_extractor.rb +0 -60
  146. data/spec/syntax/syntax_spec.rb +0 -48
  147. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  148. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  149. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  150. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  151. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  152. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  153. data/spec/syntax/versions/aliases_spec.rb +0 -37
  154. data/spec/token/token_spec.rb +0 -85
@@ -23,7 +23,7 @@ class Regexp::Parser
23
23
  end
24
24
 
25
25
  def parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
26
- root = Root.build(extract_options(input, options))
26
+ root = Root.construct(options: extract_options(input, options))
27
27
 
28
28
  self.root = root
29
29
  self.node = root
@@ -39,6 +39,9 @@ class Regexp::Parser
39
39
  parse_token(token)
40
40
  end
41
41
 
42
+ # Trigger recursive setting of #nesting_level, which reflects how deep
43
+ # a node is in the tree. Do this at the end to account for tree rewrites.
44
+ root.nesting_level = 0
42
45
  assign_referenced_expressions
43
46
 
44
47
  if block_given?
@@ -197,11 +200,11 @@ class Regexp::Parser
197
200
  end
198
201
 
199
202
  def captured_group_count_at_level
200
- captured_group_counts[node.level]
203
+ captured_group_counts[node]
201
204
  end
202
205
 
203
206
  def count_captured_group
204
- captured_group_counts[node.level] += 1
207
+ captured_group_counts[node] += 1
205
208
  end
206
209
 
207
210
  def close_group
@@ -286,17 +289,9 @@ class Regexp::Parser
286
289
  def nest(exp)
287
290
  nesting.push(exp)
288
291
  node << exp
289
- update_transplanted_subtree(exp, node)
290
292
  self.node = exp
291
293
  end
292
294
 
293
- # subtrees are transplanted to build Alternations, Intersections, Ranges
294
- def update_transplanted_subtree(exp, new_parent)
295
- exp.nesting_level = new_parent.nesting_level + 1
296
- exp.respond_to?(:each) &&
297
- exp.each { |subexp| update_transplanted_subtree(subexp, exp) }
298
- end
299
-
300
295
  def escape(token)
301
296
  case token.token
302
297
 
@@ -480,79 +475,33 @@ class Regexp::Parser
480
475
  # description of the problem: https://github.com/ammar/regexp_parser/issues/3
481
476
  # rationale for this solution: https://github.com/ammar/regexp_parser/pull/69
482
477
  if target_node.quantified?
483
- new_token = Regexp::Token.new(
484
- :group,
485
- :passive,
486
- '', # text
487
- target_node.ts,
488
- nil, # te (unused)
489
- target_node.level,
490
- target_node.set_level,
491
- target_node.conditional_level
478
+ new_group = Group::Passive.construct(
479
+ token: :passive,
480
+ ts: target_node.ts,
481
+ level: target_node.level,
482
+ set_level: target_node.set_level,
483
+ conditional_level: target_node.conditional_level,
484
+ options: active_opts,
492
485
  )
493
- new_group = Group::Passive.new(new_token, active_opts)
494
486
  new_group.implicit = true
495
487
  new_group << target_node
496
- increase_level(target_node)
488
+ increase_group_level(target_node)
497
489
  node.expressions[node.expressions.index(target_node)] = new_group
498
490
  target_node = new_group
499
491
  end
500
492
 
501
- case token.token
502
- when :zero_or_one
503
- target_node.quantify(:zero_or_one, token.text, 0, 1, :greedy)
504
- when :zero_or_one_reluctant
505
- target_node.quantify(:zero_or_one, token.text, 0, 1, :reluctant)
506
- when :zero_or_one_possessive
507
- target_node.quantify(:zero_or_one, token.text, 0, 1, :possessive)
508
-
509
- when :zero_or_more
510
- target_node.quantify(:zero_or_more, token.text, 0, -1, :greedy)
511
- when :zero_or_more_reluctant
512
- target_node.quantify(:zero_or_more, token.text, 0, -1, :reluctant)
513
- when :zero_or_more_possessive
514
- target_node.quantify(:zero_or_more, token.text, 0, -1, :possessive)
515
-
516
- when :one_or_more
517
- target_node.quantify(:one_or_more, token.text, 1, -1, :greedy)
518
- when :one_or_more_reluctant
519
- target_node.quantify(:one_or_more, token.text, 1, -1, :reluctant)
520
- when :one_or_more_possessive
521
- target_node.quantify(:one_or_more, token.text, 1, -1, :possessive)
522
-
523
- when :interval
524
- interval(target_node, token)
525
-
526
- else
493
+ unless token.token =~ /\A(?:zero_or_one|zero_or_more|one_or_more|interval)
494
+ (?:_greedy|_reluctant|_possessive)?\z/x
527
495
  raise UnknownTokenError.new('Quantifier', token)
528
496
  end
497
+
498
+ target_node.quantify(token, active_opts)
529
499
  end
530
500
 
531
- def increase_level(exp)
501
+ def increase_group_level(exp)
532
502
  exp.level += 1
533
- exp.respond_to?(:each) && exp.each { |subexp| increase_level(subexp) }
534
- end
535
-
536
- def interval(target_node, token)
537
- text = token.text
538
- mchr = text[text.length-1].chr =~ /[?+]/ ? text[text.length-1].chr : nil
539
- case mchr
540
- when '?'
541
- range_text = text[0...-1]
542
- mode = :reluctant
543
- when '+'
544
- range_text = text[0...-1]
545
- mode = :possessive
546
- else
547
- range_text = text
548
- mode = :greedy
549
- end
550
-
551
- range = range_text.gsub(/\{|\}/, '').split(',', 2)
552
- min = range[0].empty? ? 0 : range[0]
553
- max = range[1] ? (range[1].empty? ? -1 : range[1]) : min
554
-
555
- target_node.quantify(:interval, text, min.to_i, max.to_i, mode)
503
+ exp.quantifier.level += 1 if exp.quantifier
504
+ exp.terminal? || exp.each { |subexp| increase_group_level(subexp) }
556
505
  end
557
506
 
558
507
  def set(token)