regexp_parser 1.7.0 → 2.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +364 -22
  3. data/Gemfile +8 -2
  4. data/LICENSE +1 -1
  5. data/README.md +124 -88
  6. data/Rakefile +6 -70
  7. data/lib/regexp_parser/error.rb +4 -0
  8. data/lib/regexp_parser/expression/base.rb +76 -0
  9. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  10. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  11. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +22 -2
  12. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -8
  13. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +3 -4
  14. data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
  15. data/lib/regexp_parser/expression/classes/conditional.rb +11 -5
  16. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +15 -7
  17. data/lib/regexp_parser/expression/classes/free_space.rb +5 -5
  18. data/lib/regexp_parser/expression/classes/group.rb +28 -15
  19. data/lib/regexp_parser/expression/classes/keep.rb +2 -0
  20. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  21. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
  22. data/lib/regexp_parser/expression/classes/root.rb +4 -19
  23. data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +5 -3
  24. data/lib/regexp_parser/expression/methods/construct.rb +41 -0
  25. data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
  26. data/lib/regexp_parser/expression/methods/match_length.rb +11 -7
  27. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  28. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  29. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  30. data/lib/regexp_parser/expression/methods/tests.rb +47 -1
  31. data/lib/regexp_parser/expression/methods/traverse.rb +34 -18
  32. data/lib/regexp_parser/expression/quantifier.rb +57 -17
  33. data/lib/regexp_parser/expression/sequence.rb +11 -47
  34. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  35. data/lib/regexp_parser/expression/shared.rb +111 -0
  36. data/lib/regexp_parser/expression/subexpression.rb +27 -19
  37. data/lib/regexp_parser/expression.rb +14 -141
  38. data/lib/regexp_parser/lexer.rb +83 -41
  39. data/lib/regexp_parser/parser.rb +371 -429
  40. data/lib/regexp_parser/scanner/char_type.rl +11 -11
  41. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  42. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  43. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  44. data/lib/regexp_parser/scanner/properties/long.csv +633 -0
  45. data/lib/regexp_parser/scanner/properties/short.csv +248 -0
  46. data/lib/regexp_parser/scanner/property.rl +4 -4
  47. data/lib/regexp_parser/scanner/scanner.rl +295 -368
  48. data/lib/regexp_parser/scanner.rb +1405 -1674
  49. data/lib/regexp_parser/syntax/any.rb +2 -7
  50. data/lib/regexp_parser/syntax/base.rb +92 -67
  51. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  52. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  53. data/lib/regexp_parser/syntax/token/backreference.rb +33 -0
  54. data/lib/regexp_parser/syntax/token/character_set.rb +16 -0
  55. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  56. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  57. data/lib/regexp_parser/syntax/token/escape.rb +33 -0
  58. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  59. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  60. data/lib/regexp_parser/syntax/token/meta.rb +20 -0
  61. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  62. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  63. data/lib/regexp_parser/syntax/token/unicode_property.rb +733 -0
  64. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  65. data/lib/regexp_parser/syntax/token.rb +45 -0
  66. data/lib/regexp_parser/syntax/version_lookup.rb +19 -36
  67. data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
  68. data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
  69. data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
  70. data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
  71. data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
  72. data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
  73. data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
  74. data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
  75. data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
  76. data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
  77. data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
  78. data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
  79. data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
  80. data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
  81. data/lib/regexp_parser/syntax/versions.rb +3 -1
  82. data/lib/regexp_parser/syntax.rb +8 -6
  83. data/lib/regexp_parser/token.rb +9 -20
  84. data/lib/regexp_parser/version.rb +1 -1
  85. data/lib/regexp_parser.rb +0 -2
  86. data/regexp_parser.gemspec +20 -22
  87. metadata +49 -166
  88. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  89. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  90. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  91. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  92. data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
  93. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  94. data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
  95. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  96. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  97. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  98. data/spec/expression/base_spec.rb +0 -94
  99. data/spec/expression/clone_spec.rb +0 -120
  100. data/spec/expression/conditional_spec.rb +0 -89
  101. data/spec/expression/free_space_spec.rb +0 -27
  102. data/spec/expression/methods/match_length_spec.rb +0 -161
  103. data/spec/expression/methods/match_spec.rb +0 -25
  104. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  105. data/spec/expression/methods/tests_spec.rb +0 -99
  106. data/spec/expression/methods/traverse_spec.rb +0 -161
  107. data/spec/expression/options_spec.rb +0 -128
  108. data/spec/expression/root_spec.rb +0 -9
  109. data/spec/expression/sequence_spec.rb +0 -9
  110. data/spec/expression/subexpression_spec.rb +0 -50
  111. data/spec/expression/to_h_spec.rb +0 -26
  112. data/spec/expression/to_s_spec.rb +0 -100
  113. data/spec/lexer/all_spec.rb +0 -22
  114. data/spec/lexer/conditionals_spec.rb +0 -53
  115. data/spec/lexer/escapes_spec.rb +0 -14
  116. data/spec/lexer/keep_spec.rb +0 -10
  117. data/spec/lexer/literals_spec.rb +0 -89
  118. data/spec/lexer/nesting_spec.rb +0 -99
  119. data/spec/lexer/refcalls_spec.rb +0 -55
  120. data/spec/parser/all_spec.rb +0 -43
  121. data/spec/parser/alternation_spec.rb +0 -88
  122. data/spec/parser/anchors_spec.rb +0 -17
  123. data/spec/parser/conditionals_spec.rb +0 -179
  124. data/spec/parser/errors_spec.rb +0 -30
  125. data/spec/parser/escapes_spec.rb +0 -121
  126. data/spec/parser/free_space_spec.rb +0 -130
  127. data/spec/parser/groups_spec.rb +0 -108
  128. data/spec/parser/keep_spec.rb +0 -6
  129. data/spec/parser/posix_classes_spec.rb +0 -8
  130. data/spec/parser/properties_spec.rb +0 -115
  131. data/spec/parser/quantifiers_spec.rb +0 -51
  132. data/spec/parser/refcalls_spec.rb +0 -112
  133. data/spec/parser/set/intersections_spec.rb +0 -127
  134. data/spec/parser/set/ranges_spec.rb +0 -111
  135. data/spec/parser/sets_spec.rb +0 -178
  136. data/spec/parser/types_spec.rb +0 -18
  137. data/spec/scanner/all_spec.rb +0 -18
  138. data/spec/scanner/anchors_spec.rb +0 -21
  139. data/spec/scanner/conditionals_spec.rb +0 -128
  140. data/spec/scanner/errors_spec.rb +0 -68
  141. data/spec/scanner/escapes_spec.rb +0 -53
  142. data/spec/scanner/free_space_spec.rb +0 -133
  143. data/spec/scanner/groups_spec.rb +0 -52
  144. data/spec/scanner/keep_spec.rb +0 -10
  145. data/spec/scanner/literals_spec.rb +0 -49
  146. data/spec/scanner/meta_spec.rb +0 -18
  147. data/spec/scanner/properties_spec.rb +0 -64
  148. data/spec/scanner/quantifiers_spec.rb +0 -20
  149. data/spec/scanner/refcalls_spec.rb +0 -36
  150. data/spec/scanner/sets_spec.rb +0 -102
  151. data/spec/scanner/types_spec.rb +0 -14
  152. data/spec/spec_helper.rb +0 -15
  153. data/spec/support/runner.rb +0 -42
  154. data/spec/support/shared_examples.rb +0 -77
  155. data/spec/support/warning_extractor.rb +0 -60
  156. data/spec/syntax/syntax_spec.rb +0 -48
  157. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  158. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  159. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  160. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  161. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  162. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  163. data/spec/syntax/versions/aliases_spec.rb +0 -37
  164. data/spec/token/token_spec.rb +0 -85
  165. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
@@ -1,26 +1,20 @@
1
1
  module Regexp::Expression
2
+ # TODO: in v3.0.0, maybe put Shared back into Base, and inherit from Base and
3
+ # call super in #initialize, but raise in #quantifier= and #quantify,
4
+ # or introduce an Expression::Quantifiable intermediate class.
5
+ # Or actually allow chaining as a more concise but tricky solution than PR#69.
2
6
  class Quantifier
3
- MODES = [:greedy, :possessive, :reluctant]
7
+ include Regexp::Expression::Shared
4
8
 
5
- attr_reader :token, :text, :min, :max, :mode
9
+ MODES = %i[greedy possessive reluctant]
6
10
 
7
- def initialize(token, text, min, max, mode)
8
- @token = token
9
- @text = text
10
- @mode = mode
11
- @min = min
12
- @max = max
13
- end
14
-
15
- def initialize_clone(orig)
16
- @text = orig.text.dup
17
- super
18
- end
11
+ def initialize(*args)
12
+ deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
19
13
 
20
- def to_s
21
- text.dup
14
+ init_from_token_and_options(*args)
15
+ # TODO: remove in v3.0.0, stop removing parts of #token (?)
16
+ self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
22
17
  end
23
- alias :to_str :to_s
24
18
 
25
19
  def to_h
26
20
  {
@@ -40,5 +34,51 @@ module Regexp::Expression
40
34
  RUBY
41
35
  end
42
36
  alias :lazy? :reluctant?
37
+
38
+ def min
39
+ derived_data[:min]
40
+ end
41
+
42
+ def max
43
+ derived_data[:max]
44
+ end
45
+
46
+ def mode
47
+ derived_data[:mode]
48
+ end
49
+
50
+ private
51
+
52
+ def deprecated_old_init(token, text, _min, _max, _mode = :greedy)
53
+ warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
54
+ "is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
55
+ "Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
56
+ "with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode "\
57
+ "will be derived automatically.\n"\
58
+ "Or do `exp.quantifier = #{self.class}.construct(token: token, text: str)`.\n"\
59
+ "This is consistent with how Expression::Base instances are created. "
60
+ @token = token
61
+ @text = text
62
+ end
63
+
64
+ def derived_data
65
+ @derived_data ||= begin
66
+ min, max =
67
+ case text[0]
68
+ when '?'; [0, 1]
69
+ when '*'; [0, -1]
70
+ when '+'; [1, -1]
71
+ else
72
+ int_min = text[/\{(\d*)/, 1]
73
+ int_max = text[/,?(\d*)\}/, 1]
74
+ [int_min.to_i, (int_max.empty? ? -1 : int_max.to_i)]
75
+ end
76
+
77
+ mod = text[/.([?+])/, 1]
78
+ mode = (mod == '?' && :reluctant) || (mod == '+' && :possessive) || :greedy
79
+
80
+ { min: min, max: max, mode: mode }
81
+ end
82
+ end
43
83
  end
44
84
  end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  # A sequence of expressions. Differs from a Subexpression by how it handles
4
3
  # quantifiers, as it applies them to its last element instead of itself as
5
4
  # a whole subexpression.
@@ -7,61 +6,26 @@ module Regexp::Expression
7
6
  # Used as the base class for the Alternation alternatives, Conditional
8
7
  # branches, and CharacterSet::Intersection intersected sequences.
9
8
  class Sequence < Regexp::Expression::Subexpression
10
- # TODO: this override is here for backwards compatibility, remove in 2.0.0
11
- def initialize(*args)
12
- if args.count == 3
13
- warn('WARNING: Sequence.new without a Regexp::Token argument is '\
14
- 'deprecated and will be removed in 2.0.0.')
15
- return self.class.at_levels(*args)
16
- end
17
- super
18
- end
19
-
20
9
  class << self
21
- def add_to(subexpression, params = {}, active_opts = {})
22
- sequence = at_levels(
23
- subexpression.level,
24
- subexpression.set_level,
25
- params[:conditional_level] || subexpression.conditional_level
10
+ def add_to(exp, params = {}, active_opts = {})
11
+ sequence = construct(
12
+ level: exp.level,
13
+ set_level: exp.set_level,
14
+ conditional_level: params[:conditional_level] || exp.conditional_level,
15
+ ts: params[:ts],
26
16
  )
27
- sequence.nesting_level = subexpression.nesting_level + 1
28
17
  sequence.options = active_opts
29
- subexpression.expressions << sequence
18
+ exp.expressions << sequence
30
19
  sequence
31
20
  end
32
-
33
- def at_levels(level, set_level, conditional_level)
34
- token = Regexp::Token.new(
35
- :expression,
36
- :sequence,
37
- '',
38
- nil, # ts
39
- nil, # te
40
- level,
41
- set_level,
42
- conditional_level
43
- )
44
- new(token)
45
- end
46
21
  end
47
22
 
48
- def starts_at
49
- expressions.first.starts_at
23
+ def ts
24
+ (head = expressions.first) ? head.ts : @ts
50
25
  end
51
- alias :ts :starts_at
52
-
53
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
54
- offset = -1
55
- target = expressions[offset]
56
- while target.is_a?(FreeSpace)
57
- target = expressions[offset -= 1]
58
- end
59
26
 
60
- target || raise(ArgumentError, "No valid target found for '#{text}' "\
61
- 'quantifier')
62
-
63
- target.quantify(token, text, min, max, mode)
27
+ def quantify(token, *args)
28
+ extract_quantifier_target(token.text).quantify(token, *args)
64
29
  end
65
30
  end
66
-
67
31
  end
@@ -5,21 +5,16 @@ module Regexp::Expression
5
5
  alias :operands :expressions
6
6
  alias :operator :text
7
7
 
8
- def starts_at
9
- expressions.first.starts_at
8
+ def ts
9
+ (head = expressions.first) ? head.ts : @ts
10
10
  end
11
- alias :ts :starts_at
12
11
 
13
12
  def <<(exp)
14
13
  expressions.last << exp
15
14
  end
16
15
 
17
- def add_sequence(active_opts = {})
18
- self.class::OPERAND.add_to(self, {}, active_opts)
19
- end
20
-
21
- def to_s(format = :full)
22
- sequences.map { |e| e.to_s(format) }.join(text)
16
+ def add_sequence(active_opts = {}, params = { ts: 0 })
17
+ self.class::OPERAND.add_to(self, params, active_opts)
23
18
  end
24
19
  end
25
20
  end
@@ -0,0 +1,111 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ module ClassMethods; end # filled in ./methods/*.rb
4
+
5
+ def self.included(mod)
6
+ mod.class_eval do
7
+ extend Shared::ClassMethods
8
+
9
+ attr_accessor :type, :token, :text, :ts, :te,
10
+ :level, :set_level, :conditional_level,
11
+ :options, :parent,
12
+ :custom_to_s_handling, :pre_quantifier_decorations
13
+
14
+ attr_reader :nesting_level, :quantifier
15
+ end
16
+ end
17
+
18
+ def init_from_token_and_options(token, options = {})
19
+ self.type = token.type
20
+ self.token = token.token
21
+ self.text = token.text
22
+ self.ts = token.ts
23
+ self.te = token.te
24
+ self.level = token.level
25
+ self.set_level = token.set_level
26
+ self.conditional_level = token.conditional_level
27
+ self.nesting_level = 0
28
+ self.options = options || {}
29
+ end
30
+ private :init_from_token_and_options
31
+
32
+ def initialize_copy(orig)
33
+ self.text = orig.text.dup if orig.text
34
+ self.options = orig.options.dup if orig.options
35
+ self.quantifier = orig.quantifier.clone if orig.quantifier
36
+ self.parent = nil # updated by Subexpression#initialize_copy
37
+ if orig.pre_quantifier_decorations
38
+ self.pre_quantifier_decorations = orig.pre_quantifier_decorations.map(&:dup)
39
+ end
40
+ super
41
+ end
42
+
43
+ def starts_at
44
+ ts
45
+ end
46
+
47
+ def ends_at(include_quantifier = true)
48
+ ts + (include_quantifier ? full_length : base_length)
49
+ end
50
+
51
+ def base_length
52
+ to_s(:base).length
53
+ end
54
+
55
+ def full_length
56
+ to_s(:original).length
57
+ end
58
+
59
+ # #to_s reproduces the original source, as an unparser would.
60
+ #
61
+ # It takes an optional format argument.
62
+ #
63
+ # Example:
64
+ #
65
+ # lit = Regexp::Parser.parse(/a +/x)[0]
66
+ #
67
+ # lit.to_s # => 'a+' # default; with quantifier
68
+ # lit.to_s(:full) # => 'a+' # default; with quantifier
69
+ # lit.to_s(:base) # => 'a' # without quantifier
70
+ # lit.to_s(:original) # => 'a +' # with quantifier AND intervening decorations
71
+ #
72
+ def to_s(format = :full)
73
+ base = parts.each_with_object(''.dup) do |part, buff|
74
+ if part.instance_of?(String)
75
+ buff << part
76
+ elsif !part.custom_to_s_handling
77
+ buff << part.to_s(:original)
78
+ end
79
+ end
80
+ "#{base}#{pre_quantifier_decoration(format)}#{quantifier_affix(format)}"
81
+ end
82
+ alias :to_str :to_s
83
+
84
+ def pre_quantifier_decoration(expression_format = :original)
85
+ pre_quantifier_decorations.to_a.join if expression_format == :original
86
+ end
87
+
88
+ def quantifier_affix(expression_format = :full)
89
+ quantifier.to_s if quantified? && expression_format != :base
90
+ end
91
+
92
+ def offset
93
+ [starts_at, full_length]
94
+ end
95
+
96
+ def coded_offset
97
+ '@%d+%d' % offset
98
+ end
99
+
100
+ def nesting_level=(lvl)
101
+ @nesting_level = lvl
102
+ quantifier && quantifier.nesting_level = lvl
103
+ terminal? || each { |subexp| subexp.nesting_level = lvl + 1 }
104
+ end
105
+
106
+ def quantifier=(qtf)
107
+ @quantifier = qtf
108
+ @repetitions = nil # clear memoized value
109
+ end
110
+ end
111
+ end
@@ -1,29 +1,25 @@
1
1
  module Regexp::Expression
2
-
3
2
  class Subexpression < Regexp::Expression::Base
4
3
  include Enumerable
5
4
 
6
5
  attr_accessor :expressions
7
6
 
8
7
  def initialize(token, options = {})
9
- super
10
-
11
8
  self.expressions = []
9
+ super
12
10
  end
13
11
 
14
12
  # Override base method to clone the expressions as well.
15
- def initialize_clone(orig)
16
- self.expressions = orig.expressions.map(&:clone)
13
+ def initialize_copy(orig)
14
+ self.expressions = orig.expressions.map do |exp|
15
+ exp.clone.tap { |copy| copy.parent = self }
16
+ end
17
17
  super
18
18
  end
19
19
 
20
20
  def <<(exp)
21
- if exp.is_a?(WhiteSpace) && last && last.is_a?(WhiteSpace)
22
- last.merge(exp)
23
- else
24
- exp.nesting_level = nesting_level + 1
25
- expressions << exp
26
- end
21
+ exp.parent = self
22
+ expressions << exp
27
23
  end
28
24
 
29
25
  %w[[] at each empty? fetch index join last length values_at].each do |method|
@@ -41,19 +37,31 @@ module Regexp::Expression
41
37
  end
42
38
 
43
39
  def te
44
- ts + to_s.length
45
- end
46
-
47
- def to_s(format = :full)
48
- # Note: the format does not get passed down to subexpressions.
49
- "#{expressions.join}#{quantifier_affix(format)}"
40
+ ts + base_length
50
41
  end
51
42
 
52
43
  def to_h
53
- attributes.merge({
44
+ attributes.merge(
54
45
  text: to_s(:base),
55
46
  expressions: expressions.map(&:to_h)
56
- })
47
+ )
48
+ end
49
+
50
+ def extract_quantifier_target(quantifier_description)
51
+ pre_quantifier_decorations = []
52
+ target = expressions.reverse.find do |exp|
53
+ if exp.decorative?
54
+ exp.custom_to_s_handling = true
55
+ pre_quantifier_decorations << exp.text
56
+ next
57
+ end
58
+ exp
59
+ end
60
+ target or raise Regexp::Parser::ParserError,
61
+ "No valid target found for '#{quantifier_description}' quantifier"
62
+
63
+ target.pre_quantifier_decorations = pre_quantifier_decorations
64
+ target
57
65
  end
58
66
  end
59
67
  end
@@ -1,138 +1,7 @@
1
- module Regexp::Expression
2
-
3
- class Base
4
- attr_accessor :type, :token
5
- attr_accessor :text, :ts
6
- attr_accessor :level, :set_level, :conditional_level, :nesting_level
7
-
8
- attr_accessor :quantifier
9
- attr_accessor :options
10
-
11
- def initialize(token, options = {})
12
- self.type = token.type
13
- self.token = token.token
14
- self.text = token.text
15
- self.ts = token.ts
16
- self.level = token.level
17
- self.set_level = token.set_level
18
- self.conditional_level = token.conditional_level
19
- self.nesting_level = 0
20
- self.quantifier = nil
21
- self.options = options
22
- end
23
-
24
- def initialize_clone(orig)
25
- self.text = (orig.text ? orig.text.dup : nil)
26
- self.options = (orig.options ? orig.options.dup : nil)
27
- self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
28
- super
29
- end
30
-
31
- def to_re(format = :full)
32
- ::Regexp.new(to_s(format))
33
- end
34
-
35
- alias :starts_at :ts
36
-
37
- def full_length
38
- to_s.length
39
- end
40
-
41
- def offset
42
- [starts_at, full_length]
43
- end
44
-
45
- def coded_offset
46
- '@%d+%d' % offset
47
- end
48
-
49
- def to_s(format = :full)
50
- "#{text}#{quantifier_affix(format)}"
51
- end
52
-
53
- def quantifier_affix(expression_format)
54
- quantifier.to_s if quantified? && expression_format != :base
55
- end
56
-
57
- def terminal?
58
- !respond_to?(:expressions)
59
- end
60
-
61
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
62
- self.quantifier = Quantifier.new(token, text, min, max, mode)
63
- end
64
-
65
- def unquantified_clone
66
- clone.tap { |exp| exp.quantifier = nil }
67
- end
68
-
69
- def quantified?
70
- !quantifier.nil?
71
- end
72
-
73
- # Deprecated. Prefer `#repetitions` which has a more uniform interface.
74
- def quantity
75
- return [nil,nil] unless quantified?
76
- [quantifier.min, quantifier.max]
77
- end
78
-
79
- def repetitions
80
- return 1..1 unless quantified?
81
- min = quantifier.min
82
- max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
83
- # fix Range#minmax - https://bugs.ruby-lang.org/issues/15807
84
- (min..max).tap { |r| r.define_singleton_method(:minmax) { [min, max] } }
85
- end
86
-
87
- def greedy?
88
- quantified? and quantifier.greedy?
89
- end
90
-
91
- def reluctant?
92
- quantified? and quantifier.reluctant?
93
- end
94
- alias :lazy? :reluctant?
95
-
96
- def possessive?
97
- quantified? and quantifier.possessive?
98
- end
99
-
100
- def attributes
101
- {
102
- type: type,
103
- token: token,
104
- text: to_s(:base),
105
- starts_at: ts,
106
- length: full_length,
107
- level: level,
108
- set_level: set_level,
109
- conditional_level: conditional_level,
110
- options: options,
111
- quantifier: quantified? ? quantifier.to_h : nil,
112
- }
113
- end
114
- alias :to_h :attributes
115
- end
116
-
117
- def self.parsed(exp)
118
- warn('WARNING: Regexp::Expression::Base.parsed is buggy and '\
119
- 'will be removed in 2.0.0. Use Regexp::Parser.parse instead.')
120
- case exp
121
- when String
122
- Regexp::Parser.parse(exp)
123
- when Regexp
124
- Regexp::Parser.parse(exp.source) # <- causes loss of root options
125
- when Regexp::Expression # <- never triggers
126
- exp
127
- else
128
- raise ArgumentError, 'Expression.parsed accepts a String, Regexp, or '\
129
- 'a Regexp::Expression as a value for exp, but it '\
130
- "was given #{exp.class.name}."
131
- end
132
- end
133
-
134
- end # module Regexp::Expression
1
+ require 'regexp_parser/error'
135
2
 
3
+ require 'regexp_parser/expression/shared'
4
+ require 'regexp_parser/expression/base'
136
5
  require 'regexp_parser/expression/quantifier'
137
6
  require 'regexp_parser/expression/subexpression'
138
7
  require 'regexp_parser/expression/sequence'
@@ -140,24 +9,28 @@ require 'regexp_parser/expression/sequence_operation'
140
9
 
141
10
  require 'regexp_parser/expression/classes/alternation'
142
11
  require 'regexp_parser/expression/classes/anchor'
143
- require 'regexp_parser/expression/classes/backref'
12
+ require 'regexp_parser/expression/classes/backreference'
13
+ require 'regexp_parser/expression/classes/character_set'
14
+ require 'regexp_parser/expression/classes/character_set/intersection'
15
+ require 'regexp_parser/expression/classes/character_set/range'
16
+ require 'regexp_parser/expression/classes/character_type'
144
17
  require 'regexp_parser/expression/classes/conditional'
145
- require 'regexp_parser/expression/classes/escape'
18
+ require 'regexp_parser/expression/classes/escape_sequence'
146
19
  require 'regexp_parser/expression/classes/free_space'
147
20
  require 'regexp_parser/expression/classes/group'
148
21
  require 'regexp_parser/expression/classes/keep'
149
22
  require 'regexp_parser/expression/classes/literal'
150
23
  require 'regexp_parser/expression/classes/posix_class'
151
- require 'regexp_parser/expression/classes/property'
152
24
  require 'regexp_parser/expression/classes/root'
153
- require 'regexp_parser/expression/classes/set'
154
- require 'regexp_parser/expression/classes/set/intersection'
155
- require 'regexp_parser/expression/classes/set/range'
156
- require 'regexp_parser/expression/classes/type'
25
+ require 'regexp_parser/expression/classes/unicode_property'
157
26
 
27
+ require 'regexp_parser/expression/methods/construct'
28
+ require 'regexp_parser/expression/methods/human_name'
158
29
  require 'regexp_parser/expression/methods/match'
159
30
  require 'regexp_parser/expression/methods/match_length'
160
31
  require 'regexp_parser/expression/methods/options'
32
+ require 'regexp_parser/expression/methods/parts'
33
+ require 'regexp_parser/expression/methods/printing'
161
34
  require 'regexp_parser/expression/methods/strfregexp'
162
35
  require 'regexp_parser/expression/methods/tests'
163
36
  require 'regexp_parser/expression/methods/traverse'