regexp_parser 2.1.1 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +94 -6
  3. data/Gemfile +2 -1
  4. data/LICENSE +1 -1
  5. data/README.md +40 -30
  6. data/Rakefile +6 -70
  7. data/lib/regexp_parser/error.rb +1 -1
  8. data/lib/regexp_parser/expression/base.rb +75 -0
  9. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  10. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +1 -0
  11. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
  12. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -2
  13. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +2 -2
  14. data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
  15. data/lib/regexp_parser/expression/classes/conditional.rb +2 -2
  16. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +13 -7
  17. data/lib/regexp_parser/expression/classes/free_space.rb +1 -3
  18. data/lib/regexp_parser/expression/classes/group.rb +6 -6
  19. data/lib/regexp_parser/expression/classes/keep.rb +2 -0
  20. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  21. data/lib/regexp_parser/expression/classes/root.rb +3 -6
  22. data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +1 -2
  23. data/lib/regexp_parser/expression/methods/construct.rb +43 -0
  24. data/lib/regexp_parser/expression/methods/match_length.rb +1 -1
  25. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  26. data/lib/regexp_parser/expression/methods/tests.rb +10 -1
  27. data/lib/regexp_parser/expression/quantifier.rb +41 -23
  28. data/lib/regexp_parser/expression/sequence.rb +9 -24
  29. data/lib/regexp_parser/expression/sequence_operation.rb +2 -2
  30. data/lib/regexp_parser/expression/shared.rb +85 -0
  31. data/lib/regexp_parser/expression/subexpression.rb +11 -8
  32. data/lib/regexp_parser/expression.rb +10 -132
  33. data/lib/regexp_parser/lexer.rb +8 -6
  34. data/lib/regexp_parser/parser.rb +21 -72
  35. data/lib/regexp_parser/scanner/properties/long.csv +622 -0
  36. data/lib/regexp_parser/scanner/properties/short.csv +246 -0
  37. data/lib/regexp_parser/scanner/property.rl +1 -1
  38. data/lib/regexp_parser/scanner/scanner.rl +48 -35
  39. data/lib/regexp_parser/scanner.rb +735 -801
  40. data/lib/regexp_parser/syntax/any.rb +2 -7
  41. data/lib/regexp_parser/syntax/base.rb +91 -66
  42. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  43. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  44. data/lib/regexp_parser/syntax/token/backreference.rb +30 -0
  45. data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
  46. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  47. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  48. data/lib/regexp_parser/syntax/token/escape.rb +31 -0
  49. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  50. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  51. data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
  52. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  53. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  54. data/lib/regexp_parser/syntax/token/unicode_property.rb +717 -0
  55. data/lib/regexp_parser/syntax/token.rb +45 -0
  56. data/lib/regexp_parser/syntax/version_lookup.rb +20 -29
  57. data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
  58. data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
  59. data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
  60. data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
  61. data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
  62. data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
  63. data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
  64. data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
  65. data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
  66. data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
  67. data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
  68. data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
  69. data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
  70. data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
  71. data/lib/regexp_parser/syntax/versions.rb +1 -1
  72. data/lib/regexp_parser/syntax.rb +1 -1
  73. data/lib/regexp_parser/token.rb +9 -20
  74. data/lib/regexp_parser/version.rb +1 -1
  75. data/lib/regexp_parser.rb +0 -2
  76. data/regexp_parser.gemspec +20 -22
  77. metadata +37 -166
  78. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  79. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  80. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  81. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  82. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  83. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  84. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  85. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  86. data/spec/expression/base_spec.rb +0 -104
  87. data/spec/expression/clone_spec.rb +0 -152
  88. data/spec/expression/conditional_spec.rb +0 -89
  89. data/spec/expression/free_space_spec.rb +0 -27
  90. data/spec/expression/methods/match_length_spec.rb +0 -161
  91. data/spec/expression/methods/match_spec.rb +0 -25
  92. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  93. data/spec/expression/methods/tests_spec.rb +0 -99
  94. data/spec/expression/methods/traverse_spec.rb +0 -161
  95. data/spec/expression/options_spec.rb +0 -128
  96. data/spec/expression/subexpression_spec.rb +0 -50
  97. data/spec/expression/to_h_spec.rb +0 -26
  98. data/spec/expression/to_s_spec.rb +0 -108
  99. data/spec/lexer/all_spec.rb +0 -22
  100. data/spec/lexer/conditionals_spec.rb +0 -53
  101. data/spec/lexer/delimiters_spec.rb +0 -68
  102. data/spec/lexer/escapes_spec.rb +0 -14
  103. data/spec/lexer/keep_spec.rb +0 -10
  104. data/spec/lexer/literals_spec.rb +0 -64
  105. data/spec/lexer/nesting_spec.rb +0 -99
  106. data/spec/lexer/refcalls_spec.rb +0 -60
  107. data/spec/parser/all_spec.rb +0 -43
  108. data/spec/parser/alternation_spec.rb +0 -88
  109. data/spec/parser/anchors_spec.rb +0 -17
  110. data/spec/parser/conditionals_spec.rb +0 -179
  111. data/spec/parser/errors_spec.rb +0 -30
  112. data/spec/parser/escapes_spec.rb +0 -121
  113. data/spec/parser/free_space_spec.rb +0 -130
  114. data/spec/parser/groups_spec.rb +0 -108
  115. data/spec/parser/keep_spec.rb +0 -6
  116. data/spec/parser/options_spec.rb +0 -28
  117. data/spec/parser/posix_classes_spec.rb +0 -8
  118. data/spec/parser/properties_spec.rb +0 -115
  119. data/spec/parser/quantifiers_spec.rb +0 -68
  120. data/spec/parser/refcalls_spec.rb +0 -117
  121. data/spec/parser/set/intersections_spec.rb +0 -127
  122. data/spec/parser/set/ranges_spec.rb +0 -111
  123. data/spec/parser/sets_spec.rb +0 -178
  124. data/spec/parser/types_spec.rb +0 -18
  125. data/spec/scanner/all_spec.rb +0 -18
  126. data/spec/scanner/anchors_spec.rb +0 -21
  127. data/spec/scanner/conditionals_spec.rb +0 -128
  128. data/spec/scanner/delimiters_spec.rb +0 -52
  129. data/spec/scanner/errors_spec.rb +0 -67
  130. data/spec/scanner/escapes_spec.rb +0 -64
  131. data/spec/scanner/free_space_spec.rb +0 -165
  132. data/spec/scanner/groups_spec.rb +0 -61
  133. data/spec/scanner/keep_spec.rb +0 -10
  134. data/spec/scanner/literals_spec.rb +0 -39
  135. data/spec/scanner/meta_spec.rb +0 -18
  136. data/spec/scanner/options_spec.rb +0 -36
  137. data/spec/scanner/properties_spec.rb +0 -64
  138. data/spec/scanner/quantifiers_spec.rb +0 -25
  139. data/spec/scanner/refcalls_spec.rb +0 -55
  140. data/spec/scanner/sets_spec.rb +0 -151
  141. data/spec/scanner/types_spec.rb +0 -14
  142. data/spec/spec_helper.rb +0 -16
  143. data/spec/support/runner.rb +0 -42
  144. data/spec/support/shared_examples.rb +0 -77
  145. data/spec/support/warning_extractor.rb +0 -60
  146. data/spec/syntax/syntax_spec.rb +0 -48
  147. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  148. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  149. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  150. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  151. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  152. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  153. data/spec/syntax/versions/aliases_spec.rb +0 -37
  154. data/spec/token/token_spec.rb +0 -85
@@ -1,7 +1,3 @@
1
1
  module Regexp::Expression
2
-
3
- class Literal < Regexp::Expression::Base
4
- # Obviously nothing special here, yet.
5
- end
6
-
2
+ class Literal < Regexp::Expression::Base; end
7
3
  end
@@ -1,12 +1,9 @@
1
1
  module Regexp::Expression
2
-
3
2
  class Root < Regexp::Expression::Subexpression
4
3
  def self.build(options = {})
5
- new(build_token, options)
6
- end
7
-
8
- def self.build_token
9
- Regexp::Token.new(:expression, :root, '', 0)
4
+ warn "`#{self.class}.build(options)` is deprecated and will raise in "\
5
+ "regexp_parser v3.0.0. Please use `.construct(options: options)`."
6
+ construct(options: options)
10
7
  end
11
8
  end
12
9
  end
@@ -1,5 +1,5 @@
1
1
  module Regexp::Expression
2
-
2
+ # TODO: unify name with token :property, one way or the other, in v3.0.0
3
3
  module UnicodeProperty
4
4
  class Base < Regexp::Expression::Base
5
5
  def negative?
@@ -116,5 +116,4 @@ module Regexp::Expression
116
116
  class Script < UnicodeProperty::Base; end
117
117
  class Block < UnicodeProperty::Base; end
118
118
  end
119
-
120
119
  end # module Regexp::Expression
@@ -0,0 +1,43 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ module ClassMethods
4
+ # Convenience method to init a valid Expression without a Regexp::Token
5
+ def construct(params = {})
6
+ attrs = construct_defaults.merge(params)
7
+ options = attrs.delete(:options)
8
+ token_args = Regexp::TOKEN_KEYS.map { |k| attrs.delete(k) }
9
+ token = Regexp::Token.new(*token_args)
10
+ raise ArgumentError, "unsupported attribute(s): #{attrs}" if attrs.any?
11
+
12
+ new(token, options)
13
+ end
14
+
15
+ def construct_defaults
16
+ if self == Root
17
+ { type: :expression, token: :root, ts: 0 }
18
+ elsif self < Sequence
19
+ { type: :expression, token: :sequence }
20
+ else
21
+ { type: token_class::Type }
22
+ end.merge(level: 0, set_level: 0, conditional_level: 0, text: '')
23
+ end
24
+
25
+ def token_class
26
+ if self == Root || self < Sequence
27
+ nil # no token class because these objects are Parser-generated
28
+ # TODO: synch exp & token class names for alt., dot, escapes in v3.0.0
29
+ elsif self == Alternation || self == CharacterType::Any
30
+ Regexp::Syntax::Token::Meta
31
+ elsif self <= EscapeSequence::Base
32
+ Regexp::Syntax::Token::Escape
33
+ else
34
+ Regexp::Syntax::Token.const_get(name.split('::')[2])
35
+ end
36
+ end
37
+ end
38
+
39
+ def token_class
40
+ self.class.token_class
41
+ end
42
+ end
43
+ end
@@ -112,7 +112,7 @@ module Regexp::Expression
112
112
  end
113
113
 
114
114
  def inner_match_length
115
- dummy = Regexp::Expression::Root.build
115
+ dummy = Regexp::Expression::Root.construct
116
116
  dummy.expressions = expressions.map(&:clone)
117
117
  dummy.quantifier = quantifier && quantifier.clone
118
118
  dummy.match_length
@@ -43,7 +43,7 @@ module Regexp::Expression
43
43
 
44
44
  # Order is important! Fields that use other fields in their
45
45
  # definition must appear before the fields they use.
46
- part_keys = %w{a m b o i l x s e S y k c q Q z Z t ~t T >}
46
+ part_keys = %w[a m b o i l x s e S y k c q Q z Z t ~t T >]
47
47
  part.keys.each {|k| part[k] = "<?#{k}?>"}
48
48
 
49
49
  part['>'] = print_level ? (' ' * (print_level + indent_offset)) : ''
@@ -1,5 +1,5 @@
1
1
  module Regexp::Expression
2
- class Base
2
+ module Shared
3
3
 
4
4
  # Test if this expression has the given test_type, which can be either
5
5
  # a symbol or an array of symbols to check against the expression's type.
@@ -93,5 +93,14 @@ module Regexp::Expression
93
93
  "Array, Hash, or Symbol expected, #{scope.class.name} given"
94
94
  end
95
95
  end
96
+
97
+ # Deep-compare two expressions for equality.
98
+ def ==(other)
99
+ other.class == self.class &&
100
+ other.to_s == to_s &&
101
+ other.options == options
102
+ end
103
+ alias :=== :==
104
+ alias :eql? :==
96
105
  end
97
106
  end
@@ -1,26 +1,24 @@
1
1
  module Regexp::Expression
2
+ # TODO: in v3.0.0, maybe put Shared back into Base, and inherit from Base and
3
+ # call super in #initialize, but raise in #quantifier= and #quantify,
4
+ # or introduce an Expression::Quantifiable intermediate class.
5
+ # Or actually allow chaining as a more concise but tricky solution than PR#69.
2
6
  class Quantifier
3
- MODES = [:greedy, :possessive, :reluctant]
7
+ include Regexp::Expression::Shared
4
8
 
5
- attr_reader :token, :text, :min, :max, :mode
9
+ MODES = %i[greedy possessive reluctant]
6
10
 
7
- def initialize(token, text, min, max, mode)
8
- @token = token
9
- @text = text
10
- @mode = mode
11
- @min = min
12
- @max = max
13
- end
11
+ attr_reader :min, :max, :mode
14
12
 
15
- def initialize_copy(orig)
16
- @text = orig.text.dup
17
- super
18
- end
13
+ def initialize(*args)
14
+ deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
19
15
 
20
- def to_s
21
- text.dup
16
+ init_from_token_and_options(*args)
17
+ @mode = (token.to_s[/greedy|reluctant|possessive/] || :greedy).to_sym
18
+ @min, @max = minmax
19
+ # TODO: remove in v3.0.0, stop removing parts of #token (?)
20
+ self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
22
21
  end
23
- alias :to_str :to_s
24
22
 
25
23
  def to_h
26
24
  {
@@ -41,13 +39,33 @@ module Regexp::Expression
41
39
  end
42
40
  alias :lazy? :reluctant?
43
41
 
44
- def ==(other)
45
- other.class == self.class &&
46
- other.token == token &&
47
- other.mode == mode &&
48
- other.min == min &&
49
- other.max == max
42
+ private
43
+
44
+ def deprecated_old_init(token, text, min, max, mode = :greedy)
45
+ warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
46
+ "is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
47
+ "Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
48
+ "with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode "\
49
+ "will be derived automatically.\n"\
50
+ "Or do `exp.quantifier = #{self.class}.construct(token: token, text: str)`.\n"\
51
+ "This is consistent with how Expression::Base instances are created. "
52
+ @token = token
53
+ @text = text
54
+ @min = min
55
+ @max = max
56
+ @mode = mode
57
+ end
58
+
59
+ def minmax
60
+ case token
61
+ when /zero_or_one/ then [0, 1]
62
+ when /zero_or_more/ then [0, -1]
63
+ when /one_or_more/ then [1, -1]
64
+ when :interval
65
+ int_min = text[/\{(\d*)/, 1]
66
+ int_max = text[/,?(\d*)\}/, 1]
67
+ [int_min.to_i, (int_max.empty? ? -1 : int_max.to_i)]
68
+ end
50
69
  end
51
- alias :eq :==
52
70
  end
53
71
  end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  # A sequence of expressions. Differs from a Subexpression in how it handles
4
3
  # quantifiers, as it applies them to its last element instead of itself as
5
4
  # a whole subexpression.
@@ -8,31 +7,17 @@ module Regexp::Expression
8
7
  # branches, and CharacterSet::Intersection intersected sequences.
9
8
  class Sequence < Regexp::Expression::Subexpression
10
9
  class << self
11
- def add_to(subexpression, params = {}, active_opts = {})
12
- sequence = at_levels(
13
- subexpression.level,
14
- subexpression.set_level,
15
- params[:conditional_level] || subexpression.conditional_level
10
+ def add_to(exp, params = {}, active_opts = {})
11
+ sequence = construct(
12
+ level: exp.level,
13
+ set_level: exp.set_level,
14
+ conditional_level: params[:conditional_level] || exp.conditional_level,
16
15
  )
17
- sequence.nesting_level = subexpression.nesting_level + 1
16
+ sequence.nesting_level = exp.nesting_level + 1
18
17
  sequence.options = active_opts
19
- subexpression.expressions << sequence
18
+ exp.expressions << sequence
20
19
  sequence
21
20
  end
22
-
23
- def at_levels(level, set_level, conditional_level)
24
- token = Regexp::Token.new(
25
- :expression,
26
- :sequence,
27
- '',
28
- nil, # ts
29
- nil, # te
30
- level,
31
- set_level,
32
- conditional_level
33
- )
34
- new(token)
35
- end
36
21
  end
37
22
 
38
23
  def starts_at
@@ -40,12 +25,12 @@ module Regexp::Expression
40
25
  end
41
26
  alias :ts :starts_at
42
27
 
43
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
28
+ def quantify(*args)
44
29
  target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
45
30
  target or raise Regexp::Parser::Error,
46
31
  "No valid target found for '#{text}' quantifier"
47
32
 
48
- target.quantify(token, text, min, max, mode)
33
+ target.quantify(*args)
49
34
  end
50
35
  end
51
36
  end
@@ -18,8 +18,8 @@ module Regexp::Expression
18
18
  self.class::OPERAND.add_to(self, {}, active_opts)
19
19
  end
20
20
 
21
- def to_s(format = :full)
22
- sequences.map { |e| e.to_s(format) }.join(text)
21
+ def parts
22
+ intersperse(expressions, text.dup)
23
23
  end
24
24
  end
25
25
  end
@@ -0,0 +1,85 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ module ClassMethods; end # filled in ./methods/*.rb
4
+
5
+ def self.included(mod)
6
+ mod.class_eval do
7
+ extend Shared::ClassMethods
8
+
9
+ attr_accessor :type, :token, :text, :ts, :te,
10
+ :level, :set_level, :conditional_level,
11
+ :options, :quantifier
12
+
13
+ attr_reader :nesting_level
14
+ end
15
+ end
16
+
17
+ def init_from_token_and_options(token, options = {})
18
+ self.type = token.type
19
+ self.token = token.token
20
+ self.text = token.text
21
+ self.ts = token.ts
22
+ self.te = token.te
23
+ self.level = token.level
24
+ self.set_level = token.set_level
25
+ self.conditional_level = token.conditional_level
26
+ self.nesting_level = 0
27
+ self.options = options || {}
28
+ end
29
+ private :init_from_token_and_options
30
+
31
+ def initialize_copy(orig)
32
+ self.text = orig.text.dup if orig.text
33
+ self.options = orig.options.dup if orig.options
34
+ self.quantifier = orig.quantifier.clone if orig.quantifier
35
+ super
36
+ end
37
+
38
+ def starts_at
39
+ ts
40
+ end
41
+
42
+ def base_length
43
+ to_s(:base).length
44
+ end
45
+
46
+ def full_length
47
+ to_s.length
48
+ end
49
+
50
+ def to_s(format = :full)
51
+ "#{parts.join}#{quantifier_affix(format)}"
52
+ end
53
+ alias :to_str :to_s
54
+
55
+ def parts
56
+ [text.dup]
57
+ end
58
+
59
+ def quantifier_affix(expression_format)
60
+ quantifier.to_s if quantified? && expression_format != :base
61
+ end
62
+
63
+ def quantified?
64
+ !quantifier.nil?
65
+ end
66
+
67
+ def offset
68
+ [starts_at, full_length]
69
+ end
70
+
71
+ def coded_offset
72
+ '@%d+%d' % offset
73
+ end
74
+
75
+ def terminal?
76
+ !respond_to?(:expressions)
77
+ end
78
+
79
+ def nesting_level=(lvl)
80
+ @nesting_level = lvl
81
+ quantifier && quantifier.nesting_level = lvl
82
+ terminal? || each { |subexp| subexp.nesting_level = lvl + 1 }
83
+ end
84
+ end
85
+ end
@@ -1,14 +1,12 @@
1
1
  module Regexp::Expression
2
-
3
2
  class Subexpression < Regexp::Expression::Base
4
3
  include Enumerable
5
4
 
6
5
  attr_accessor :expressions
7
6
 
8
7
  def initialize(token, options = {})
9
- super
10
-
11
8
  self.expressions = []
9
+ super
12
10
  end
13
11
 
14
12
  # Override base method to clone the expressions as well.
@@ -44,16 +42,21 @@ module Regexp::Expression
44
42
  ts + to_s.length
45
43
  end
46
44
 
47
- def to_s(format = :full)
48
- # Note: the format does not get passed down to subexpressions.
49
- "#{expressions.join}#{quantifier_affix(format)}"
45
+ def parts
46
+ expressions
50
47
  end
51
48
 
52
49
  def to_h
53
- attributes.merge({
50
+ attributes.merge(
54
51
  text: to_s(:base),
55
52
  expressions: expressions.map(&:to_h)
56
- })
53
+ )
54
+ end
55
+
56
+ private
57
+
58
+ def intersperse(expressions, separator)
59
+ expressions.flat_map { |exp| [exp, separator] }.slice(0...-1)
57
60
  end
58
61
  end
59
62
  end
@@ -1,130 +1,7 @@
1
1
  require 'regexp_parser/error'
2
2
 
3
- module Regexp::Expression
4
- class Base
5
- attr_accessor :type, :token
6
- attr_accessor :text, :ts
7
- attr_accessor :level, :set_level, :conditional_level, :nesting_level
8
-
9
- attr_accessor :quantifier
10
- attr_accessor :options
11
-
12
- def initialize(token, options = {})
13
- self.type = token.type
14
- self.token = token.token
15
- self.text = token.text
16
- self.ts = token.ts
17
- self.level = token.level
18
- self.set_level = token.set_level
19
- self.conditional_level = token.conditional_level
20
- self.nesting_level = 0
21
- self.quantifier = nil
22
- self.options = options
23
- end
24
-
25
- def initialize_copy(orig)
26
- self.text = (orig.text ? orig.text.dup : nil)
27
- self.options = (orig.options ? orig.options.dup : nil)
28
- self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
29
- super
30
- end
31
-
32
- def to_re(format = :full)
33
- ::Regexp.new(to_s(format))
34
- end
35
-
36
- alias :starts_at :ts
37
-
38
- def base_length
39
- to_s(:base).length
40
- end
41
-
42
- def full_length
43
- to_s.length
44
- end
45
-
46
- def offset
47
- [starts_at, full_length]
48
- end
49
-
50
- def coded_offset
51
- '@%d+%d' % offset
52
- end
53
-
54
- def to_s(format = :full)
55
- "#{text}#{quantifier_affix(format)}"
56
- end
57
-
58
- def quantifier_affix(expression_format)
59
- quantifier.to_s if quantified? && expression_format != :base
60
- end
61
-
62
- def terminal?
63
- !respond_to?(:expressions)
64
- end
65
-
66
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
67
- self.quantifier = Quantifier.new(token, text, min, max, mode)
68
- end
69
-
70
- def unquantified_clone
71
- clone.tap { |exp| exp.quantifier = nil }
72
- end
73
-
74
- def quantified?
75
- !quantifier.nil?
76
- end
77
-
78
- # Deprecated. Prefer `#repetitions` which has a more uniform interface.
79
- def quantity
80
- return [nil,nil] unless quantified?
81
- [quantifier.min, quantifier.max]
82
- end
83
-
84
- def repetitions
85
- return 1..1 unless quantified?
86
- min = quantifier.min
87
- max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
88
- range = min..max
89
- # fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
90
- if RUBY_VERSION.to_f < 2.7
91
- range.define_singleton_method(:minmax) { [min, max] }
92
- end
93
- range
94
- end
95
-
96
- def greedy?
97
- quantified? and quantifier.greedy?
98
- end
99
-
100
- def reluctant?
101
- quantified? and quantifier.reluctant?
102
- end
103
- alias :lazy? :reluctant?
104
-
105
- def possessive?
106
- quantified? and quantifier.possessive?
107
- end
108
-
109
- def attributes
110
- {
111
- type: type,
112
- token: token,
113
- text: to_s(:base),
114
- starts_at: ts,
115
- length: full_length,
116
- level: level,
117
- set_level: set_level,
118
- conditional_level: conditional_level,
119
- options: options,
120
- quantifier: quantified? ? quantifier.to_h : nil,
121
- }
122
- end
123
- alias :to_h :attributes
124
- end
125
-
126
- end # module Regexp::Expression
127
-
3
+ require 'regexp_parser/expression/shared'
4
+ require 'regexp_parser/expression/base'
128
5
  require 'regexp_parser/expression/quantifier'
129
6
  require 'regexp_parser/expression/subexpression'
130
7
  require 'regexp_parser/expression/sequence'
@@ -132,21 +9,22 @@ require 'regexp_parser/expression/sequence_operation'
132
9
 
133
10
  require 'regexp_parser/expression/classes/alternation'
134
11
  require 'regexp_parser/expression/classes/anchor'
135
- require 'regexp_parser/expression/classes/backref'
12
+ require 'regexp_parser/expression/classes/backreference'
13
+ require 'regexp_parser/expression/classes/character_set'
14
+ require 'regexp_parser/expression/classes/character_set/intersection'
15
+ require 'regexp_parser/expression/classes/character_set/range'
16
+ require 'regexp_parser/expression/classes/character_type'
136
17
  require 'regexp_parser/expression/classes/conditional'
137
- require 'regexp_parser/expression/classes/escape'
18
+ require 'regexp_parser/expression/classes/escape_sequence'
138
19
  require 'regexp_parser/expression/classes/free_space'
139
20
  require 'regexp_parser/expression/classes/group'
140
21
  require 'regexp_parser/expression/classes/keep'
141
22
  require 'regexp_parser/expression/classes/literal'
142
23
  require 'regexp_parser/expression/classes/posix_class'
143
- require 'regexp_parser/expression/classes/property'
144
24
  require 'regexp_parser/expression/classes/root'
145
- require 'regexp_parser/expression/classes/set'
146
- require 'regexp_parser/expression/classes/set/intersection'
147
- require 'regexp_parser/expression/classes/set/range'
148
- require 'regexp_parser/expression/classes/type'
25
+ require 'regexp_parser/expression/classes/unicode_property'
149
26
 
27
+ require 'regexp_parser/expression/methods/construct'
150
28
  require 'regexp_parser/expression/methods/match'
151
29
  require 'regexp_parser/expression/methods/match_length'
152
30
  require 'regexp_parser/expression/methods/options'
@@ -4,19 +4,21 @@
4
4
  # given syntax flavor.
5
5
  class Regexp::Lexer
6
6
 
7
- OPENING_TOKENS = [
8
- :capture, :passive, :lookahead, :nlookahead, :lookbehind, :nlookbehind,
9
- :atomic, :options, :options_switch, :named, :absence
7
+ OPENING_TOKENS = %i[
8
+ capture passive lookahead nlookahead lookbehind nlookbehind
9
+ atomic options options_switch named absence
10
10
  ].freeze
11
11
 
12
- CLOSING_TOKENS = [:close].freeze
12
+ CLOSING_TOKENS = %i[close].freeze
13
+
14
+ CONDITION_TOKENS = %i[condition condition_close].freeze
13
15
 
14
16
  def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
15
17
  new.lex(input, syntax, options: options, &block)
16
18
  end
17
19
 
18
20
  def lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
19
- syntax = Regexp::Syntax.new(syntax)
21
+ syntax = Regexp::Syntax.for(syntax)
20
22
 
21
23
  self.tokens = []
22
24
  self.nesting = 0
@@ -40,7 +42,7 @@ class Regexp::Lexer
40
42
  nesting, set_nesting, conditional_nesting)
41
43
 
42
44
  current = merge_condition(current) if type == :conditional and
43
- [:condition, :condition_close].include?(token)
45
+ CONDITION_TOKENS.include?(token)
44
46
 
45
47
  last.next = current if last
46
48
  current.previous = last if last