regexp_parser 2.1.1 → 2.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (154)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +94 -6
  3. data/Gemfile +2 -1
  4. data/LICENSE +1 -1
  5. data/README.md +40 -30
  6. data/Rakefile +6 -70
  7. data/lib/regexp_parser/error.rb +1 -1
  8. data/lib/regexp_parser/expression/base.rb +75 -0
  9. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  10. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +1 -0
  11. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
  12. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -2
  13. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +2 -2
  14. data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
  15. data/lib/regexp_parser/expression/classes/conditional.rb +2 -2
  16. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +13 -7
  17. data/lib/regexp_parser/expression/classes/free_space.rb +1 -3
  18. data/lib/regexp_parser/expression/classes/group.rb +6 -6
  19. data/lib/regexp_parser/expression/classes/keep.rb +2 -0
  20. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  21. data/lib/regexp_parser/expression/classes/root.rb +3 -6
  22. data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +1 -2
  23. data/lib/regexp_parser/expression/methods/construct.rb +43 -0
  24. data/lib/regexp_parser/expression/methods/match_length.rb +1 -1
  25. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  26. data/lib/regexp_parser/expression/methods/tests.rb +10 -1
  27. data/lib/regexp_parser/expression/quantifier.rb +41 -23
  28. data/lib/regexp_parser/expression/sequence.rb +9 -24
  29. data/lib/regexp_parser/expression/sequence_operation.rb +2 -2
  30. data/lib/regexp_parser/expression/shared.rb +85 -0
  31. data/lib/regexp_parser/expression/subexpression.rb +11 -8
  32. data/lib/regexp_parser/expression.rb +10 -132
  33. data/lib/regexp_parser/lexer.rb +8 -6
  34. data/lib/regexp_parser/parser.rb +21 -72
  35. data/lib/regexp_parser/scanner/properties/long.csv +622 -0
  36. data/lib/regexp_parser/scanner/properties/short.csv +246 -0
  37. data/lib/regexp_parser/scanner/property.rl +1 -1
  38. data/lib/regexp_parser/scanner/scanner.rl +48 -35
  39. data/lib/regexp_parser/scanner.rb +735 -801
  40. data/lib/regexp_parser/syntax/any.rb +2 -7
  41. data/lib/regexp_parser/syntax/base.rb +91 -66
  42. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  43. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  44. data/lib/regexp_parser/syntax/token/backreference.rb +30 -0
  45. data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
  46. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  47. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  48. data/lib/regexp_parser/syntax/token/escape.rb +31 -0
  49. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  50. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  51. data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
  52. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  53. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  54. data/lib/regexp_parser/syntax/token/unicode_property.rb +717 -0
  55. data/lib/regexp_parser/syntax/token.rb +45 -0
  56. data/lib/regexp_parser/syntax/version_lookup.rb +20 -29
  57. data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
  58. data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
  59. data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
  60. data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
  61. data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
  62. data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
  63. data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
  64. data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
  65. data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
  66. data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
  67. data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
  68. data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
  69. data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
  70. data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
  71. data/lib/regexp_parser/syntax/versions.rb +1 -1
  72. data/lib/regexp_parser/syntax.rb +1 -1
  73. data/lib/regexp_parser/token.rb +9 -20
  74. data/lib/regexp_parser/version.rb +1 -1
  75. data/lib/regexp_parser.rb +0 -2
  76. data/regexp_parser.gemspec +20 -22
  77. metadata +37 -166
  78. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  79. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  80. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  81. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  82. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  83. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  84. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  85. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  86. data/spec/expression/base_spec.rb +0 -104
  87. data/spec/expression/clone_spec.rb +0 -152
  88. data/spec/expression/conditional_spec.rb +0 -89
  89. data/spec/expression/free_space_spec.rb +0 -27
  90. data/spec/expression/methods/match_length_spec.rb +0 -161
  91. data/spec/expression/methods/match_spec.rb +0 -25
  92. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  93. data/spec/expression/methods/tests_spec.rb +0 -99
  94. data/spec/expression/methods/traverse_spec.rb +0 -161
  95. data/spec/expression/options_spec.rb +0 -128
  96. data/spec/expression/subexpression_spec.rb +0 -50
  97. data/spec/expression/to_h_spec.rb +0 -26
  98. data/spec/expression/to_s_spec.rb +0 -108
  99. data/spec/lexer/all_spec.rb +0 -22
  100. data/spec/lexer/conditionals_spec.rb +0 -53
  101. data/spec/lexer/delimiters_spec.rb +0 -68
  102. data/spec/lexer/escapes_spec.rb +0 -14
  103. data/spec/lexer/keep_spec.rb +0 -10
  104. data/spec/lexer/literals_spec.rb +0 -64
  105. data/spec/lexer/nesting_spec.rb +0 -99
  106. data/spec/lexer/refcalls_spec.rb +0 -60
  107. data/spec/parser/all_spec.rb +0 -43
  108. data/spec/parser/alternation_spec.rb +0 -88
  109. data/spec/parser/anchors_spec.rb +0 -17
  110. data/spec/parser/conditionals_spec.rb +0 -179
  111. data/spec/parser/errors_spec.rb +0 -30
  112. data/spec/parser/escapes_spec.rb +0 -121
  113. data/spec/parser/free_space_spec.rb +0 -130
  114. data/spec/parser/groups_spec.rb +0 -108
  115. data/spec/parser/keep_spec.rb +0 -6
  116. data/spec/parser/options_spec.rb +0 -28
  117. data/spec/parser/posix_classes_spec.rb +0 -8
  118. data/spec/parser/properties_spec.rb +0 -115
  119. data/spec/parser/quantifiers_spec.rb +0 -68
  120. data/spec/parser/refcalls_spec.rb +0 -117
  121. data/spec/parser/set/intersections_spec.rb +0 -127
  122. data/spec/parser/set/ranges_spec.rb +0 -111
  123. data/spec/parser/sets_spec.rb +0 -178
  124. data/spec/parser/types_spec.rb +0 -18
  125. data/spec/scanner/all_spec.rb +0 -18
  126. data/spec/scanner/anchors_spec.rb +0 -21
  127. data/spec/scanner/conditionals_spec.rb +0 -128
  128. data/spec/scanner/delimiters_spec.rb +0 -52
  129. data/spec/scanner/errors_spec.rb +0 -67
  130. data/spec/scanner/escapes_spec.rb +0 -64
  131. data/spec/scanner/free_space_spec.rb +0 -165
  132. data/spec/scanner/groups_spec.rb +0 -61
  133. data/spec/scanner/keep_spec.rb +0 -10
  134. data/spec/scanner/literals_spec.rb +0 -39
  135. data/spec/scanner/meta_spec.rb +0 -18
  136. data/spec/scanner/options_spec.rb +0 -36
  137. data/spec/scanner/properties_spec.rb +0 -64
  138. data/spec/scanner/quantifiers_spec.rb +0 -25
  139. data/spec/scanner/refcalls_spec.rb +0 -55
  140. data/spec/scanner/sets_spec.rb +0 -151
  141. data/spec/scanner/types_spec.rb +0 -14
  142. data/spec/spec_helper.rb +0 -16
  143. data/spec/support/runner.rb +0 -42
  144. data/spec/support/shared_examples.rb +0 -77
  145. data/spec/support/warning_extractor.rb +0 -60
  146. data/spec/syntax/syntax_spec.rb +0 -48
  147. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  148. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  149. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  150. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  151. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  152. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  153. data/spec/syntax/versions/aliases_spec.rb +0 -37
  154. data/spec/token/token_spec.rb +0 -85
@@ -1,7 +1,3 @@
1
1
  module Regexp::Expression
2
-
3
- class Literal < Regexp::Expression::Base
4
- # Obviously nothing special here, yet.
5
- end
6
-
2
+ class Literal < Regexp::Expression::Base; end
7
3
  end
@@ -1,12 +1,9 @@
1
1
  module Regexp::Expression
2
-
3
2
  class Root < Regexp::Expression::Subexpression
4
3
  def self.build(options = {})
5
- new(build_token, options)
6
- end
7
-
8
- def self.build_token
9
- Regexp::Token.new(:expression, :root, '', 0)
4
+ warn "`#{self.class}.build(options)` is deprecated and will raise in "\
5
+ "regexp_parser v3.0.0. Please use `.construct(options: options)`."
6
+ construct(options: options)
10
7
  end
11
8
  end
12
9
  end
@@ -1,5 +1,5 @@
1
1
  module Regexp::Expression
2
-
2
+ # TODO: unify name with token :property, one way or the other, in v3.0.0
3
3
  module UnicodeProperty
4
4
  class Base < Regexp::Expression::Base
5
5
  def negative?
@@ -116,5 +116,4 @@ module Regexp::Expression
116
116
  class Script < UnicodeProperty::Base; end
117
117
  class Block < UnicodeProperty::Base; end
118
118
  end
119
-
120
119
  end # module Regexp::Expression
@@ -0,0 +1,43 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ module ClassMethods
4
+ # Convenience method to init a valid Expression without a Regexp::Token
5
+ def construct(params = {})
6
+ attrs = construct_defaults.merge(params)
7
+ options = attrs.delete(:options)
8
+ token_args = Regexp::TOKEN_KEYS.map { |k| attrs.delete(k) }
9
+ token = Regexp::Token.new(*token_args)
10
+ raise ArgumentError, "unsupported attribute(s): #{attrs}" if attrs.any?
11
+
12
+ new(token, options)
13
+ end
14
+
15
+ def construct_defaults
16
+ if self == Root
17
+ { type: :expression, token: :root, ts: 0 }
18
+ elsif self < Sequence
19
+ { type: :expression, token: :sequence }
20
+ else
21
+ { type: token_class::Type }
22
+ end.merge(level: 0, set_level: 0, conditional_level: 0, text: '')
23
+ end
24
+
25
+ def token_class
26
+ if self == Root || self < Sequence
27
+ nil # no token class because these objects are Parser-generated
28
+ # TODO: synch exp & token class names for alt., dot, escapes in v3.0.0
29
+ elsif self == Alternation || self == CharacterType::Any
30
+ Regexp::Syntax::Token::Meta
31
+ elsif self <= EscapeSequence::Base
32
+ Regexp::Syntax::Token::Escape
33
+ else
34
+ Regexp::Syntax::Token.const_get(name.split('::')[2])
35
+ end
36
+ end
37
+ end
38
+
39
+ def token_class
40
+ self.class.token_class
41
+ end
42
+ end
43
+ end
@@ -112,7 +112,7 @@ module Regexp::Expression
112
112
  end
113
113
 
114
114
  def inner_match_length
115
- dummy = Regexp::Expression::Root.build
115
+ dummy = Regexp::Expression::Root.construct
116
116
  dummy.expressions = expressions.map(&:clone)
117
117
  dummy.quantifier = quantifier && quantifier.clone
118
118
  dummy.match_length
@@ -43,7 +43,7 @@ module Regexp::Expression
43
43
 
44
44
  # Order is important! Fields that use other fields in their
45
45
  # definition must appear before the fields they use.
46
- part_keys = %w{a m b o i l x s e S y k c q Q z Z t ~t T >}
46
+ part_keys = %w[a m b o i l x s e S y k c q Q z Z t ~t T >]
47
47
  part.keys.each {|k| part[k] = "<?#{k}?>"}
48
48
 
49
49
  part['>'] = print_level ? (' ' * (print_level + indent_offset)) : ''
@@ -1,5 +1,5 @@
1
1
  module Regexp::Expression
2
- class Base
2
+ module Shared
3
3
 
4
4
  # Test if this expression has the given test_type, which can be either
5
5
  # a symbol or an array of symbols to check against the expression's type.
@@ -93,5 +93,14 @@ module Regexp::Expression
93
93
  "Array, Hash, or Symbol expected, #{scope.class.name} given"
94
94
  end
95
95
  end
96
+
97
+ # Deep-compare two expressions for equality.
98
+ def ==(other)
99
+ other.class == self.class &&
100
+ other.to_s == to_s &&
101
+ other.options == options
102
+ end
103
+ alias :=== :==
104
+ alias :eql? :==
96
105
  end
97
106
  end
@@ -1,26 +1,24 @@
1
1
  module Regexp::Expression
2
+ # TODO: in v3.0.0, maybe put Shared back into Base, and inherit from Base and
3
+ # call super in #initialize, but raise in #quantifier= and #quantify,
4
+ # or introduce an Expression::Quantifiable intermediate class.
5
+ # Or actually allow chaining as a more concise but tricky solution than PR#69.
2
6
  class Quantifier
3
- MODES = [:greedy, :possessive, :reluctant]
7
+ include Regexp::Expression::Shared
4
8
 
5
- attr_reader :token, :text, :min, :max, :mode
9
+ MODES = %i[greedy possessive reluctant]
6
10
 
7
- def initialize(token, text, min, max, mode)
8
- @token = token
9
- @text = text
10
- @mode = mode
11
- @min = min
12
- @max = max
13
- end
11
+ attr_reader :min, :max, :mode
14
12
 
15
- def initialize_copy(orig)
16
- @text = orig.text.dup
17
- super
18
- end
13
+ def initialize(*args)
14
+ deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
19
15
 
20
- def to_s
21
- text.dup
16
+ init_from_token_and_options(*args)
17
+ @mode = (token.to_s[/greedy|reluctant|possessive/] || :greedy).to_sym
18
+ @min, @max = minmax
19
+ # TODO: remove in v3.0.0, stop removing parts of #token (?)
20
+ self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
22
21
  end
23
- alias :to_str :to_s
24
22
 
25
23
  def to_h
26
24
  {
@@ -41,13 +39,33 @@ module Regexp::Expression
41
39
  end
42
40
  alias :lazy? :reluctant?
43
41
 
44
- def ==(other)
45
- other.class == self.class &&
46
- other.token == token &&
47
- other.mode == mode &&
48
- other.min == min &&
49
- other.max == max
42
+ private
43
+
44
+ def deprecated_old_init(token, text, min, max, mode = :greedy)
45
+ warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
46
+ "is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
47
+ "Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
48
+ "with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode "\
49
+ "will be derived automatically.\n"\
50
+ "Or do `exp.quantifier = #{self.class}.construct(token: token, text: str)`.\n"\
51
+ "This is consistent with how Expression::Base instances are created. "
52
+ @token = token
53
+ @text = text
54
+ @min = min
55
+ @max = max
56
+ @mode = mode
57
+ end
58
+
59
+ def minmax
60
+ case token
61
+ when /zero_or_one/ then [0, 1]
62
+ when /zero_or_more/ then [0, -1]
63
+ when /one_or_more/ then [1, -1]
64
+ when :interval
65
+ int_min = text[/\{(\d*)/, 1]
66
+ int_max = text[/,?(\d*)\}/, 1]
67
+ [int_min.to_i, (int_max.empty? ? -1 : int_max.to_i)]
68
+ end
50
69
  end
51
- alias :eq :==
52
70
  end
53
71
  end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  # A sequence of expressions. Differs from a Subexpression in how it handles
4
3
  # quantifiers, as it applies them to its last element instead of itself as
5
4
  # a whole subexpression.
@@ -8,31 +7,17 @@ module Regexp::Expression
8
7
  # branches, and CharacterSet::Intersection intersected sequences.
9
8
  class Sequence < Regexp::Expression::Subexpression
10
9
  class << self
11
- def add_to(subexpression, params = {}, active_opts = {})
12
- sequence = at_levels(
13
- subexpression.level,
14
- subexpression.set_level,
15
- params[:conditional_level] || subexpression.conditional_level
10
+ def add_to(exp, params = {}, active_opts = {})
11
+ sequence = construct(
12
+ level: exp.level,
13
+ set_level: exp.set_level,
14
+ conditional_level: params[:conditional_level] || exp.conditional_level,
16
15
  )
17
- sequence.nesting_level = subexpression.nesting_level + 1
16
+ sequence.nesting_level = exp.nesting_level + 1
18
17
  sequence.options = active_opts
19
- subexpression.expressions << sequence
18
+ exp.expressions << sequence
20
19
  sequence
21
20
  end
22
-
23
- def at_levels(level, set_level, conditional_level)
24
- token = Regexp::Token.new(
25
- :expression,
26
- :sequence,
27
- '',
28
- nil, # ts
29
- nil, # te
30
- level,
31
- set_level,
32
- conditional_level
33
- )
34
- new(token)
35
- end
36
21
  end
37
22
 
38
23
  def starts_at
@@ -40,12 +25,12 @@ module Regexp::Expression
40
25
  end
41
26
  alias :ts :starts_at
42
27
 
43
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
28
+ def quantify(*args)
44
29
  target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
45
30
  target or raise Regexp::Parser::Error,
46
31
  "No valid target found for '#{text}' quantifier"
47
32
 
48
- target.quantify(token, text, min, max, mode)
33
+ target.quantify(*args)
49
34
  end
50
35
  end
51
36
  end
@@ -18,8 +18,8 @@ module Regexp::Expression
18
18
  self.class::OPERAND.add_to(self, {}, active_opts)
19
19
  end
20
20
 
21
- def to_s(format = :full)
22
- sequences.map { |e| e.to_s(format) }.join(text)
21
+ def parts
22
+ intersperse(expressions, text.dup)
23
23
  end
24
24
  end
25
25
  end
@@ -0,0 +1,85 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ module ClassMethods; end # filled in ./methods/*.rb
4
+
5
+ def self.included(mod)
6
+ mod.class_eval do
7
+ extend Shared::ClassMethods
8
+
9
+ attr_accessor :type, :token, :text, :ts, :te,
10
+ :level, :set_level, :conditional_level,
11
+ :options, :quantifier
12
+
13
+ attr_reader :nesting_level
14
+ end
15
+ end
16
+
17
+ def init_from_token_and_options(token, options = {})
18
+ self.type = token.type
19
+ self.token = token.token
20
+ self.text = token.text
21
+ self.ts = token.ts
22
+ self.te = token.te
23
+ self.level = token.level
24
+ self.set_level = token.set_level
25
+ self.conditional_level = token.conditional_level
26
+ self.nesting_level = 0
27
+ self.options = options || {}
28
+ end
29
+ private :init_from_token_and_options
30
+
31
+ def initialize_copy(orig)
32
+ self.text = orig.text.dup if orig.text
33
+ self.options = orig.options.dup if orig.options
34
+ self.quantifier = orig.quantifier.clone if orig.quantifier
35
+ super
36
+ end
37
+
38
+ def starts_at
39
+ ts
40
+ end
41
+
42
+ def base_length
43
+ to_s(:base).length
44
+ end
45
+
46
+ def full_length
47
+ to_s.length
48
+ end
49
+
50
+ def to_s(format = :full)
51
+ "#{parts.join}#{quantifier_affix(format)}"
52
+ end
53
+ alias :to_str :to_s
54
+
55
+ def parts
56
+ [text.dup]
57
+ end
58
+
59
+ def quantifier_affix(expression_format)
60
+ quantifier.to_s if quantified? && expression_format != :base
61
+ end
62
+
63
+ def quantified?
64
+ !quantifier.nil?
65
+ end
66
+
67
+ def offset
68
+ [starts_at, full_length]
69
+ end
70
+
71
+ def coded_offset
72
+ '@%d+%d' % offset
73
+ end
74
+
75
+ def terminal?
76
+ !respond_to?(:expressions)
77
+ end
78
+
79
+ def nesting_level=(lvl)
80
+ @nesting_level = lvl
81
+ quantifier && quantifier.nesting_level = lvl
82
+ terminal? || each { |subexp| subexp.nesting_level = lvl + 1 }
83
+ end
84
+ end
85
+ end
@@ -1,14 +1,12 @@
1
1
  module Regexp::Expression
2
-
3
2
  class Subexpression < Regexp::Expression::Base
4
3
  include Enumerable
5
4
 
6
5
  attr_accessor :expressions
7
6
 
8
7
  def initialize(token, options = {})
9
- super
10
-
11
8
  self.expressions = []
9
+ super
12
10
  end
13
11
 
14
12
  # Override base method to clone the expressions as well.
@@ -44,16 +42,21 @@ module Regexp::Expression
44
42
  ts + to_s.length
45
43
  end
46
44
 
47
- def to_s(format = :full)
48
- # Note: the format does not get passed down to subexpressions.
49
- "#{expressions.join}#{quantifier_affix(format)}"
45
+ def parts
46
+ expressions
50
47
  end
51
48
 
52
49
  def to_h
53
- attributes.merge({
50
+ attributes.merge(
54
51
  text: to_s(:base),
55
52
  expressions: expressions.map(&:to_h)
56
- })
53
+ )
54
+ end
55
+
56
+ private
57
+
58
+ def intersperse(expressions, separator)
59
+ expressions.flat_map { |exp| [exp, separator] }.slice(0...-1)
57
60
  end
58
61
  end
59
62
  end
@@ -1,130 +1,7 @@
1
1
  require 'regexp_parser/error'
2
2
 
3
- module Regexp::Expression
4
- class Base
5
- attr_accessor :type, :token
6
- attr_accessor :text, :ts
7
- attr_accessor :level, :set_level, :conditional_level, :nesting_level
8
-
9
- attr_accessor :quantifier
10
- attr_accessor :options
11
-
12
- def initialize(token, options = {})
13
- self.type = token.type
14
- self.token = token.token
15
- self.text = token.text
16
- self.ts = token.ts
17
- self.level = token.level
18
- self.set_level = token.set_level
19
- self.conditional_level = token.conditional_level
20
- self.nesting_level = 0
21
- self.quantifier = nil
22
- self.options = options
23
- end
24
-
25
- def initialize_copy(orig)
26
- self.text = (orig.text ? orig.text.dup : nil)
27
- self.options = (orig.options ? orig.options.dup : nil)
28
- self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
29
- super
30
- end
31
-
32
- def to_re(format = :full)
33
- ::Regexp.new(to_s(format))
34
- end
35
-
36
- alias :starts_at :ts
37
-
38
- def base_length
39
- to_s(:base).length
40
- end
41
-
42
- def full_length
43
- to_s.length
44
- end
45
-
46
- def offset
47
- [starts_at, full_length]
48
- end
49
-
50
- def coded_offset
51
- '@%d+%d' % offset
52
- end
53
-
54
- def to_s(format = :full)
55
- "#{text}#{quantifier_affix(format)}"
56
- end
57
-
58
- def quantifier_affix(expression_format)
59
- quantifier.to_s if quantified? && expression_format != :base
60
- end
61
-
62
- def terminal?
63
- !respond_to?(:expressions)
64
- end
65
-
66
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
67
- self.quantifier = Quantifier.new(token, text, min, max, mode)
68
- end
69
-
70
- def unquantified_clone
71
- clone.tap { |exp| exp.quantifier = nil }
72
- end
73
-
74
- def quantified?
75
- !quantifier.nil?
76
- end
77
-
78
- # Deprecated. Prefer `#repetitions` which has a more uniform interface.
79
- def quantity
80
- return [nil,nil] unless quantified?
81
- [quantifier.min, quantifier.max]
82
- end
83
-
84
- def repetitions
85
- return 1..1 unless quantified?
86
- min = quantifier.min
87
- max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
88
- range = min..max
89
- # fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
90
- if RUBY_VERSION.to_f < 2.7
91
- range.define_singleton_method(:minmax) { [min, max] }
92
- end
93
- range
94
- end
95
-
96
- def greedy?
97
- quantified? and quantifier.greedy?
98
- end
99
-
100
- def reluctant?
101
- quantified? and quantifier.reluctant?
102
- end
103
- alias :lazy? :reluctant?
104
-
105
- def possessive?
106
- quantified? and quantifier.possessive?
107
- end
108
-
109
- def attributes
110
- {
111
- type: type,
112
- token: token,
113
- text: to_s(:base),
114
- starts_at: ts,
115
- length: full_length,
116
- level: level,
117
- set_level: set_level,
118
- conditional_level: conditional_level,
119
- options: options,
120
- quantifier: quantified? ? quantifier.to_h : nil,
121
- }
122
- end
123
- alias :to_h :attributes
124
- end
125
-
126
- end # module Regexp::Expression
127
-
3
+ require 'regexp_parser/expression/shared'
4
+ require 'regexp_parser/expression/base'
128
5
  require 'regexp_parser/expression/quantifier'
129
6
  require 'regexp_parser/expression/subexpression'
130
7
  require 'regexp_parser/expression/sequence'
@@ -132,21 +9,22 @@ require 'regexp_parser/expression/sequence_operation'
132
9
 
133
10
  require 'regexp_parser/expression/classes/alternation'
134
11
  require 'regexp_parser/expression/classes/anchor'
135
- require 'regexp_parser/expression/classes/backref'
12
+ require 'regexp_parser/expression/classes/backreference'
13
+ require 'regexp_parser/expression/classes/character_set'
14
+ require 'regexp_parser/expression/classes/character_set/intersection'
15
+ require 'regexp_parser/expression/classes/character_set/range'
16
+ require 'regexp_parser/expression/classes/character_type'
136
17
  require 'regexp_parser/expression/classes/conditional'
137
- require 'regexp_parser/expression/classes/escape'
18
+ require 'regexp_parser/expression/classes/escape_sequence'
138
19
  require 'regexp_parser/expression/classes/free_space'
139
20
  require 'regexp_parser/expression/classes/group'
140
21
  require 'regexp_parser/expression/classes/keep'
141
22
  require 'regexp_parser/expression/classes/literal'
142
23
  require 'regexp_parser/expression/classes/posix_class'
143
- require 'regexp_parser/expression/classes/property'
144
24
  require 'regexp_parser/expression/classes/root'
145
- require 'regexp_parser/expression/classes/set'
146
- require 'regexp_parser/expression/classes/set/intersection'
147
- require 'regexp_parser/expression/classes/set/range'
148
- require 'regexp_parser/expression/classes/type'
25
+ require 'regexp_parser/expression/classes/unicode_property'
149
26
 
27
+ require 'regexp_parser/expression/methods/construct'
150
28
  require 'regexp_parser/expression/methods/match'
151
29
  require 'regexp_parser/expression/methods/match_length'
152
30
  require 'regexp_parser/expression/methods/options'
@@ -4,19 +4,21 @@
4
4
  # given syntax flavor.
5
5
  class Regexp::Lexer
6
6
 
7
- OPENING_TOKENS = [
8
- :capture, :passive, :lookahead, :nlookahead, :lookbehind, :nlookbehind,
9
- :atomic, :options, :options_switch, :named, :absence
7
+ OPENING_TOKENS = %i[
8
+ capture passive lookahead nlookahead lookbehind nlookbehind
9
+ atomic options options_switch named absence
10
10
  ].freeze
11
11
 
12
- CLOSING_TOKENS = [:close].freeze
12
+ CLOSING_TOKENS = %i[close].freeze
13
+
14
+ CONDITION_TOKENS = %i[condition condition_close].freeze
13
15
 
14
16
  def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
15
17
  new.lex(input, syntax, options: options, &block)
16
18
  end
17
19
 
18
20
  def lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
19
- syntax = Regexp::Syntax.new(syntax)
21
+ syntax = Regexp::Syntax.for(syntax)
20
22
 
21
23
  self.tokens = []
22
24
  self.nesting = 0
@@ -40,7 +42,7 @@ class Regexp::Lexer
40
42
  nesting, set_nesting, conditional_nesting)
41
43
 
42
44
  current = merge_condition(current) if type == :conditional and
43
- [:condition, :condition_close].include?(token)
45
+ CONDITION_TOKENS.include?(token)
44
46
 
45
47
  last.next = current if last
46
48
  current.previous = last if last