regexp_parser 1.7.1 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +157 -1
  3. data/Gemfile +6 -1
  4. data/LICENSE +1 -1
  5. data/README.md +38 -32
  6. data/Rakefile +18 -27
  7. data/lib/regexp_parser/error.rb +4 -0
  8. data/lib/regexp_parser/expression/base.rb +123 -0
  9. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  10. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +5 -0
  11. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
  12. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -1
  13. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
  14. data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
  15. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +13 -7
  16. data/lib/regexp_parser/expression/classes/free_space.rb +2 -4
  17. data/lib/regexp_parser/expression/classes/group.rb +28 -3
  18. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  19. data/lib/regexp_parser/expression/classes/property.rb +1 -3
  20. data/lib/regexp_parser/expression/classes/root.rb +4 -17
  21. data/lib/regexp_parser/expression/classes/type.rb +0 -2
  22. data/lib/regexp_parser/expression/methods/match_length.rb +2 -2
  23. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  24. data/lib/regexp_parser/expression/methods/traverse.rb +2 -2
  25. data/lib/regexp_parser/expression/quantifier.rb +11 -2
  26. data/lib/regexp_parser/expression/sequence.rb +3 -20
  27. data/lib/regexp_parser/expression/subexpression.rb +1 -2
  28. data/lib/regexp_parser/expression.rb +7 -139
  29. data/lib/regexp_parser/lexer.rb +13 -11
  30. data/lib/regexp_parser/parser.rb +325 -344
  31. data/lib/regexp_parser/scanner/char_type.rl +11 -11
  32. data/lib/regexp_parser/scanner/properties/long.csv +604 -0
  33. data/lib/regexp_parser/scanner/properties/short.csv +242 -0
  34. data/lib/regexp_parser/scanner/property.rl +2 -2
  35. data/lib/regexp_parser/scanner/scanner.rl +235 -255
  36. data/lib/regexp_parser/scanner.rb +1324 -1387
  37. data/lib/regexp_parser/syntax/any.rb +4 -6
  38. data/lib/regexp_parser/syntax/base.rb +13 -15
  39. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  40. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  41. data/lib/regexp_parser/syntax/token/backreference.rb +30 -0
  42. data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
  43. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  44. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  45. data/lib/regexp_parser/syntax/token/escape.rb +31 -0
  46. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  47. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  48. data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
  49. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  50. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  51. data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
  52. data/lib/regexp_parser/syntax/token.rb +45 -0
  53. data/lib/regexp_parser/syntax/version_lookup.rb +4 -4
  54. data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -2
  55. data/lib/regexp_parser/syntax/versions/1.9.1.rb +1 -1
  56. data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
  57. data/lib/regexp_parser/syntax.rb +8 -6
  58. data/lib/regexp_parser/token.rb +9 -20
  59. data/lib/regexp_parser/version.rb +1 -1
  60. data/lib/regexp_parser.rb +0 -2
  61. data/regexp_parser.gemspec +20 -22
  62. metadata +34 -165
  63. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  64. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  65. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  66. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  67. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  68. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  69. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  70. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  71. data/spec/expression/base_spec.rb +0 -94
  72. data/spec/expression/clone_spec.rb +0 -120
  73. data/spec/expression/conditional_spec.rb +0 -89
  74. data/spec/expression/free_space_spec.rb +0 -27
  75. data/spec/expression/methods/match_length_spec.rb +0 -161
  76. data/spec/expression/methods/match_spec.rb +0 -25
  77. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  78. data/spec/expression/methods/tests_spec.rb +0 -99
  79. data/spec/expression/methods/traverse_spec.rb +0 -161
  80. data/spec/expression/options_spec.rb +0 -128
  81. data/spec/expression/root_spec.rb +0 -9
  82. data/spec/expression/sequence_spec.rb +0 -9
  83. data/spec/expression/subexpression_spec.rb +0 -50
  84. data/spec/expression/to_h_spec.rb +0 -26
  85. data/spec/expression/to_s_spec.rb +0 -100
  86. data/spec/lexer/all_spec.rb +0 -22
  87. data/spec/lexer/conditionals_spec.rb +0 -53
  88. data/spec/lexer/delimiters_spec.rb +0 -68
  89. data/spec/lexer/escapes_spec.rb +0 -14
  90. data/spec/lexer/keep_spec.rb +0 -10
  91. data/spec/lexer/literals_spec.rb +0 -89
  92. data/spec/lexer/nesting_spec.rb +0 -99
  93. data/spec/lexer/refcalls_spec.rb +0 -55
  94. data/spec/parser/all_spec.rb +0 -43
  95. data/spec/parser/alternation_spec.rb +0 -88
  96. data/spec/parser/anchors_spec.rb +0 -17
  97. data/spec/parser/conditionals_spec.rb +0 -179
  98. data/spec/parser/errors_spec.rb +0 -30
  99. data/spec/parser/escapes_spec.rb +0 -121
  100. data/spec/parser/free_space_spec.rb +0 -130
  101. data/spec/parser/groups_spec.rb +0 -108
  102. data/spec/parser/keep_spec.rb +0 -6
  103. data/spec/parser/posix_classes_spec.rb +0 -8
  104. data/spec/parser/properties_spec.rb +0 -115
  105. data/spec/parser/quantifiers_spec.rb +0 -52
  106. data/spec/parser/refcalls_spec.rb +0 -112
  107. data/spec/parser/set/intersections_spec.rb +0 -127
  108. data/spec/parser/set/ranges_spec.rb +0 -111
  109. data/spec/parser/sets_spec.rb +0 -178
  110. data/spec/parser/types_spec.rb +0 -18
  111. data/spec/scanner/all_spec.rb +0 -18
  112. data/spec/scanner/anchors_spec.rb +0 -21
  113. data/spec/scanner/conditionals_spec.rb +0 -128
  114. data/spec/scanner/delimiters_spec.rb +0 -52
  115. data/spec/scanner/errors_spec.rb +0 -67
  116. data/spec/scanner/escapes_spec.rb +0 -53
  117. data/spec/scanner/free_space_spec.rb +0 -133
  118. data/spec/scanner/groups_spec.rb +0 -52
  119. data/spec/scanner/keep_spec.rb +0 -10
  120. data/spec/scanner/literals_spec.rb +0 -49
  121. data/spec/scanner/meta_spec.rb +0 -18
  122. data/spec/scanner/properties_spec.rb +0 -64
  123. data/spec/scanner/quantifiers_spec.rb +0 -20
  124. data/spec/scanner/refcalls_spec.rb +0 -36
  125. data/spec/scanner/sets_spec.rb +0 -102
  126. data/spec/scanner/types_spec.rb +0 -14
  127. data/spec/spec_helper.rb +0 -15
  128. data/spec/support/runner.rb +0 -42
  129. data/spec/support/shared_examples.rb +0 -77
  130. data/spec/support/warning_extractor.rb +0 -60
  131. data/spec/syntax/syntax_spec.rb +0 -48
  132. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  133. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  134. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  135. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  136. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  137. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  138. data/spec/syntax/versions/aliases_spec.rb +0 -37
  139. data/spec/token/token_spec.rb +0 -85
@@ -1,16 +1,22 @@
1
1
  module Regexp::Expression
2
+ # TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
2
3
  module EscapeSequence
3
4
  class Base < Regexp::Expression::Base
4
- require 'yaml'
5
-
6
- def char
7
- # poor man's unescape without using eval
8
- YAML.load(%Q(---\n"#{text}"\n))
9
- end
10
-
11
5
  def codepoint
12
6
  char.ord
13
7
  end
8
+
9
+ if ''.respond_to?(:undump)
10
+ def char
11
+ %("#{text}").undump
12
+ end
13
+ else
14
+ # poor man's unescape without using eval
15
+ require 'yaml'
16
+ def char
17
+ YAML.load(%Q(---\n"#{text}"\n))
18
+ end
19
+ end
14
20
  end
15
21
 
16
22
  class Literal < EscapeSequence::Base
@@ -1,8 +1,7 @@
1
1
  module Regexp::Expression
2
-
3
2
  class FreeSpace < Regexp::Expression::Base
4
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
5
- raise "Can not quantify a free space object"
3
+ def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
4
+ raise Regexp::Parser::Error, 'Can not quantify a free space object'
6
5
  end
7
6
  end
8
7
 
@@ -13,5 +12,4 @@ module Regexp::Expression
13
12
  text << exp.text
14
13
  end
15
14
  end
16
-
17
15
  end
@@ -10,11 +10,36 @@ module Regexp::Expression
10
10
  def comment?; false end
11
11
  end
12
12
 
13
- class Atomic < Group::Base; end
14
- class Passive < Group::Base; end
13
+ class Passive < Group::Base
14
+ attr_writer :implicit
15
+
16
+ def initialize(*)
17
+ @implicit = false
18
+ super
19
+ end
20
+
21
+ def to_s(format = :full)
22
+ if implicit?
23
+ "#{expressions.join}#{quantifier_affix(format)}"
24
+ else
25
+ super
26
+ end
27
+ end
28
+
29
+ def implicit?
30
+ @implicit
31
+ end
32
+ end
33
+
15
34
  class Absence < Group::Base; end
35
+ class Atomic < Group::Base; end
16
36
  class Options < Group::Base
17
37
  attr_accessor :option_changes
38
+
39
+ def initialize_copy(orig)
40
+ self.option_changes = orig.option_changes.dup
41
+ super
42
+ end
18
43
  end
19
44
 
20
45
  class Capture < Group::Base
@@ -33,7 +58,7 @@ module Regexp::Expression
33
58
  super
34
59
  end
35
60
 
36
- def initialize_clone(orig)
61
+ def initialize_copy(orig)
37
62
  @name = orig.name.dup
38
63
  super
39
64
  end
@@ -1,7 +1,3 @@
1
1
  module Regexp::Expression
2
-
3
- class Literal < Regexp::Expression::Base
4
- # Obviously nothing special here, yet.
5
- end
6
-
2
+ class Literal < Regexp::Expression::Base; end
7
3
  end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module UnicodeProperty
4
3
  class Base < Regexp::Expression::Base
5
4
  def negative?
@@ -7,7 +6,7 @@ module Regexp::Expression
7
6
  end
8
7
 
9
8
  def name
10
- text =~ /\A\\[pP]\{([^}]+)\}\z/; $1
9
+ text[/\A\\[pP]\{([^}]+)\}\z/, 1]
11
10
  end
12
11
 
13
12
  def shortcut
@@ -116,5 +115,4 @@ module Regexp::Expression
116
115
  class Script < UnicodeProperty::Base; end
117
116
  class Block < UnicodeProperty::Base; end
118
117
  end
119
-
120
118
  end # module Regexp::Expression
@@ -1,24 +1,11 @@
1
1
  module Regexp::Expression
2
-
3
2
  class Root < Regexp::Expression::Subexpression
4
- # TODO: this override is here for backwards compatibility, remove in 2.0.0
5
- def initialize(*args)
6
- unless args.first.is_a?(Regexp::Token)
7
- warn('WARNING: Root.new without a Token argument is deprecated and '\
8
- 'will be removed in 2.0.0. Use Root.build for the old behavior.')
9
- return super(self.class.build_token, *args)
10
- end
11
- super
3
+ def self.build(options = {})
4
+ new(build_token, options)
12
5
  end
13
6
 
14
- class << self
15
- def build(options = {})
16
- new(build_token, options)
17
- end
18
-
19
- def build_token
20
- Regexp::Token.new(:expression, :root, '', 0)
21
- end
7
+ def self.build_token
8
+ Regexp::Token.new(:expression, :root, '', 0)
22
9
  end
23
10
  end
24
11
  end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module CharacterType
4
3
  class Base < Regexp::Expression::Base; end
5
4
 
@@ -15,5 +14,4 @@ module Regexp::Expression
15
14
  class Linebreak < CharacterType::Base; end
16
15
  class ExtendedGrapheme < CharacterType::Base; end
17
16
  end
18
-
19
17
  end
@@ -10,7 +10,7 @@ class Regexp::MatchLength
10
10
  self.exp_class = exp.class
11
11
  self.min_rep = exp.repetitions.min
12
12
  self.max_rep = exp.repetitions.max
13
- if base = opts[:base]
13
+ if (base = opts[:base])
14
14
  self.base_min = base
15
15
  self.base_max = base
16
16
  self.reify = ->{ '.' * base }
@@ -32,7 +32,7 @@ class Regexp::MatchLength
32
32
  end
33
33
  end
34
34
 
35
- def endless_each(&block)
35
+ def endless_each
36
36
  return enum_for(__method__) unless block_given?
37
37
  (min..max).each { |num| yield(num) if include?(num) }
38
38
  end
@@ -43,7 +43,7 @@ module Regexp::Expression
43
43
 
44
44
  # Order is important! Fields that use other fields in their
45
45
  # definition must appear before the fields they use.
46
- part_keys = %w{a m b o i l x s e S y k c q Q z Z t ~t T >}
46
+ part_keys = %w[a m b o i l x s e S y k c q Q z Z t ~t T >]
47
47
  part.keys.each {|k| part[k] = "<?#{k}?>"}
48
48
 
49
49
  part['>'] = print_level ? (' ' * (print_level + indent_offset)) : ''
@@ -36,7 +36,7 @@ module Regexp::Expression
36
36
 
37
37
  # Iterates over the expressions of this expression as an array, passing
38
38
  # the expression and its index within its parent to the given block.
39
- def each_expression(include_self = false, &block)
39
+ def each_expression(include_self = false)
40
40
  return enum_for(__method__, include_self) unless block_given?
41
41
 
42
42
  traverse(include_self) do |event, exp, index|
@@ -47,7 +47,7 @@ module Regexp::Expression
47
47
  # Returns a new array with the results of calling the given block once
48
48
  # for every expression. If a block is not given, returns an array with
49
49
  # each expression and its level index as an array.
50
- def flat_map(include_self = false, &block)
50
+ def flat_map(include_self = false)
51
51
  result = []
52
52
 
53
53
  each_expression(include_self) do |exp, index|
@@ -1,6 +1,6 @@
1
1
  module Regexp::Expression
2
2
  class Quantifier
3
- MODES = [:greedy, :possessive, :reluctant]
3
+ MODES = %i[greedy possessive reluctant]
4
4
 
5
5
  attr_reader :token, :text, :min, :max, :mode
6
6
 
@@ -12,7 +12,7 @@ module Regexp::Expression
12
12
  @max = max
13
13
  end
14
14
 
15
- def initialize_clone(orig)
15
+ def initialize_copy(orig)
16
16
  @text = orig.text.dup
17
17
  super
18
18
  end
@@ -40,5 +40,14 @@ module Regexp::Expression
40
40
  RUBY
41
41
  end
42
42
  alias :lazy? :reluctant?
43
+
44
+ def ==(other)
45
+ other.class == self.class &&
46
+ other.token == token &&
47
+ other.mode == mode &&
48
+ other.min == min &&
49
+ other.max == max
50
+ end
51
+ alias :eq :==
43
52
  end
44
53
  end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  # A sequence of expressions. Differs from a Subexpression by how it handles
4
3
  # quantifiers, as it applies them to its last element instead of itself as
5
4
  # a whole subexpression.
@@ -7,16 +6,6 @@ module Regexp::Expression
7
6
  # Used as the base class for the Alternation alternatives, Conditional
8
7
  # branches, and CharacterSet::Intersection intersected sequences.
9
8
  class Sequence < Regexp::Expression::Subexpression
10
- # TODO: this override is here for backwards compatibility, remove in 2.0.0
11
- def initialize(*args)
12
- if args.count == 3
13
- warn('WARNING: Sequence.new without a Regexp::Token argument is '\
14
- 'deprecated and will be removed in 2.0.0.')
15
- return self.class.at_levels(*args)
16
- end
17
- super
18
- end
19
-
20
9
  class << self
21
10
  def add_to(subexpression, params = {}, active_opts = {})
22
11
  sequence = at_levels(
@@ -51,17 +40,11 @@ module Regexp::Expression
51
40
  alias :ts :starts_at
52
41
 
53
42
  def quantify(token, text, min = nil, max = nil, mode = :greedy)
54
- offset = -1
55
- target = expressions[offset]
56
- while target.is_a?(FreeSpace)
57
- target = expressions[offset -= 1]
58
- end
59
-
60
- target || raise(ArgumentError, "No valid target found for '#{text}' "\
61
- 'quantifier')
43
+ target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
44
+ target or raise Regexp::Parser::Error,
45
+ "No valid target found for '#{text}' quantifier"
62
46
 
63
47
  target.quantify(token, text, min, max, mode)
64
48
  end
65
49
  end
66
-
67
50
  end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  class Subexpression < Regexp::Expression::Base
4
3
  include Enumerable
5
4
 
@@ -12,7 +11,7 @@ module Regexp::Expression
12
11
  end
13
12
 
14
13
  # Override base method to clone the expressions as well.
15
- def initialize_clone(orig)
14
+ def initialize_copy(orig)
16
15
  self.expressions = orig.expressions.map(&:clone)
17
16
  super
18
17
  end
@@ -1,138 +1,6 @@
1
- module Regexp::Expression
2
-
3
- class Base
4
- attr_accessor :type, :token
5
- attr_accessor :text, :ts
6
- attr_accessor :level, :set_level, :conditional_level, :nesting_level
7
-
8
- attr_accessor :quantifier
9
- attr_accessor :options
10
-
11
- def initialize(token, options = {})
12
- self.type = token.type
13
- self.token = token.token
14
- self.text = token.text
15
- self.ts = token.ts
16
- self.level = token.level
17
- self.set_level = token.set_level
18
- self.conditional_level = token.conditional_level
19
- self.nesting_level = 0
20
- self.quantifier = nil
21
- self.options = options
22
- end
23
-
24
- def initialize_clone(orig)
25
- self.text = (orig.text ? orig.text.dup : nil)
26
- self.options = (orig.options ? orig.options.dup : nil)
27
- self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
28
- super
29
- end
30
-
31
- def to_re(format = :full)
32
- ::Regexp.new(to_s(format))
33
- end
34
-
35
- alias :starts_at :ts
36
-
37
- def full_length
38
- to_s.length
39
- end
40
-
41
- def offset
42
- [starts_at, full_length]
43
- end
44
-
45
- def coded_offset
46
- '@%d+%d' % offset
47
- end
48
-
49
- def to_s(format = :full)
50
- "#{text}#{quantifier_affix(format)}"
51
- end
52
-
53
- def quantifier_affix(expression_format)
54
- quantifier.to_s if quantified? && expression_format != :base
55
- end
56
-
57
- def terminal?
58
- !respond_to?(:expressions)
59
- end
60
-
61
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
62
- self.quantifier = Quantifier.new(token, text, min, max, mode)
63
- end
64
-
65
- def unquantified_clone
66
- clone.tap { |exp| exp.quantifier = nil }
67
- end
68
-
69
- def quantified?
70
- !quantifier.nil?
71
- end
72
-
73
- # Deprecated. Prefer `#repetitions` which has a more uniform interface.
74
- def quantity
75
- return [nil,nil] unless quantified?
76
- [quantifier.min, quantifier.max]
77
- end
78
-
79
- def repetitions
80
- return 1..1 unless quantified?
81
- min = quantifier.min
82
- max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
83
- # fix Range#minmax - https://bugs.ruby-lang.org/issues/15807
84
- (min..max).tap { |r| r.define_singleton_method(:minmax) { [min, max] } }
85
- end
86
-
87
- def greedy?
88
- quantified? and quantifier.greedy?
89
- end
90
-
91
- def reluctant?
92
- quantified? and quantifier.reluctant?
93
- end
94
- alias :lazy? :reluctant?
95
-
96
- def possessive?
97
- quantified? and quantifier.possessive?
98
- end
99
-
100
- def attributes
101
- {
102
- type: type,
103
- token: token,
104
- text: to_s(:base),
105
- starts_at: ts,
106
- length: full_length,
107
- level: level,
108
- set_level: set_level,
109
- conditional_level: conditional_level,
110
- options: options,
111
- quantifier: quantified? ? quantifier.to_h : nil,
112
- }
113
- end
114
- alias :to_h :attributes
115
- end
116
-
117
- def self.parsed(exp)
118
- warn('WARNING: Regexp::Expression::Base.parsed is buggy and '\
119
- 'will be removed in 2.0.0. Use Regexp::Parser.parse instead.')
120
- case exp
121
- when String
122
- Regexp::Parser.parse(exp)
123
- when Regexp
124
- Regexp::Parser.parse(exp.source) # <- causes loss of root options
125
- when Regexp::Expression # <- never triggers
126
- exp
127
- else
128
- raise ArgumentError, 'Expression.parsed accepts a String, Regexp, or '\
129
- 'a Regexp::Expression as a value for exp, but it '\
130
- "was given #{exp.class.name}."
131
- end
132
- end
133
-
134
- end # module Regexp::Expression
1
+ require 'regexp_parser/error'
135
2
 
3
+ require 'regexp_parser/expression/base'
136
4
  require 'regexp_parser/expression/quantifier'
137
5
  require 'regexp_parser/expression/subexpression'
138
6
  require 'regexp_parser/expression/sequence'
@@ -140,9 +8,12 @@ require 'regexp_parser/expression/sequence_operation'
140
8
 
141
9
  require 'regexp_parser/expression/classes/alternation'
142
10
  require 'regexp_parser/expression/classes/anchor'
143
- require 'regexp_parser/expression/classes/backref'
11
+ require 'regexp_parser/expression/classes/backreference'
12
+ require 'regexp_parser/expression/classes/character_set'
13
+ require 'regexp_parser/expression/classes/character_set/intersection'
14
+ require 'regexp_parser/expression/classes/character_set/range'
144
15
  require 'regexp_parser/expression/classes/conditional'
145
- require 'regexp_parser/expression/classes/escape'
16
+ require 'regexp_parser/expression/classes/escape_sequence'
146
17
  require 'regexp_parser/expression/classes/free_space'
147
18
  require 'regexp_parser/expression/classes/group'
148
19
  require 'regexp_parser/expression/classes/keep'
@@ -150,9 +21,6 @@ require 'regexp_parser/expression/classes/literal'
150
21
  require 'regexp_parser/expression/classes/posix_class'
151
22
  require 'regexp_parser/expression/classes/property'
152
23
  require 'regexp_parser/expression/classes/root'
153
- require 'regexp_parser/expression/classes/set'
154
- require 'regexp_parser/expression/classes/set/intersection'
155
- require 'regexp_parser/expression/classes/set/range'
156
24
  require 'regexp_parser/expression/classes/type'
157
25
 
158
26
  require 'regexp_parser/expression/methods/match'
@@ -4,18 +4,20 @@
4
4
  # given syntax flavor.
5
5
  class Regexp::Lexer
6
6
 
7
- OPENING_TOKENS = [
8
- :capture, :passive, :lookahead, :nlookahead, :lookbehind, :nlookbehind,
9
- :atomic, :options, :options_switch, :named, :absence
7
+ OPENING_TOKENS = %i[
8
+ capture passive lookahead nlookahead lookbehind nlookbehind
9
+ atomic options options_switch named absence
10
10
  ].freeze
11
11
 
12
- CLOSING_TOKENS = [:close].freeze
12
+ CLOSING_TOKENS = %i[close].freeze
13
13
 
14
- def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", &block)
15
- new.lex(input, syntax, &block)
14
+ CONDITION_TOKENS = %i[condition condition_close].freeze
15
+
16
+ def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
17
+ new.lex(input, syntax, options: options, &block)
16
18
  end
17
19
 
18
- def lex(input, syntax = "ruby/#{RUBY_VERSION}", &block)
20
+ def lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
19
21
  syntax = Regexp::Syntax.new(syntax)
20
22
 
21
23
  self.tokens = []
@@ -25,7 +27,7 @@ class Regexp::Lexer
25
27
  self.shift = 0
26
28
 
27
29
  last = nil
28
- Regexp::Scanner.scan(input) do |type, token, text, ts, te|
30
+ Regexp::Scanner.scan(input, options: options) do |type, token, text, ts, te|
29
31
  type, token = *syntax.normalize(type, token)
30
32
  syntax.check! type, token
31
33
 
@@ -40,7 +42,7 @@ class Regexp::Lexer
40
42
  nesting, set_nesting, conditional_nesting)
41
43
 
42
44
  current = merge_condition(current) if type == :conditional and
43
- [:condition, :condition_close].include?(token)
45
+ CONDITION_TOKENS.include?(token)
44
46
 
45
47
  last.next = current if last
46
48
  current.previous = last if last
@@ -96,10 +98,10 @@ class Regexp::Lexer
96
98
 
97
99
  tokens.pop
98
100
  tokens << Regexp::Token.new(:literal, :literal, lead,
99
- token.ts, (token.te - last.bytesize),
101
+ token.ts, (token.te - last.length),
100
102
  nesting, set_nesting, conditional_nesting)
101
103
  tokens << Regexp::Token.new(:literal, :literal, last,
102
- (token.ts + lead.bytesize), token.te,
104
+ (token.ts + lead.length), token.te,
103
105
  nesting, set_nesting, conditional_nesting)
104
106
  end
105
107