regexp_parser 1.7.0 → 2.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +8 -2
  3. data/LICENSE +1 -1
  4. data/Rakefile +6 -70
  5. data/lib/regexp_parser/error.rb +4 -0
  6. data/lib/regexp_parser/expression/base.rb +76 -0
  7. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  8. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  9. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +22 -2
  10. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -8
  11. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +3 -4
  12. data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
  13. data/lib/regexp_parser/expression/classes/conditional.rb +11 -5
  14. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +15 -7
  15. data/lib/regexp_parser/expression/classes/free_space.rb +5 -5
  16. data/lib/regexp_parser/expression/classes/group.rb +28 -15
  17. data/lib/regexp_parser/expression/classes/keep.rb +2 -0
  18. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  19. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
  20. data/lib/regexp_parser/expression/classes/root.rb +4 -19
  21. data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +5 -3
  22. data/lib/regexp_parser/expression/methods/construct.rb +41 -0
  23. data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
  24. data/lib/regexp_parser/expression/methods/match_length.rb +11 -7
  25. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  26. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  27. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  28. data/lib/regexp_parser/expression/methods/tests.rb +47 -1
  29. data/lib/regexp_parser/expression/methods/traverse.rb +34 -18
  30. data/lib/regexp_parser/expression/quantifier.rb +57 -17
  31. data/lib/regexp_parser/expression/sequence.rb +11 -47
  32. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  33. data/lib/regexp_parser/expression/shared.rb +111 -0
  34. data/lib/regexp_parser/expression/subexpression.rb +27 -19
  35. data/lib/regexp_parser/expression.rb +14 -141
  36. data/lib/regexp_parser/lexer.rb +83 -41
  37. data/lib/regexp_parser/parser.rb +371 -429
  38. data/lib/regexp_parser/scanner/char_type.rl +11 -11
  39. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  40. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  41. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  42. data/lib/regexp_parser/scanner/properties/long.csv +633 -0
  43. data/lib/regexp_parser/scanner/properties/short.csv +248 -0
  44. data/lib/regexp_parser/scanner/property.rl +4 -4
  45. data/lib/regexp_parser/scanner/scanner.rl +303 -368
  46. data/lib/regexp_parser/scanner.rb +1423 -1674
  47. data/lib/regexp_parser/syntax/any.rb +2 -7
  48. data/lib/regexp_parser/syntax/base.rb +92 -67
  49. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  50. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  51. data/lib/regexp_parser/syntax/token/backreference.rb +33 -0
  52. data/lib/regexp_parser/syntax/token/character_set.rb +16 -0
  53. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  54. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  55. data/lib/regexp_parser/syntax/token/escape.rb +33 -0
  56. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  57. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  58. data/lib/regexp_parser/syntax/token/meta.rb +20 -0
  59. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  60. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  61. data/lib/regexp_parser/syntax/token/unicode_property.rb +733 -0
  62. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  63. data/lib/regexp_parser/syntax/token.rb +45 -0
  64. data/lib/regexp_parser/syntax/version_lookup.rb +19 -36
  65. data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
  66. data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
  67. data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
  68. data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
  69. data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
  70. data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
  71. data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
  72. data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
  73. data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
  74. data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
  75. data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
  76. data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
  77. data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
  78. data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
  79. data/lib/regexp_parser/syntax/versions.rb +3 -1
  80. data/lib/regexp_parser/syntax.rb +8 -6
  81. data/lib/regexp_parser/token.rb +9 -20
  82. data/lib/regexp_parser/version.rb +1 -1
  83. data/lib/regexp_parser.rb +0 -2
  84. data/regexp_parser.gemspec +19 -23
  85. metadata +52 -171
  86. data/CHANGELOG.md +0 -349
  87. data/README.md +0 -470
  88. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  89. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  90. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  91. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  92. data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
  93. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  94. data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
  95. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  96. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  97. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  98. data/spec/expression/base_spec.rb +0 -94
  99. data/spec/expression/clone_spec.rb +0 -120
  100. data/spec/expression/conditional_spec.rb +0 -89
  101. data/spec/expression/free_space_spec.rb +0 -27
  102. data/spec/expression/methods/match_length_spec.rb +0 -161
  103. data/spec/expression/methods/match_spec.rb +0 -25
  104. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  105. data/spec/expression/methods/tests_spec.rb +0 -99
  106. data/spec/expression/methods/traverse_spec.rb +0 -161
  107. data/spec/expression/options_spec.rb +0 -128
  108. data/spec/expression/root_spec.rb +0 -9
  109. data/spec/expression/sequence_spec.rb +0 -9
  110. data/spec/expression/subexpression_spec.rb +0 -50
  111. data/spec/expression/to_h_spec.rb +0 -26
  112. data/spec/expression/to_s_spec.rb +0 -100
  113. data/spec/lexer/all_spec.rb +0 -22
  114. data/spec/lexer/conditionals_spec.rb +0 -53
  115. data/spec/lexer/escapes_spec.rb +0 -14
  116. data/spec/lexer/keep_spec.rb +0 -10
  117. data/spec/lexer/literals_spec.rb +0 -89
  118. data/spec/lexer/nesting_spec.rb +0 -99
  119. data/spec/lexer/refcalls_spec.rb +0 -55
  120. data/spec/parser/all_spec.rb +0 -43
  121. data/spec/parser/alternation_spec.rb +0 -88
  122. data/spec/parser/anchors_spec.rb +0 -17
  123. data/spec/parser/conditionals_spec.rb +0 -179
  124. data/spec/parser/errors_spec.rb +0 -30
  125. data/spec/parser/escapes_spec.rb +0 -121
  126. data/spec/parser/free_space_spec.rb +0 -130
  127. data/spec/parser/groups_spec.rb +0 -108
  128. data/spec/parser/keep_spec.rb +0 -6
  129. data/spec/parser/posix_classes_spec.rb +0 -8
  130. data/spec/parser/properties_spec.rb +0 -115
  131. data/spec/parser/quantifiers_spec.rb +0 -51
  132. data/spec/parser/refcalls_spec.rb +0 -112
  133. data/spec/parser/set/intersections_spec.rb +0 -127
  134. data/spec/parser/set/ranges_spec.rb +0 -111
  135. data/spec/parser/sets_spec.rb +0 -178
  136. data/spec/parser/types_spec.rb +0 -18
  137. data/spec/scanner/all_spec.rb +0 -18
  138. data/spec/scanner/anchors_spec.rb +0 -21
  139. data/spec/scanner/conditionals_spec.rb +0 -128
  140. data/spec/scanner/errors_spec.rb +0 -68
  141. data/spec/scanner/escapes_spec.rb +0 -53
  142. data/spec/scanner/free_space_spec.rb +0 -133
  143. data/spec/scanner/groups_spec.rb +0 -52
  144. data/spec/scanner/keep_spec.rb +0 -10
  145. data/spec/scanner/literals_spec.rb +0 -49
  146. data/spec/scanner/meta_spec.rb +0 -18
  147. data/spec/scanner/properties_spec.rb +0 -64
  148. data/spec/scanner/quantifiers_spec.rb +0 -20
  149. data/spec/scanner/refcalls_spec.rb +0 -36
  150. data/spec/scanner/sets_spec.rb +0 -102
  151. data/spec/scanner/types_spec.rb +0 -14
  152. data/spec/spec_helper.rb +0 -15
  153. data/spec/support/runner.rb +0 -42
  154. data/spec/support/shared_examples.rb +0 -77
  155. data/spec/support/warning_extractor.rb +0 -60
  156. data/spec/syntax/syntax_spec.rb +0 -48
  157. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  158. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  159. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  160. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  161. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  162. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  163. data/spec/syntax/versions/aliases_spec.rb +0 -37
  164. data/spec/token/token_spec.rb +0 -85
  165. /data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
@@ -1,138 +1,7 @@
1
- module Regexp::Expression
2
-
3
- class Base
4
- attr_accessor :type, :token
5
- attr_accessor :text, :ts
6
- attr_accessor :level, :set_level, :conditional_level, :nesting_level
7
-
8
- attr_accessor :quantifier
9
- attr_accessor :options
10
-
11
- def initialize(token, options = {})
12
- self.type = token.type
13
- self.token = token.token
14
- self.text = token.text
15
- self.ts = token.ts
16
- self.level = token.level
17
- self.set_level = token.set_level
18
- self.conditional_level = token.conditional_level
19
- self.nesting_level = 0
20
- self.quantifier = nil
21
- self.options = options
22
- end
23
-
24
- def initialize_clone(orig)
25
- self.text = (orig.text ? orig.text.dup : nil)
26
- self.options = (orig.options ? orig.options.dup : nil)
27
- self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
28
- super
29
- end
30
-
31
- def to_re(format = :full)
32
- ::Regexp.new(to_s(format))
33
- end
34
-
35
- alias :starts_at :ts
36
-
37
- def full_length
38
- to_s.length
39
- end
40
-
41
- def offset
42
- [starts_at, full_length]
43
- end
44
-
45
- def coded_offset
46
- '@%d+%d' % offset
47
- end
48
-
49
- def to_s(format = :full)
50
- "#{text}#{quantifier_affix(format)}"
51
- end
52
-
53
- def quantifier_affix(expression_format)
54
- quantifier.to_s if quantified? && expression_format != :base
55
- end
56
-
57
- def terminal?
58
- !respond_to?(:expressions)
59
- end
60
-
61
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
62
- self.quantifier = Quantifier.new(token, text, min, max, mode)
63
- end
64
-
65
- def unquantified_clone
66
- clone.tap { |exp| exp.quantifier = nil }
67
- end
68
-
69
- def quantified?
70
- !quantifier.nil?
71
- end
72
-
73
- # Deprecated. Prefer `#repetitions` which has a more uniform interface.
74
- def quantity
75
- return [nil,nil] unless quantified?
76
- [quantifier.min, quantifier.max]
77
- end
78
-
79
- def repetitions
80
- return 1..1 unless quantified?
81
- min = quantifier.min
82
- max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
83
- # fix Range#minmax - https://bugs.ruby-lang.org/issues/15807
84
- (min..max).tap { |r| r.define_singleton_method(:minmax) { [min, max] } }
85
- end
86
-
87
- def greedy?
88
- quantified? and quantifier.greedy?
89
- end
90
-
91
- def reluctant?
92
- quantified? and quantifier.reluctant?
93
- end
94
- alias :lazy? :reluctant?
95
-
96
- def possessive?
97
- quantified? and quantifier.possessive?
98
- end
99
-
100
- def attributes
101
- {
102
- type: type,
103
- token: token,
104
- text: to_s(:base),
105
- starts_at: ts,
106
- length: full_length,
107
- level: level,
108
- set_level: set_level,
109
- conditional_level: conditional_level,
110
- options: options,
111
- quantifier: quantified? ? quantifier.to_h : nil,
112
- }
113
- end
114
- alias :to_h :attributes
115
- end
116
-
117
- def self.parsed(exp)
118
- warn('WARNING: Regexp::Expression::Base.parsed is buggy and '\
119
- 'will be removed in 2.0.0. Use Regexp::Parser.parse instead.')
120
- case exp
121
- when String
122
- Regexp::Parser.parse(exp)
123
- when Regexp
124
- Regexp::Parser.parse(exp.source) # <- causes loss of root options
125
- when Regexp::Expression # <- never triggers
126
- exp
127
- else
128
- raise ArgumentError, 'Expression.parsed accepts a String, Regexp, or '\
129
- 'a Regexp::Expression as a value for exp, but it '\
130
- "was given #{exp.class.name}."
131
- end
132
- end
133
-
134
- end # module Regexp::Expression
1
+ require 'regexp_parser/error'
135
2
 
3
+ require 'regexp_parser/expression/shared'
4
+ require 'regexp_parser/expression/base'
136
5
  require 'regexp_parser/expression/quantifier'
137
6
  require 'regexp_parser/expression/subexpression'
138
7
  require 'regexp_parser/expression/sequence'
@@ -140,24 +9,28 @@ require 'regexp_parser/expression/sequence_operation'
140
9
 
141
10
  require 'regexp_parser/expression/classes/alternation'
142
11
  require 'regexp_parser/expression/classes/anchor'
143
- require 'regexp_parser/expression/classes/backref'
12
+ require 'regexp_parser/expression/classes/backreference'
13
+ require 'regexp_parser/expression/classes/character_set'
14
+ require 'regexp_parser/expression/classes/character_set/intersection'
15
+ require 'regexp_parser/expression/classes/character_set/range'
16
+ require 'regexp_parser/expression/classes/character_type'
144
17
  require 'regexp_parser/expression/classes/conditional'
145
- require 'regexp_parser/expression/classes/escape'
18
+ require 'regexp_parser/expression/classes/escape_sequence'
146
19
  require 'regexp_parser/expression/classes/free_space'
147
20
  require 'regexp_parser/expression/classes/group'
148
21
  require 'regexp_parser/expression/classes/keep'
149
22
  require 'regexp_parser/expression/classes/literal'
150
23
  require 'regexp_parser/expression/classes/posix_class'
151
- require 'regexp_parser/expression/classes/property'
152
24
  require 'regexp_parser/expression/classes/root'
153
- require 'regexp_parser/expression/classes/set'
154
- require 'regexp_parser/expression/classes/set/intersection'
155
- require 'regexp_parser/expression/classes/set/range'
156
- require 'regexp_parser/expression/classes/type'
25
+ require 'regexp_parser/expression/classes/unicode_property'
157
26
 
27
+ require 'regexp_parser/expression/methods/construct'
28
+ require 'regexp_parser/expression/methods/human_name'
158
29
  require 'regexp_parser/expression/methods/match'
159
30
  require 'regexp_parser/expression/methods/match_length'
160
31
  require 'regexp_parser/expression/methods/options'
32
+ require 'regexp_parser/expression/methods/parts'
33
+ require 'regexp_parser/expression/methods/printing'
161
34
  require 'regexp_parser/expression/methods/strfregexp'
162
35
  require 'regexp_parser/expression/methods/tests'
163
36
  require 'regexp_parser/expression/methods/traverse'
@@ -4,57 +4,77 @@
4
4
  # given syntax flavor.
5
5
  class Regexp::Lexer
6
6
 
7
- OPENING_TOKENS = [
8
- :capture, :passive, :lookahead, :nlookahead, :lookbehind, :nlookbehind,
9
- :atomic, :options, :options_switch, :named, :absence
7
+ OPENING_TOKENS = %i[
8
+ capture passive lookahead nlookahead lookbehind nlookbehind
9
+ atomic options options_switch named absence open
10
10
  ].freeze
11
11
 
12
- CLOSING_TOKENS = [:close].freeze
12
+ CLOSING_TOKENS = %i[close].freeze
13
13
 
14
- def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", &block)
15
- new.lex(input, syntax, &block)
14
+ CONDITION_TOKENS = %i[condition condition_close].freeze
15
+
16
+ def self.lex(input, syntax = nil, options: nil, collect_tokens: true, &block)
17
+ new.lex(input, syntax, options: options, collect_tokens: collect_tokens, &block)
16
18
  end
17
19
 
18
- def lex(input, syntax = "ruby/#{RUBY_VERSION}", &block)
19
- syntax = Regexp::Syntax.new(syntax)
20
+ def lex(input, syntax = nil, options: nil, collect_tokens: true, &block)
21
+ syntax = syntax ? Regexp::Syntax.for(syntax) : Regexp::Syntax::CURRENT
20
22
 
23
+ self.block = block
24
+ self.collect_tokens = collect_tokens
21
25
  self.tokens = []
26
+ self.prev_token = nil
27
+ self.preprev_token = nil
22
28
  self.nesting = 0
23
29
  self.set_nesting = 0
24
30
  self.conditional_nesting = 0
25
31
  self.shift = 0
26
32
 
27
- last = nil
28
- Regexp::Scanner.scan(input) do |type, token, text, ts, te|
33
+ Regexp::Scanner.scan(input, options: options, collect_tokens: false) do |type, token, text, ts, te|
29
34
  type, token = *syntax.normalize(type, token)
30
35
  syntax.check! type, token
31
36
 
32
37
  ascend(type, token)
33
38
 
34
- if type == :quantifier and last
35
- break_literal(last) if last.type == :literal
36
- break_codepoint_list(last) if last.token == :codepoint_list
39
+ if (last = prev_token) &&
40
+ type == :quantifier &&
41
+ (
42
+ (last.type == :literal && (parts = break_literal(last))) ||
43
+ (last.token == :codepoint_list && (parts = break_codepoint_list(last)))
44
+ )
45
+ emit(parts[0])
46
+ last = parts[1]
37
47
  end
38
48
 
39
49
  current = Regexp::Token.new(type, token, text, ts + shift, te + shift,
40
50
  nesting, set_nesting, conditional_nesting)
41
51
 
42
- current = merge_condition(current) if type == :conditional and
43
- [:condition, :condition_close].include?(token)
44
-
45
- last.next = current if last
46
- current.previous = last if last
52
+ if type == :conditional && CONDITION_TOKENS.include?(token)
53
+ current = merge_condition(current, last)
54
+ elsif last
55
+ last.next = current
56
+ current.previous = last
57
+ emit(last)
58
+ end
47
59
 
48
- tokens << current
49
- last = current
60
+ self.preprev_token = last
61
+ self.prev_token = current
50
62
 
51
63
  descend(type, token)
52
64
  end
53
65
 
54
- if block_given?
55
- tokens.map { |t| block.call(t) }
66
+ emit(prev_token) if prev_token
67
+
68
+ collect_tokens ? tokens : nil
69
+ end
70
+
71
+ def emit(token)
72
+ if block
73
+ # TODO: in v3.0.0, remove `collect_tokens:` kwarg and only collect w/o block
74
+ res = block.call(token)
75
+ tokens << res if collect_tokens
56
76
  else
57
- tokens
77
+ tokens << token
58
78
  end
59
79
  end
60
80
 
@@ -64,27 +84,37 @@ class Regexp::Lexer
64
84
 
65
85
  private
66
86
 
67
- attr_accessor :tokens, :nesting, :set_nesting, :conditional_nesting, :shift
87
+ attr_accessor :block,
88
+ :collect_tokens, :tokens, :prev_token, :preprev_token,
89
+ :nesting, :set_nesting, :conditional_nesting, :shift
68
90
 
69
91
  def ascend(type, token)
92
+ return unless CLOSING_TOKENS.include?(token)
93
+
70
94
  case type
71
95
  when :group, :assertion
72
- self.nesting = nesting - 1 if CLOSING_TOKENS.include?(token)
96
+ self.nesting = nesting - 1
73
97
  when :set
74
- self.set_nesting = set_nesting - 1 if token == :close
98
+ self.set_nesting = set_nesting - 1
75
99
  when :conditional
76
- self.conditional_nesting = conditional_nesting - 1 if token == :close
100
+ self.conditional_nesting = conditional_nesting - 1
101
+ else
102
+ raise "unhandled nesting type #{type}"
77
103
  end
78
104
  end
79
105
 
80
106
  def descend(type, token)
107
+ return unless OPENING_TOKENS.include?(token)
108
+
81
109
  case type
82
110
  when :group, :assertion
83
- self.nesting = nesting + 1 if OPENING_TOKENS.include?(token)
111
+ self.nesting = nesting + 1
84
112
  when :set
85
- self.set_nesting = set_nesting + 1 if token == :open
113
+ self.set_nesting = set_nesting + 1
86
114
  when :conditional
87
- self.conditional_nesting = conditional_nesting + 1 if token == :open
115
+ self.conditional_nesting = conditional_nesting + 1
116
+ else
117
+ raise "unhandled nesting type #{type}"
88
118
  end
89
119
  end
90
120
 
@@ -94,34 +124,46 @@ class Regexp::Lexer
94
124
  lead, last, _ = token.text.partition(/.\z/mu)
95
125
  return if lead.empty?
96
126
 
97
- tokens.pop
98
- tokens << Regexp::Token.new(:literal, :literal, lead,
99
- token.ts, (token.te - last.bytesize),
127
+ token_1 = Regexp::Token.new(:literal, :literal, lead,
128
+ token.ts, (token.te - last.length),
100
129
  nesting, set_nesting, conditional_nesting)
101
- tokens << Regexp::Token.new(:literal, :literal, last,
102
- (token.ts + lead.bytesize), token.te,
130
+ token_2 = Regexp::Token.new(:literal, :literal, last,
131
+ (token.ts + lead.length), token.te,
103
132
  nesting, set_nesting, conditional_nesting)
133
+
134
+ token_1.previous = preprev_token
135
+ token_1.next = token_2
136
+ token_2.previous = token_1 # .next will be set by #lex
137
+ [token_1, token_2]
104
138
  end
105
139
 
140
+ # if a codepoint list is followed by a quantifier, that quantifier applies
141
+ # to the last codepoint, e.g. /\u{61 62 63}{3}/ =~ 'abccc'
142
+ # c.f. #break_literal.
106
143
  def break_codepoint_list(token)
107
144
  lead, _, tail = token.text.rpartition(' ')
108
145
  return if lead.empty?
109
146
 
110
- tokens.pop
111
- tokens << Regexp::Token.new(:escape, :codepoint_list, lead + '}',
147
+ token_1 = Regexp::Token.new(:escape, :codepoint_list, lead + '}',
112
148
  token.ts, (token.te - tail.length),
113
149
  nesting, set_nesting, conditional_nesting)
114
- tokens << Regexp::Token.new(:escape, :codepoint_list, '\u{' + tail,
150
+ token_2 = Regexp::Token.new(:escape, :codepoint_list, '\u{' + tail,
115
151
  (token.ts + lead.length + 1), (token.te + 3),
116
152
  nesting, set_nesting, conditional_nesting)
117
153
 
118
154
  self.shift = shift + 3 # one space less, but extra \, u, {, and }
155
+
156
+ token_1.previous = preprev_token
157
+ token_1.next = token_2
158
+ token_2.previous = token_1 # .next will be set by #lex
159
+ [token_1, token_2]
119
160
  end
120
161
 
121
- def merge_condition(current)
122
- last = tokens.pop
123
- Regexp::Token.new(:conditional, :condition, last.text + current.text,
162
+ def merge_condition(current, last)
163
+ token = Regexp::Token.new(:conditional, :condition, last.text + current.text,
124
164
  last.ts, current.te, nesting, set_nesting, conditional_nesting)
165
+ token.previous = preprev_token # .next will be set by #lex
166
+ token
125
167
  end
126
168
 
127
169
  end # module Regexp::Lexer