regexp_parser 2.6.2 → 2.7.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 66568005494b517613155277c6be4731eb8a26bb9b48a692a9430507286ce583
-  data.tar.gz: d1fc6c6f1a0c7f939c51703ac844c2dbb134f96e0e55780646cb7e3e87d7a652
+  metadata.gz: 04af46818e9d560362fea9b3fd24802b557ac145ed95f6e02580dd7cf5e8ddfc
+  data.tar.gz: 75b7d30241f48ddf90c8cd68228fa928904ab6055ea755f4bdcf28361e645a4b
 SHA512:
-  metadata.gz: b955b2215b71c94497e52841142fab8c2b9930d0d6cea6ea2b3eeb8ed9fe84575e2f34aae3a6051af2b56429f98cf070b9151805f2cb93ddb511ec1e0e50dd7c
-  data.tar.gz: 3a4f083942b66ddb4b67ab33f14bb1c0b724a60c2b30605059d32ce3648e9cb46e31e797b7a526a2028c1e018d73365f5ef955256de4e63397d6ea105714ff12
+  metadata.gz: 407025a9b14af76463260fca2a48f9fef4ab863e3dddf3f7f54101c1348611afa49d9973e850d9e1c84d6e5faf8f1a9d3d2da5dceaefe8dc4fefe7069ecd9280
+  data.tar.gz: 9f3d2eb4264318511a82e9034c4c4a8a8e73e67e427945f0c9f745fd37b2f2f0ae8e30ba942f0920da3109b59436a5518dfc5e2f7669317de0214a0deb6f0e07
data/CHANGELOG.md CHANGED
@@ -5,7 +5,22 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [Unreleased]
+## [2.7.0] - 2023-02-08 - [Janosch Müller](mailto:janosch84@gmail.com)
+
+### Added
+
+- `Regexp::Lexer.lex` now streams tokens when called with a block
+  - it can now take arbitrarily large input, just like `Regexp::Scanner`
+  - this also slightly improves `Regexp::Parser.parse` performance
+  - note: `Regexp::Parser.parse` still does not and will not support streaming
+- improved performance of `Subexpression#each_expression`
+- minor improvements to `Regexp::Scanner` performance
+- overall improvement of parse performance: about 10% for large Regexps
+
+### Fixed
+
+- parsing of octal escape sequences in sets, e.g. `[\141]`
+  * thanks to [Randy Stauner](https://github.com/rwstauner) for the report
 
 ## [2.6.2] - 2023-01-19 - [Janosch Müller](mailto:janosch84@gmail.com)
 
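A minimal sketch of the streaming lexer API described in the entry above (assuming regexp_parser 2.7.0 is installed; see the lexer diff below for the implementation):

```ruby
require 'regexp_parser'

# With a block, Regexp::Lexer.lex yields each Regexp::Token as it is produced
# instead of materializing the whole token array first.
Regexp::Lexer.lex(/a+(b)?/) do |token|
  puts "#{token.type} #{token.token} #{token.text}"
end

# Combined with collect_tokens: false, lex returns nil rather than the
# collected array, so arbitrarily large input can be processed.
Regexp::Lexer.lex(/a+/, collect_tokens: false) { |t| t } # => nil
```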
@@ -20,6 +20,10 @@ module Regexp::Expression
         super
       end
+
+      def referential?
+        true
+      end
     end
 
     class Number < Backreference::Base
@@ -20,6 +20,10 @@ module Regexp::Expression
         self.referenced_expression = orig.referenced_expression.dup
         super
       end
+
+      def referential?
+        true
+      end
     end
 
     class Branch < Regexp::Expression::Sequence; end
@@ -55,6 +59,10 @@ module Regexp::Expression
         condition.reference
       end
 
+      def referential?
+        true
+      end
+
       def parts
         [text.dup, condition, *intersperse(branches, '|'), ')']
       end
@@ -36,11 +36,14 @@ module Regexp::Expression
 
     # Iterates over the expressions of this expression as an array, passing
     # the expression and its index within its parent to the given block.
-    def each_expression(include_self = false)
+    def each_expression(include_self = false, &block)
       return enum_for(__method__, include_self) unless block_given?
 
-      traverse(include_self) do |event, exp, index|
-        yield(exp, index) unless event == :exit
+      block.call(self, 0) if include_self
+
+      each_with_index do |exp, index|
+        block.call(exp, index)
+        exp.each_expression(&block) unless exp.terminal?
       end
     end
 
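For context, `Subexpression#each_expression` keeps its contract: it yields each expression together with its index within its parent, and returns an `Enumerator` when called without a block; only the `traverse`-based implementation is replaced by direct recursion. A rough usage sketch:

```ruby
# Depth-first iteration over a parse tree; indices are relative to each parent.
root = Regexp::Parser.parse(/a(b(c))/)

root.each_expression do |exp, index|
  puts "#{exp.class} (index #{index} in its parent)"
end

root.each_expression.count   # no block: returns an Enumerator, as before
root.each_expression(true)   # include_self = true also yields the root itself
```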
@@ -13,7 +13,6 @@ module Regexp::Expression
           set_level: exp.set_level,
           conditional_level: params[:conditional_level] || exp.conditional_level,
         )
-        sequence.nesting_level = exp.nesting_level + 1
         sequence.options = active_opts
         exp.expressions << sequence
         sequence
@@ -77,7 +77,11 @@ module Regexp::Expression
     end
 
     def terminal?
-      !respond_to?(:expressions)
+      true # overridden to be false in Expression::Subexpression
+    end
+
+    def referential?
+      false # overridden to be true e.g. in Expression::Backreference::Base
     end
 
     def nesting_level=(lvl)
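A quick sketch of what the two new predicates report on a parsed tree (assuming regexp_parser 2.7.0; comments show the expected results):

```ruby
# terminal? is false only for expressions that can contain children;
# referential? is true for expressions that point at another expression.
root = Regexp::Parser.parse(/(a)\1/)

root.terminal?        # => false (Root is a Subexpression)
group, backref = root.expressions
group.referential?    # => false (Group::Capture is a target, not a referrer)
backref.referential?  # => true  (Backreference::Number)
backref.terminal?     # => true
```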
@@ -19,7 +19,6 @@ module Regexp::Expression
       if exp.is_a?(WhiteSpace) && last && last.is_a?(WhiteSpace)
         last.merge(exp)
       else
-        exp.nesting_level = nesting_level + 1
         expressions << exp
       end
     end
@@ -53,6 +52,10 @@ module Regexp::Expression
       )
     end
 
+    def terminal?
+      false
+    end
+
     private
 
     def intersperse(expressions, separator)
@@ -13,50 +13,68 @@ class Regexp::Lexer
 
   CONDITION_TOKENS = %i[condition condition_close].freeze
 
-  def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
-    new.lex(input, syntax, options: options, &block)
+  def self.lex(input, syntax = nil, options: nil, collect_tokens: true, &block)
+    new.lex(input, syntax, options: options, collect_tokens: collect_tokens, &block)
   end
 
-  def lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
-    syntax = Regexp::Syntax.for(syntax)
+  def lex(input, syntax = nil, options: nil, collect_tokens: true, &block)
+    syntax = syntax ? Regexp::Syntax.for(syntax) : Regexp::Syntax::CURRENT
 
+    self.block = block
+    self.collect_tokens = collect_tokens
     self.tokens = []
+    self.prev_token = nil
+    self.preprev_token = nil
     self.nesting = 0
     self.set_nesting = 0
     self.conditional_nesting = 0
     self.shift = 0
 
-    last = nil
-    Regexp::Scanner.scan(input, options: options) do |type, token, text, ts, te|
+    Regexp::Scanner.scan(input, options: options, collect_tokens: false) do |type, token, text, ts, te|
       type, token = *syntax.normalize(type, token)
       syntax.check! type, token
 
       ascend(type, token)
 
-      if type == :quantifier and last
-        break_literal(last) if last.type == :literal
-        break_codepoint_list(last) if last.token == :codepoint_list
+      if (last = prev_token) &&
+         type == :quantifier &&
+         (
+           (last.type == :literal && (parts = break_literal(last))) ||
+           (last.token == :codepoint_list && (parts = break_codepoint_list(last)))
+         )
+        emit(parts[0])
+        last = parts[1]
       end
 
      current = Regexp::Token.new(type, token, text, ts + shift, te + shift,
                                  nesting, set_nesting, conditional_nesting)
 
-      current = merge_condition(current) if type == :conditional and
-        CONDITION_TOKENS.include?(token)
-
-      last.next = current if last
-      current.previous = last if last
+      if type == :conditional && CONDITION_TOKENS.include?(token)
+        current = merge_condition(current, last)
+      elsif last
+        last.next = current
+        current.previous = last
+        emit(last)
+      end
 
-      tokens << current
-      last = current
+      self.preprev_token = last
+      self.prev_token = current
 
      descend(type, token)
    end
 
-    if block_given?
-      tokens.map { |t| block.call(t) }
+    emit(prev_token) if prev_token
+
+    collect_tokens ? tokens : nil
+  end
+
+  def emit(token)
+    if block
+      # TODO: in v3.0.0, remove `collect_tokens:` kwarg and only collect w/o block
+      res = block.call(token)
+      tokens << res if collect_tokens
    else
-      tokens
+      tokens << token
    end
  end
 
@@ -66,7 +84,9 @@ class Regexp::Lexer
 
   private
 
-  attr_accessor :tokens, :nesting, :set_nesting, :conditional_nesting, :shift
+  attr_accessor :block,
+                :collect_tokens, :tokens, :prev_token, :preprev_token,
+                :nesting, :set_nesting, :conditional_nesting, :shift
 
   def ascend(type, token)
     case type
@@ -96,34 +116,46 @@ class Regexp::Lexer
     lead, last, _ = token.text.partition(/.\z/mu)
     return if lead.empty?
 
-    tokens.pop
-    tokens << Regexp::Token.new(:literal, :literal, lead,
+    token_1 = Regexp::Token.new(:literal, :literal, lead,
        token.ts, (token.te - last.length),
        nesting, set_nesting, conditional_nesting)
-    tokens << Regexp::Token.new(:literal, :literal, last,
+    token_2 = Regexp::Token.new(:literal, :literal, last,
        (token.ts + lead.length), token.te,
        nesting, set_nesting, conditional_nesting)
+
+    token_1.previous = preprev_token
+    token_1.next = token_2
+    token_2.previous = token_1 # .next will be set by #lex
+    [token_1, token_2]
   end
 
+  # if a codepoint list is followed by a quantifier, that quantifier applies
+  # to the last codepoint, e.g. /\u{61 62 63}{3}/ =~ 'abccc'
+  # c.f. #break_literal.
   def break_codepoint_list(token)
     lead, _, tail = token.text.rpartition(' ')
     return if lead.empty?
 
-    tokens.pop
-    tokens << Regexp::Token.new(:escape, :codepoint_list, lead + '}',
+    token_1 = Regexp::Token.new(:escape, :codepoint_list, lead + '}',
        token.ts, (token.te - tail.length),
        nesting, set_nesting, conditional_nesting)
-    tokens << Regexp::Token.new(:escape, :codepoint_list, '\u{' + tail,
+    token_2 = Regexp::Token.new(:escape, :codepoint_list, '\u{' + tail,
        (token.ts + lead.length + 1), (token.te + 3),
        nesting, set_nesting, conditional_nesting)
 
     self.shift = shift + 3 # one space less, but extra \, u, {, and }
+
+    token_1.previous = preprev_token
+    token_1.next = token_2
+    token_2.previous = token_1 # .next will be set by #lex
+    [token_1, token_2]
   end
 
-  def merge_condition(current)
-    last = tokens.pop
-    Regexp::Token.new(:conditional, :condition, last.text + current.text,
+  def merge_condition(current, last)
+    token = Regexp::Token.new(:conditional, :condition, last.text + current.text,
       last.ts, current.te, nesting, set_nesting, conditional_nesting)
+    token.previous = preprev_token # .next will be set by #lex
+    token
   end
 
 end # module Regexp::Lexer
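As the new `#break_codepoint_list` comment notes, a quantifier after a codepoint list applies only to the last codepoint, so the lexer splits the token in two. A hedged illustration (token texts are approximate):

```ruby
# /\u{61 62}{2}/ matches "abb"; the lexer mirrors this by emitting the interval
# quantifier right after the final codepoint only.
Regexp::Lexer.lex(/\u{61 62}{2}/).map { |t| [t.token, t.text] }
# => roughly [[:codepoint_list, "\\u{61}"], [:codepoint_list, "\\u{62}"], [:interval, "{2}"]]
```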
@@ -18,11 +18,11 @@ class Regexp::Parser
     end
   end
 
-  def self.parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
+  def self.parse(input, syntax = nil, options: nil, &block)
     new.parse(input, syntax, options: options, &block)
   end
 
-  def parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
+  def parse(input, syntax = nil, options: nil, &block)
     root = Root.construct(options: extract_options(input, options))
 
     self.root = root
@@ -35,7 +35,7 @@ class Regexp::Parser
 
 
     self.captured_group_counts = Hash.new(0)
-    Regexp::Lexer.scan(input, syntax, options: options) do |token|
+    Regexp::Lexer.scan(input, syntax, options: options, collect_tokens: false) do |token|
       parse_token(token)
     end
 
@@ -379,7 +379,7 @@ class Regexp::Parser
   end
 
   def sequence_operation(klass, token)
-    unless node.is_a?(klass)
+    unless node.instance_of?(klass)
       operator = klass.new(token, active_opts)
       sequence = operator.add_sequence(active_opts)
       sequence.expressions = node.expressions
@@ -541,7 +541,7 @@ class Regexp::Parser
 
   def range(token)
     exp = CharacterSet::Range.new(token, active_opts)
-    scope = node.last.is_a?(CharacterSet::IntersectedSequence) ? node.last : node
+    scope = node.last.instance_of?(CharacterSet::IntersectedSequence) ? node.last : node
     exp << scope.expressions.pop
     nest(exp)
   end
@@ -568,7 +568,7 @@ class Regexp::Parser
   end
 
   def close_completed_character_set_range
-    decrease_nesting if node.is_a?(CharacterSet::Range) && node.complete?
+    decrease_nesting if node.instance_of?(CharacterSet::Range) && node.complete?
   end
 
   def active_opts
@@ -579,15 +579,16 @@ class Regexp::Parser
   # an instance of Backreference::Number, its #referenced_expression is set to
   # the instance of Group::Capture that it refers to via its number.
   def assign_referenced_expressions
-    # find all referencable expressions
+    # find all referencable and refering expressions
     targets = { 0 => root }
+    referrers = []
     root.each_expression do |exp|
       exp.is_a?(Group::Capture) && targets[exp.identifier] = exp
+      referrers << exp if exp.referential?
     end
-    # assign them to any refering expressions
-    root.each_expression do |exp|
-      next unless exp.respond_to?(:reference)
-
+    # assign reference expression to refering expressions
+    # (in a second iteration because there might be forward references)
+    referrers.each do |exp|
       exp.referenced_expression = targets[exp.reference] ||
                                   raise(ParserError, "Invalid reference: #{exp.reference}")
     end
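The added comment explains the two-pass approach: a reference may occur before its target (Oniguruma permits forward-referencing constructs such as subexpression calls), so targets and referrers are collected first and linked afterwards. A hedged sketch of the resulting links:

```ruby
root = Regexp::Parser.parse(/(?<x>a)\k<x>/)
group, backref = root.expressions

backref.referential?                         # => true
backref.referenced_expression.equal?(group)  # => true (same object, not a copy)
# An unresolvable reference raises ParserError ("Invalid reference: ..."),
# as in the code above.
```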
@@ -17,7 +17,7 @@
     text = copy(data, ts-1, te)
     type = (text[1] == 'P') ^ (text[3] == '^') ? :nonproperty : :property
 
-    name = data[ts+2..te-2].pack('c*').gsub(/[\^\s_\-]/, '').downcase
+    name = text[3..-2].gsub(/[\^\s_\-]/, '').downcase
 
     token = self.class.short_prop_map[name] || self.class.long_prop_map[name]
     validation_error(:property, name) unless token
@@ -59,9 +59,6 @@
   one_or_more = '+' | '+?' | '++';
 
   quantifier_greedy = '?' | '*' | '+';
-  quantifier_reluctant = '??' | '*?' | '+?';
-  quantifier_possessive = '?+' | '*+' | '++';
-  quantifier_mode = '?' | '+';
 
   quantity_exact = (digit+);
   quantity_minimum = (digit+) . ',';
@@ -70,9 +67,6 @@
   quantifier_interval = range_open . ( quantity_exact | quantity_minimum |
                         quantity_maximum | quantity_range ) . range_close;
 
-  quantifiers = quantifier_greedy | quantifier_reluctant |
-                quantifier_possessive | quantifier_interval;
-
   conditional = '(?(';
 
   group_comment = '?#' . [^)]* . group_close;
@@ -132,7 +126,8 @@
                keep_mark | sequence_char;
 
   # escapes that also work within a character set
-  set_escape = backslash | brackets | escaped_ascii | property_char |
+  set_escape = backslash | brackets | escaped_ascii |
+               octal_sequence | property_char |
                sequence_char | single_codepoint_char_type;
 
 
@@ -168,8 +163,8 @@
   };
 
   '-]' @set_closed { # special case, emits two tokens
-    emit(:literal, :literal, copy(data, ts, te-1))
-    emit(:set, :close, copy(data, ts+1, te))
+    emit(:literal, :literal, '-')
+    emit(:set, :close, ']')
    if in_set?
      fret;
    else
@@ -183,28 +178,27 @@
   };
 
   '^' {
-    text = copy(data, ts, te)
-    if tokens.last[1] == :open
-      emit(:set, :negate, text)
+    if prev_token[1] == :open
+      emit(:set, :negate, '^')
     else
-      emit(:literal, :literal, text)
+      emit(:literal, :literal, '^')
     end
   };
 
   '-' {
-    text = copy(data, ts, te)
-    # ranges cant start with a subset or intersection/negation/range operator
-    if tokens.last[0] == :set
-      emit(:literal, :literal, text)
+    # ranges cant start with the opening bracket, a subset, or
+    # intersection/negation/range operators
+    if prev_token[0] == :set
+      emit(:literal, :literal, '-')
     else
-      emit(:set, :range, text)
+      emit(:set, :range, '-')
     end
   };
 
   # Unlike ranges, intersections can start or end at set boundaries, whereupon
   # they match nothing: r = /[a&&]/; [r =~ ?a, r =~ ?&] # => [nil, nil]
   '&&' {
-    emit(:set, :intersection, copy(data, ts, te))
+    emit(:set, :intersection, '&&')
   };
 
   backslash {
@@ -212,7 +206,7 @@
   };
 
   set_open >(open_bracket, 1) >set_opened {
-    emit(:set, :open, copy(data, ts, te))
+    emit(:set, :open, '[')
     fcall character_set;
   };
 
@@ -254,12 +248,22 @@
   # set escapes scanner
   # --------------------------------------------------------------------------
   set_escape_sequence := |*
+    # Special case: in sets, octal sequences have higher priority than backrefs
+    octal_sequence {
+      emit(:escape, :octal, copy(data, ts-1, te))
+      fret;
+    };
+
+    # Scan all other escapes that work in sets with the generic escape scanner
     set_escape > (escaped_set_alpha, 2) {
       fhold;
       fnext character_set;
       fcall escape_sequence;
     };
 
+    # Treat all remaining escapes - those not supported in sets - as literal.
+    # (This currently includes \^, \-, \&, \:, although these could potentially
+    # be meta chars when not escaped, depending on their position in the set.)
     any > (escaped_set_alpha, 1) {
       emit(:escape, :literal, copy(data, ts-1, te))
       fret;
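This is the fix for the `[\141]` case from the changelog: inside a set, an octal sequence is now scanned as `:octal` instead of falling through to the generic escape scanner. A rough sketch of the resulting token stream (shape approximate):

```ruby
Regexp::Scanner.scan(/[\141]/).map { |type, token, text, _ts, _te| [type, token, text] }
# => roughly [[:set, :open, "["], [:escape, :octal, "\\141"], [:set, :close, "]"]]
```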
@@ -528,7 +532,7 @@
   group_close @group_closed {
     if conditional_stack.last == group_depth + 1
       conditional_stack.pop
-      emit(:conditional, :close, copy(data, ts, te))
+      emit(:conditional, :close, ')')
     else
       if spacing_stack.length > 1 &&
          spacing_stack.last[:depth] == group_depth + 1
@@ -536,7 +540,7 @@
         self.free_spacing = spacing_stack.last[:free_spacing]
       end
 
-      emit(:group, :close, copy(data, ts, te))
+      emit(:group, :close, ')')
     end
   };
 
@@ -717,23 +721,24 @@ class Regexp::Scanner
   #
   # This method may raise errors if a syntax error is encountered.
   # --------------------------------------------------------------------------
-  def self.scan(input_object, options: nil, &block)
-    new.scan(input_object, options: options, &block)
+  def self.scan(input_object, options: nil, collect_tokens: true, &block)
+    new.scan(input_object, options: options, collect_tokens: collect_tokens, &block)
   end
 
-  def scan(input_object, options: nil, &block)
-    self.literal = nil
+  def scan(input_object, options: nil, collect_tokens: true, &block)
+    self.collect_tokens = collect_tokens
+    self.literal_run = nil
     stack = []
 
     input = input_object.is_a?(Regexp) ? input_object.source : input_object
     self.free_spacing = free_spacing?(input_object, options)
     self.spacing_stack = [{:free_spacing => free_spacing, :depth => 0}]
 
-    data = input.unpack("c*") if input.is_a?(String)
+    data = input.unpack("c*")
     eof = data.length
 
     self.tokens = []
-    self.block = block_given? ? block : nil
+    self.block = block
 
     self.set_depth = 0
     self.group_depth = 0
@@ -758,7 +763,7 @@ class Regexp::Scanner
                       "[#{set_depth}]") if in_set?
 
     # when the entire expression is a literal run
-    emit_literal if literal
+    emit_literal if literal_run
 
     tokens
   end
@@ -785,26 +790,37 @@ class Regexp::Scanner
   def emit(type, token, text)
     #puts "EMIT: type: #{type}, token: #{token}, text: #{text}, ts: #{ts}, te: #{te}"
 
-    emit_literal if literal
+    emit_literal if literal_run
 
     # Ragel runs with byte-based indices (ts, te). These are of little value to
     # end-users, so we keep track of char-based indices and emit those instead.
     ts_char_pos = char_pos
     te_char_pos = char_pos + text.length
 
-    if block
-      block.call type, token, text, ts_char_pos, te_char_pos
-    end
+    tok = [type, token, text, ts_char_pos, te_char_pos]
 
-    tokens << [type, token, text, ts_char_pos, te_char_pos]
+    self.prev_token = tok
 
     self.char_pos = te_char_pos
+
+    if block
+      block.call type, token, text, ts_char_pos, te_char_pos
+      # TODO: in v3.0.0, remove `collect_tokens:` kwarg and only collect if no block given
+      tokens << tok if collect_tokens
+    elsif collect_tokens
+      tokens << tok
+    end
   end
 
+  attr_accessor :literal_run # only public for #||= to work on ruby <= 2.5
+
   private
 
-  attr_accessor :tokens, :literal, :block, :free_spacing, :spacing_stack,
-                :group_depth, :set_depth, :conditional_stack, :char_pos
+  attr_accessor :block,
+                :collect_tokens, :tokens, :prev_token,
+                :free_spacing, :spacing_stack,
+                :group_depth, :set_depth, :conditional_stack,
+                :char_pos
 
   def free_spacing?(input_object, options)
     if options && !input_object.is_a?(String)
@@ -834,14 +850,13 @@ class Regexp::Scanner
   # Appends one or more characters to the literal buffer, to be emitted later
   # by a call to emit_literal.
   def append_literal(data, ts, te)
-    self.literal = literal || []
-    literal << copy(data, ts, te)
+    (self.literal_run ||= []) << copy(data, ts, te)
   end
 
   # Emits the literal run collected by calls to the append_literal method.
   def emit_literal
-    text = literal.join
-    self.literal = nil
+    text = literal_run.join
+    self.literal_run = nil
     emit(:literal, :literal, text)
   end
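Like the lexer, the scanner now accepts `collect_tokens:`; with a block and `collect_tokens: false` it streams the five-element token tuples without retaining them. A minimal sketch (assuming regexp_parser 2.7.0):

```ruby
Regexp::Scanner.scan('a+(b)', collect_tokens: false) do |type, token, text, ts, te|
  puts "#{type} #{token} #{text.inspect} [#{ts},#{te}]"
end
```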