parser 2.0.0.pre2 → 2.0.0.pre3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -35,7 +35,7 @@ module Parser
35
35
  def decorate(range, token, info)
36
36
  from, to = range.begin.column, range.end.column
37
37
 
38
- line = range.source_line
38
+ line = range.source_line + ' '
39
39
  line[from...to] = "\e[4m#{line[from...to]}\e[0m"
40
40
 
41
41
  tail_len = to - from - 1
@@ -1,3 +1,5 @@
1
+ # encoding: utf-8
2
+
1
3
  module Parser
2
4
 
3
5
  class Lexer::Literal
@@ -36,6 +38,11 @@ module Parser
36
38
  @lexer = lexer
37
39
  @nesting = 1
38
40
 
41
+ # DELIMITERS and TYPES are hashes with keys encoded in binary.
42
+ # Coerce incoming data to the same encoding.
43
+ str_type = coerce_encoding(str_type)
44
+ delimiter = coerce_encoding(delimiter)
45
+
39
46
  unless TYPES.include?(str_type)
40
47
  message = ERRORS[:unexpected_percent_str] % { :type => str_type }
41
48
  lexer.send(:diagnostic, :error, message, @lexer.send(:range, str_s, str_s + 2))
@@ -46,11 +53,6 @@ module Parser
46
53
  # Start of the string type specifier.
47
54
  @str_s = str_s
48
55
 
49
- # Data buffer.
50
- @buffer = ''
51
- # Start of the current chunk in data buffer.
52
- @buffer_s = nil
53
-
54
56
  @start_tok, @interpolate = TYPES[str_type]
55
57
  @start_delim = DELIMITERS.include?(delimiter) ? delimiter : nil
56
58
  @end_delim = DELIMITERS.fetch(delimiter, delimiter)
@@ -73,6 +75,8 @@ module Parser
73
75
  @str_type << delimiter
74
76
  end
75
77
 
78
+ clear_buffer
79
+
76
80
  emit_start_tok unless @monolithic
77
81
  end
78
82
 
@@ -98,6 +102,8 @@ module Parser
98
102
  end
99
103
 
100
104
  def munge_escape?(character)
105
+ character = coerce_encoding(character)
106
+
101
107
  if words? && character =~ /[ \t\v\r\f\n]/
102
108
  true
103
109
  else
@@ -105,15 +111,9 @@ module Parser
105
111
  end
106
112
  end
107
113
 
108
- def delimiter?(delimiter)
109
- if @indent
110
- @end_delim == delimiter.lstrip
111
- else
112
- @end_delim == delimiter
113
- end
114
- end
115
-
116
114
  def nest_and_try_closing(delimiter, ts, te)
115
+ delimiter = coerce_encoding(delimiter)
116
+
117
117
  if @start_delim && @start_delim == delimiter
118
118
  @nesting += 1
119
119
  elsif delimiter?(delimiter)
@@ -157,7 +157,11 @@ module Parser
157
157
 
158
158
  @buffer_e = te
159
159
 
160
- @buffer << string
160
+ if defined?(Encoding)
161
+ @buffer << string.encode(@lexer.encoding)
162
+ else
163
+ @buffer << string
164
+ end
161
165
  end
162
166
 
163
167
  def flush_string
@@ -169,9 +173,7 @@ module Parser
169
173
  unless @buffer.empty?
170
174
  emit(:tSTRING_CONTENT, @buffer, @buffer_s, @buffer_e)
171
175
 
172
- @buffer = ''
173
- @buffer_s = nil
174
- @buffer_e = nil
176
+ clear_buffer
175
177
  extend_content
176
178
  end
177
179
  end
@@ -192,6 +194,36 @@ module Parser
192
194
 
193
195
  protected
194
196
 
197
+ def delimiter?(delimiter)
198
+ if @indent
199
+ @end_delim == delimiter.lstrip
200
+ else
201
+ @end_delim == delimiter
202
+ end
203
+ end
204
+
205
+ def coerce_encoding(string)
206
+ if defined?(Encoding)
207
+ string.encode(Encoding::UTF_8,
208
+ :invalid => :replace, :undef => :replace)
209
+ else
210
+ string
211
+ end
212
+ end
213
+
214
+ def clear_buffer
215
+ @buffer = ''
216
+
217
+ # Prime the buffer with lexer encoding; otherwise,
218
+ # concatenation will produce varying results.
219
+ if defined?(Encoding)
220
+ @buffer.force_encoding(@lexer.encoding)
221
+ end
222
+
223
+ @buffer_s = nil
224
+ @buffer_e = nil
225
+ end
226
+
195
227
  def emit_start_tok
196
228
  str_e = @heredoc_e || @str_s + @str_type.length
197
229
  emit(@start_tok, @str_type, @str_s, str_e)
@@ -40,6 +40,8 @@ module Parser
40
40
  # Keep in mind that {Parser::Rewriter} does not take care of indentation when
41
41
  # inserting/replacing code so you'll have to do this yourself.
42
42
  #
43
+ # @api public
44
+ #
43
45
  class Rewriter < Parser::AST::Processor
44
46
  ##
45
47
  # Rewrites the AST/source buffer and returns a String containing the new
@@ -418,19 +418,11 @@ rule
418
418
  }
419
419
  | primary_value tCOLON2 tCONSTANT
420
420
  {
421
- if in_def?
422
- diagnostic(:error, :dynamic_const, val[2])
423
- end
424
-
425
421
  result = @builder.assignable(
426
422
  @builder.const_fetch(val[0], val[1], val[2]))
427
423
  }
428
424
  | tCOLON3 tCONSTANT
429
425
  {
430
- if in_def?
431
- diagnostic(:error, :dynamic_const, val[1])
432
- end
433
-
434
426
  result = @builder.assignable(
435
427
  @builder.const_global(val[0], val[1]))
436
428
  }
@@ -461,19 +453,11 @@ rule
461
453
  }
462
454
  | primary_value tCOLON2 tCONSTANT
463
455
  {
464
- if in_def?
465
- diagnostic(:error, :dynamic_const, val[2])
466
- end
467
-
468
456
  result = @builder.assignable(
469
457
  @builder.const_fetch(val[0], val[1], val[2]))
470
458
  }
471
459
  | tCOLON3 tCONSTANT
472
460
  {
473
- if in_def?
474
- diagnostic(:error, :dynamic_const, val[1])
475
- end
476
-
477
461
  result = @builder.assignable(
478
462
  @builder.const_global(val[0], val[1]))
479
463
  }
@@ -1340,7 +1324,7 @@ rule
1340
1324
  }
1341
1325
  | tPIPE block_var tPIPE
1342
1326
  {
1343
- result = @builder.args(val[0], val[1], val[2])
1327
+ result = @builder.args(val[0], val[1], val[2], false)
1344
1328
  }
1345
1329
 
1346
1330
  do_block: kDO_BLOCK
@@ -473,19 +473,11 @@ rule
473
473
  }
474
474
  | primary_value tCOLON2 tCONSTANT
475
475
  {
476
- if in_def?
477
- diagnostic(:error, :dynamic_const, val[2])
478
- end
479
-
480
476
  result = @builder.assignable(
481
477
  @builder.const_fetch(val[0], val[1], val[2]))
482
478
  }
483
479
  | tCOLON3 tCONSTANT
484
480
  {
485
- if in_def?
486
- diagnostic(:error, :dynamic_const, val[1])
487
- end
488
-
489
481
  result = @builder.assignable(
490
482
  @builder.const_global(val[0], val[1]))
491
483
  }
@@ -520,19 +512,11 @@ rule
520
512
  }
521
513
  | primary_value tCOLON2 tCONSTANT
522
514
  {
523
- if in_def?
524
- diagnostic(:error, :dynamic_const, val[2])
525
- end
526
-
527
515
  result = @builder.assignable(
528
516
  @builder.const_fetch(val[0], val[1], val[2]))
529
517
  }
530
518
  | tCOLON3 tCONSTANT
531
519
  {
532
- if in_def?
533
- diagnostic(:error, :dynamic_const, val[1])
534
- end
535
-
536
520
  result = @builder.assignable(
537
521
  @builder.const_global(val[0], val[1]))
538
522
  }
@@ -1425,9 +1409,14 @@ rule
1425
1409
  }
1426
1410
  | f_bad_arg
1427
1411
 
1428
- lambda: f_larglist lambda_body
1412
+ lambda: {
1413
+ @static_env.extend_dynamic
1414
+ }
1415
+ f_larglist lambda_body
1429
1416
  {
1430
- result = [ val[0], val[1] ]
1417
+ result = [ val[1], val[2] ]
1418
+
1419
+ @static_env.unextend
1431
1420
  }
1432
1421
 
1433
1422
  f_larglist: tLPAREN2 f_args opt_bv_decl rparen
@@ -481,19 +481,11 @@ rule
481
481
  }
482
482
  | primary_value tCOLON2 tCONSTANT
483
483
  {
484
- if in_def?
485
- diagnostic(:error, :dynamic_const, val[2])
486
- end
487
-
488
484
  result = @builder.assignable(
489
485
  @builder.const_fetch(val[0], val[1], val[2]))
490
486
  }
491
487
  | tCOLON3 tCONSTANT
492
488
  {
493
- if in_def?
494
- diagnostic(:error, :dynamic_const, val[1])
495
- end
496
-
497
489
  result = @builder.assignable(
498
490
  @builder.const_global(val[0], val[1]))
499
491
  }
@@ -528,19 +520,11 @@ rule
528
520
  }
529
521
  | primary_value tCOLON2 tCONSTANT
530
522
  {
531
- if in_def?
532
- diagnostic(:error, :dynamic_const, val[2])
533
- end
534
-
535
523
  result = @builder.assignable(
536
524
  @builder.const_fetch(val[0], val[1], val[2]))
537
525
  }
538
526
  | tCOLON3 tCONSTANT
539
527
  {
540
- if in_def?
541
- diagnostic(:error, :dynamic_const, val[1])
542
- end
543
-
544
528
  result = @builder.assignable(
545
529
  @builder.const_global(val[0], val[1]))
546
530
  }
@@ -668,21 +652,13 @@ rule
668
652
  }
669
653
  | primary_value tCOLON2 tCONSTANT tOP_ASGN arg
670
654
  {
671
- if in_def?
672
- diagnostic(:error, :dynamic_const, val[2], [ val[3] ])
673
- end
674
-
675
- const = @builder.assignable(
655
+ const = @builder.const_op_assignable(
676
656
  @builder.const_fetch(val[0], val[1], val[2]))
677
657
  result = @builder.op_assign(const, val[3], val[4])
678
658
  }
679
659
  | tCOLON3 tCONSTANT tOP_ASGN arg
680
660
  {
681
- if in_def?
682
- diagnostic(:error, :dynamic_const, val[1], [ val[2] ])
683
- end
684
-
685
- const = @builder.assignable(
661
+ const = @builder.const_op_assignable(
686
662
  @builder.const_global(val[0], val[1]))
687
663
  result = @builder.op_assign(const, val[2], val[3])
688
664
  }
@@ -1486,9 +1462,14 @@ opt_block_args_tail:
1486
1462
  }
1487
1463
  | f_bad_arg
1488
1464
 
1489
- lambda: f_larglist lambda_body
1465
+ lambda: {
1466
+ @static_env.extend_dynamic
1467
+ }
1468
+ f_larglist lambda_body
1490
1469
  {
1491
- result = [ val[0], val[1] ]
1470
+ result = [ val[1], val[2] ]
1471
+
1472
+ @static_env.unextend
1492
1473
  }
1493
1474
 
1494
1475
  f_larglist: tLPAREN2 f_args opt_bv_decl tRPAREN
@@ -116,11 +116,7 @@ rule
116
116
  stmt_or_begin: stmt
117
117
  | klBEGIN tLCURLY top_compstmt tRCURLY
118
118
  {
119
- if in_def?
120
- diagnostic(:error, :begin_in_method, val[0])
121
- end
122
-
123
- result = @builder.preexe(val[0], val[1], val[2], val[3])
119
+ diagnostic(:error, :begin_in_method, val[0])
124
120
  }
125
121
 
126
122
  stmt: kALIAS fitem
@@ -476,19 +472,11 @@ rule
476
472
  }
477
473
  | primary_value tCOLON2 tCONSTANT
478
474
  {
479
- if in_def?
480
- diagnostic(:error, :dynamic_const, val[2])
481
- end
482
-
483
475
  result = @builder.assignable(
484
476
  @builder.const_fetch(val[0], val[1], val[2]))
485
477
  }
486
478
  | tCOLON3 tCONSTANT
487
479
  {
488
- if in_def?
489
- diagnostic(:error, :dynamic_const, val[1])
490
- end
491
-
492
480
  result = @builder.assignable(
493
481
  @builder.const_global(val[0], val[1]))
494
482
  }
@@ -523,19 +511,11 @@ rule
523
511
  }
524
512
  | primary_value tCOLON2 tCONSTANT
525
513
  {
526
- if in_def?
527
- diagnostic(:error, :dynamic_const, val[2])
528
- end
529
-
530
514
  result = @builder.assignable(
531
515
  @builder.const_fetch(val[0], val[1], val[2]))
532
516
  }
533
517
  | tCOLON3 tCONSTANT
534
518
  {
535
- if in_def?
536
- diagnostic(:error, :dynamic_const, val[1])
537
- end
538
-
539
519
  result = @builder.assignable(
540
520
  @builder.const_global(val[0], val[1]))
541
521
  }
@@ -663,21 +643,13 @@ rule
663
643
  }
664
644
  | primary_value tCOLON2 tCONSTANT tOP_ASGN arg
665
645
  {
666
- if in_def?
667
- diagnostic(:error, :dynamic_const, val[2], [ val[3] ])
668
- end
669
-
670
- const = @builder.assignable(
646
+ const = @builder.const_op_assignable(
671
647
  @builder.const_fetch(val[0], val[1], val[2]))
672
648
  result = @builder.op_assign(const, val[3], val[4])
673
649
  }
674
650
  | tCOLON3 tCONSTANT tOP_ASGN arg
675
651
  {
676
- if in_def?
677
- diagnostic(:error, :dynamic_const, val[1], [ val[2] ])
678
- end
679
-
680
- const = @builder.assignable(
652
+ const = @builder.const_op_assignable(
681
653
  @builder.const_global(val[0], val[1]))
682
654
  result = @builder.op_assign(const, val[2], val[3])
683
655
  }
@@ -1487,9 +1459,14 @@ opt_block_args_tail:
1487
1459
  }
1488
1460
  | f_bad_arg
1489
1461
 
1490
- lambda: f_larglist lambda_body
1462
+ lambda: {
1463
+ @static_env.extend_dynamic
1464
+ }
1465
+ f_larglist lambda_body
1491
1466
  {
1492
- result = [ val[0], val[1] ]
1467
+ result = [ val[1], val[2] ]
1468
+
1469
+ @static_env.unextend
1493
1470
  }
1494
1471
 
1495
1472
  f_larglist: tLPAREN2 f_args opt_bv_decl tRPAREN
@@ -2336,5 +2313,5 @@ warn "warning: Ruby 2.1 is not released yet and parser support may be incomplete
2336
2313
  end
2337
2314
 
2338
2315
  def default_encoding
2339
- Encoding::BINARY
2316
+ Encoding::UTF_8
2340
2317
  end
@@ -155,7 +155,12 @@ module Parser
155
155
  source.force_encoding(@parser.default_encoding)
156
156
 
157
157
  buffer = Parser::Source::Buffer.new(filename)
158
- buffer.source = source
158
+
159
+ if @parser.class.name == 'Parser::Ruby18'
160
+ buffer.raw_source = source
161
+ else
162
+ buffer.source = source
163
+ end
159
164
 
160
165
  process_buffer(buffer)
161
166
  end
@@ -3,20 +3,32 @@
3
3
  module Parser
4
4
  module Source
5
5
 
6
+ ##
7
+ # @api public
8
+ #
6
9
  class Buffer
7
10
  attr_reader :name, :first_line
8
11
 
12
+ ENCODING_RE =
13
+ /\#.*coding\s*[:=]\s*
14
+ (
15
+ # Special-case: there's a UTF8-MAC encoding.
16
+ (utf8-mac)
17
+ |
18
+ # Chew the suffix; it's there for emacs compat.
19
+ ([A-Za-z0-9_-]+?)(-unix|-dos|-mac)
20
+ |
21
+ ([A-Za-z0-9_-]+)
22
+ )
23
+ /x
24
+
9
25
  def self.recognize_encoding(string)
10
26
  return if string.empty?
11
27
 
12
28
  # extract the first two lines in an efficient way
13
- string =~ /(.*)\n?(.*\n)?/
29
+ string =~ /\A(.*)\n?(.*\n)?/
14
30
  first_line, second_line = $1, $2
15
31
 
16
- [first_line, second_line].each do |line|
17
- line.force_encoding(Encoding::ASCII_8BIT) if line
18
- end
19
-
20
32
  if first_line =~ /\A\xef\xbb\xbf/ # BOM
21
33
  return Encoding::UTF_8
22
34
  elsif first_line[0, 2] == '#!'
@@ -25,8 +37,8 @@ module Parser
25
37
  encoding_line = first_line
26
38
  end
27
39
 
28
- if encoding_line =~ /^#.*coding\s*[:=]\s*([A-Za-z0-9_-]+)/
29
- Encoding.find($1)
40
+ if (result = ENCODING_RE.match(encoding_line))
41
+ Encoding.find(result[2] || result[3] || result[5])
30
42
  else
31
43
  nil
32
44
  end
@@ -37,15 +49,16 @@ module Parser
37
49
  # string.
38
50
  #
39
51
  def self.reencode_string(string)
40
- encoding = recognize_encoding(string)
52
+ original_encoding = string.encoding
53
+ detected_encoding = recognize_encoding(string.force_encoding(Encoding::BINARY))
41
54
 
42
- if encoding.nil?
55
+ if detected_encoding.nil?
56
+ string.force_encoding(original_encoding)
57
+ elsif detected_encoding == Encoding::BINARY
43
58
  string
44
- elsif encoding == Encoding::BINARY
45
- string.force_encoding(Encoding::BINARY)
46
59
  else
47
60
  string.
48
- force_encoding(encoding).
61
+ force_encoding(detected_encoding).
49
62
  encode(Encoding::UTF_8)
50
63
  end
51
64
  end
@@ -76,16 +89,20 @@ module Parser
76
89
  end
77
90
 
78
91
  def source=(source)
79
- if @source
80
- raise ArgumentError, 'Source::Buffer is immutable'
81
- end
82
-
83
- if source.respond_to? :encoding
92
+ if defined?(Encoding)
84
93
  source = source.dup if source.frozen?
85
94
  source = self.class.reencode_string(source)
86
95
  end
87
96
 
88
- @source = source.freeze
97
+ self.raw_source = source
98
+ end
99
+
100
+ def raw_source=(source)
101
+ if @source
102
+ raise ArgumentError, 'Source::Buffer is immutable'
103
+ end
104
+
105
+ @source = source.gsub(/\r\n/, "\n").freeze
89
106
  end
90
107
 
91
108
  def decompose_position(position)
@@ -94,12 +111,18 @@ module Parser
94
111
  [ @first_line + line_no, position - line_begin ]
95
112
  end
96
113
 
97
- def source_line(line)
114
+ def source_line(lineno)
98
115
  unless @lines
99
- @lines = @source.lines.map(&:chomp)
116
+ @lines = @source.lines.to_a
117
+ @lines.each { |line| line.gsub!(/\n$/, '') }
118
+
119
+ # Lexer has an "infinite stream of EOF symbols" after the
120
+ # actual EOF, so in some cases (e.g. EOF token of ruby-parse -E)
121
+ # tokens will refer to one line past EOF.
122
+ @lines << ""
100
123
  end
101
124
 
102
- @lines[line - @first_line].dup
125
+ @lines[lineno - @first_line].dup
103
126
  end
104
127
 
105
128
  private