parser 2.0.0.pre2 → 2.0.0.pre3

Sign up to get free protection for your applications and to get access to all the features.
@@ -35,7 +35,7 @@ module Parser
35
35
  def decorate(range, token, info)
36
36
  from, to = range.begin.column, range.end.column
37
37
 
38
- line = range.source_line
38
+ line = range.source_line + ' '
39
39
  line[from...to] = "\e[4m#{line[from...to]}\e[0m"
40
40
 
41
41
  tail_len = to - from - 1
@@ -1,3 +1,5 @@
1
+ # encoding: utf-8
2
+
1
3
  module Parser
2
4
 
3
5
  class Lexer::Literal
@@ -36,6 +38,11 @@ module Parser
36
38
  @lexer = lexer
37
39
  @nesting = 1
38
40
 
41
+ # DELIMITERS and TYPES are hashes with keys encoded in binary.
42
+ # Coerce incoming data to the same encoding.
43
+ str_type = coerce_encoding(str_type)
44
+ delimiter = coerce_encoding(delimiter)
45
+
39
46
  unless TYPES.include?(str_type)
40
47
  message = ERRORS[:unexpected_percent_str] % { :type => str_type }
41
48
  lexer.send(:diagnostic, :error, message, @lexer.send(:range, str_s, str_s + 2))
@@ -46,11 +53,6 @@ module Parser
46
53
  # Start of the string type specifier.
47
54
  @str_s = str_s
48
55
 
49
- # Data buffer.
50
- @buffer = ''
51
- # Start of the current chunk in data buffer.
52
- @buffer_s = nil
53
-
54
56
  @start_tok, @interpolate = TYPES[str_type]
55
57
  @start_delim = DELIMITERS.include?(delimiter) ? delimiter : nil
56
58
  @end_delim = DELIMITERS.fetch(delimiter, delimiter)
@@ -73,6 +75,8 @@ module Parser
73
75
  @str_type << delimiter
74
76
  end
75
77
 
78
+ clear_buffer
79
+
76
80
  emit_start_tok unless @monolithic
77
81
  end
78
82
 
@@ -98,6 +102,8 @@ module Parser
98
102
  end
99
103
 
100
104
  def munge_escape?(character)
105
+ character = coerce_encoding(character)
106
+
101
107
  if words? && character =~ /[ \t\v\r\f\n]/
102
108
  true
103
109
  else
@@ -105,15 +111,9 @@ module Parser
105
111
  end
106
112
  end
107
113
 
108
- def delimiter?(delimiter)
109
- if @indent
110
- @end_delim == delimiter.lstrip
111
- else
112
- @end_delim == delimiter
113
- end
114
- end
115
-
116
114
  def nest_and_try_closing(delimiter, ts, te)
115
+ delimiter = coerce_encoding(delimiter)
116
+
117
117
  if @start_delim && @start_delim == delimiter
118
118
  @nesting += 1
119
119
  elsif delimiter?(delimiter)
@@ -157,7 +157,11 @@ module Parser
157
157
 
158
158
  @buffer_e = te
159
159
 
160
- @buffer << string
160
+ if defined?(Encoding)
161
+ @buffer << string.encode(@lexer.encoding)
162
+ else
163
+ @buffer << string
164
+ end
161
165
  end
162
166
 
163
167
  def flush_string
@@ -169,9 +173,7 @@ module Parser
169
173
  unless @buffer.empty?
170
174
  emit(:tSTRING_CONTENT, @buffer, @buffer_s, @buffer_e)
171
175
 
172
- @buffer = ''
173
- @buffer_s = nil
174
- @buffer_e = nil
176
+ clear_buffer
175
177
  extend_content
176
178
  end
177
179
  end
@@ -192,6 +194,36 @@ module Parser
192
194
 
193
195
  protected
194
196
 
197
+ def delimiter?(delimiter)
198
+ if @indent
199
+ @end_delim == delimiter.lstrip
200
+ else
201
+ @end_delim == delimiter
202
+ end
203
+ end
204
+
205
+ def coerce_encoding(string)
206
+ if defined?(Encoding)
207
+ string.encode(Encoding::UTF_8,
208
+ :invalid => :replace, :undef => :replace)
209
+ else
210
+ string
211
+ end
212
+ end
213
+
214
+ def clear_buffer
215
+ @buffer = ''
216
+
217
+ # Prime the buffer with lexer encoding; otherwise,
218
+ # concatenation will produce varying results.
219
+ if defined?(Encoding)
220
+ @buffer.force_encoding(@lexer.encoding)
221
+ end
222
+
223
+ @buffer_s = nil
224
+ @buffer_e = nil
225
+ end
226
+
195
227
  def emit_start_tok
196
228
  str_e = @heredoc_e || @str_s + @str_type.length
197
229
  emit(@start_tok, @str_type, @str_s, str_e)
@@ -40,6 +40,8 @@ module Parser
40
40
  # Keep in mind that {Parser::Rewriter} does not take care of indentation when
41
41
  # inserting/replacing code so you'll have to do this yourself.
42
42
  #
43
+ # @api public
44
+ #
43
45
  class Rewriter < Parser::AST::Processor
44
46
  ##
45
47
  # Rewrites the AST/source buffer and returns a String containing the new
@@ -418,19 +418,11 @@ rule
418
418
  }
419
419
  | primary_value tCOLON2 tCONSTANT
420
420
  {
421
- if in_def?
422
- diagnostic(:error, :dynamic_const, val[2])
423
- end
424
-
425
421
  result = @builder.assignable(
426
422
  @builder.const_fetch(val[0], val[1], val[2]))
427
423
  }
428
424
  | tCOLON3 tCONSTANT
429
425
  {
430
- if in_def?
431
- diagnostic(:error, :dynamic_const, val[1])
432
- end
433
-
434
426
  result = @builder.assignable(
435
427
  @builder.const_global(val[0], val[1]))
436
428
  }
@@ -461,19 +453,11 @@ rule
461
453
  }
462
454
  | primary_value tCOLON2 tCONSTANT
463
455
  {
464
- if in_def?
465
- diagnostic(:error, :dynamic_const, val[2])
466
- end
467
-
468
456
  result = @builder.assignable(
469
457
  @builder.const_fetch(val[0], val[1], val[2]))
470
458
  }
471
459
  | tCOLON3 tCONSTANT
472
460
  {
473
- if in_def?
474
- diagnostic(:error, :dynamic_const, val[1])
475
- end
476
-
477
461
  result = @builder.assignable(
478
462
  @builder.const_global(val[0], val[1]))
479
463
  }
@@ -1340,7 +1324,7 @@ rule
1340
1324
  }
1341
1325
  | tPIPE block_var tPIPE
1342
1326
  {
1343
- result = @builder.args(val[0], val[1], val[2])
1327
+ result = @builder.args(val[0], val[1], val[2], false)
1344
1328
  }
1345
1329
 
1346
1330
  do_block: kDO_BLOCK
@@ -473,19 +473,11 @@ rule
473
473
  }
474
474
  | primary_value tCOLON2 tCONSTANT
475
475
  {
476
- if in_def?
477
- diagnostic(:error, :dynamic_const, val[2])
478
- end
479
-
480
476
  result = @builder.assignable(
481
477
  @builder.const_fetch(val[0], val[1], val[2]))
482
478
  }
483
479
  | tCOLON3 tCONSTANT
484
480
  {
485
- if in_def?
486
- diagnostic(:error, :dynamic_const, val[1])
487
- end
488
-
489
481
  result = @builder.assignable(
490
482
  @builder.const_global(val[0], val[1]))
491
483
  }
@@ -520,19 +512,11 @@ rule
520
512
  }
521
513
  | primary_value tCOLON2 tCONSTANT
522
514
  {
523
- if in_def?
524
- diagnostic(:error, :dynamic_const, val[2])
525
- end
526
-
527
515
  result = @builder.assignable(
528
516
  @builder.const_fetch(val[0], val[1], val[2]))
529
517
  }
530
518
  | tCOLON3 tCONSTANT
531
519
  {
532
- if in_def?
533
- diagnostic(:error, :dynamic_const, val[1])
534
- end
535
-
536
520
  result = @builder.assignable(
537
521
  @builder.const_global(val[0], val[1]))
538
522
  }
@@ -1425,9 +1409,14 @@ rule
1425
1409
  }
1426
1410
  | f_bad_arg
1427
1411
 
1428
- lambda: f_larglist lambda_body
1412
+ lambda: {
1413
+ @static_env.extend_dynamic
1414
+ }
1415
+ f_larglist lambda_body
1429
1416
  {
1430
- result = [ val[0], val[1] ]
1417
+ result = [ val[1], val[2] ]
1418
+
1419
+ @static_env.unextend
1431
1420
  }
1432
1421
 
1433
1422
  f_larglist: tLPAREN2 f_args opt_bv_decl rparen
@@ -481,19 +481,11 @@ rule
481
481
  }
482
482
  | primary_value tCOLON2 tCONSTANT
483
483
  {
484
- if in_def?
485
- diagnostic(:error, :dynamic_const, val[2])
486
- end
487
-
488
484
  result = @builder.assignable(
489
485
  @builder.const_fetch(val[0], val[1], val[2]))
490
486
  }
491
487
  | tCOLON3 tCONSTANT
492
488
  {
493
- if in_def?
494
- diagnostic(:error, :dynamic_const, val[1])
495
- end
496
-
497
489
  result = @builder.assignable(
498
490
  @builder.const_global(val[0], val[1]))
499
491
  }
@@ -528,19 +520,11 @@ rule
528
520
  }
529
521
  | primary_value tCOLON2 tCONSTANT
530
522
  {
531
- if in_def?
532
- diagnostic(:error, :dynamic_const, val[2])
533
- end
534
-
535
523
  result = @builder.assignable(
536
524
  @builder.const_fetch(val[0], val[1], val[2]))
537
525
  }
538
526
  | tCOLON3 tCONSTANT
539
527
  {
540
- if in_def?
541
- diagnostic(:error, :dynamic_const, val[1])
542
- end
543
-
544
528
  result = @builder.assignable(
545
529
  @builder.const_global(val[0], val[1]))
546
530
  }
@@ -668,21 +652,13 @@ rule
668
652
  }
669
653
  | primary_value tCOLON2 tCONSTANT tOP_ASGN arg
670
654
  {
671
- if in_def?
672
- diagnostic(:error, :dynamic_const, val[2], [ val[3] ])
673
- end
674
-
675
- const = @builder.assignable(
655
+ const = @builder.const_op_assignable(
676
656
  @builder.const_fetch(val[0], val[1], val[2]))
677
657
  result = @builder.op_assign(const, val[3], val[4])
678
658
  }
679
659
  | tCOLON3 tCONSTANT tOP_ASGN arg
680
660
  {
681
- if in_def?
682
- diagnostic(:error, :dynamic_const, val[1], [ val[2] ])
683
- end
684
-
685
- const = @builder.assignable(
661
+ const = @builder.const_op_assignable(
686
662
  @builder.const_global(val[0], val[1]))
687
663
  result = @builder.op_assign(const, val[2], val[3])
688
664
  }
@@ -1486,9 +1462,14 @@ opt_block_args_tail:
1486
1462
  }
1487
1463
  | f_bad_arg
1488
1464
 
1489
- lambda: f_larglist lambda_body
1465
+ lambda: {
1466
+ @static_env.extend_dynamic
1467
+ }
1468
+ f_larglist lambda_body
1490
1469
  {
1491
- result = [ val[0], val[1] ]
1470
+ result = [ val[1], val[2] ]
1471
+
1472
+ @static_env.unextend
1492
1473
  }
1493
1474
 
1494
1475
  f_larglist: tLPAREN2 f_args opt_bv_decl tRPAREN
@@ -116,11 +116,7 @@ rule
116
116
  stmt_or_begin: stmt
117
117
  | klBEGIN tLCURLY top_compstmt tRCURLY
118
118
  {
119
- if in_def?
120
- diagnostic(:error, :begin_in_method, val[0])
121
- end
122
-
123
- result = @builder.preexe(val[0], val[1], val[2], val[3])
119
+ diagnostic(:error, :begin_in_method, val[0])
124
120
  }
125
121
 
126
122
  stmt: kALIAS fitem
@@ -476,19 +472,11 @@ rule
476
472
  }
477
473
  | primary_value tCOLON2 tCONSTANT
478
474
  {
479
- if in_def?
480
- diagnostic(:error, :dynamic_const, val[2])
481
- end
482
-
483
475
  result = @builder.assignable(
484
476
  @builder.const_fetch(val[0], val[1], val[2]))
485
477
  }
486
478
  | tCOLON3 tCONSTANT
487
479
  {
488
- if in_def?
489
- diagnostic(:error, :dynamic_const, val[1])
490
- end
491
-
492
480
  result = @builder.assignable(
493
481
  @builder.const_global(val[0], val[1]))
494
482
  }
@@ -523,19 +511,11 @@ rule
523
511
  }
524
512
  | primary_value tCOLON2 tCONSTANT
525
513
  {
526
- if in_def?
527
- diagnostic(:error, :dynamic_const, val[2])
528
- end
529
-
530
514
  result = @builder.assignable(
531
515
  @builder.const_fetch(val[0], val[1], val[2]))
532
516
  }
533
517
  | tCOLON3 tCONSTANT
534
518
  {
535
- if in_def?
536
- diagnostic(:error, :dynamic_const, val[1])
537
- end
538
-
539
519
  result = @builder.assignable(
540
520
  @builder.const_global(val[0], val[1]))
541
521
  }
@@ -663,21 +643,13 @@ rule
663
643
  }
664
644
  | primary_value tCOLON2 tCONSTANT tOP_ASGN arg
665
645
  {
666
- if in_def?
667
- diagnostic(:error, :dynamic_const, val[2], [ val[3] ])
668
- end
669
-
670
- const = @builder.assignable(
646
+ const = @builder.const_op_assignable(
671
647
  @builder.const_fetch(val[0], val[1], val[2]))
672
648
  result = @builder.op_assign(const, val[3], val[4])
673
649
  }
674
650
  | tCOLON3 tCONSTANT tOP_ASGN arg
675
651
  {
676
- if in_def?
677
- diagnostic(:error, :dynamic_const, val[1], [ val[2] ])
678
- end
679
-
680
- const = @builder.assignable(
652
+ const = @builder.const_op_assignable(
681
653
  @builder.const_global(val[0], val[1]))
682
654
  result = @builder.op_assign(const, val[2], val[3])
683
655
  }
@@ -1487,9 +1459,14 @@ opt_block_args_tail:
1487
1459
  }
1488
1460
  | f_bad_arg
1489
1461
 
1490
- lambda: f_larglist lambda_body
1462
+ lambda: {
1463
+ @static_env.extend_dynamic
1464
+ }
1465
+ f_larglist lambda_body
1491
1466
  {
1492
- result = [ val[0], val[1] ]
1467
+ result = [ val[1], val[2] ]
1468
+
1469
+ @static_env.unextend
1493
1470
  }
1494
1471
 
1495
1472
  f_larglist: tLPAREN2 f_args opt_bv_decl tRPAREN
@@ -2336,5 +2313,5 @@ warn "warning: Ruby 2.1 is not released yet and parser support may be incomplete
2336
2313
  end
2337
2314
 
2338
2315
  def default_encoding
2339
- Encoding::BINARY
2316
+ Encoding::UTF_8
2340
2317
  end
@@ -155,7 +155,12 @@ module Parser
155
155
  source.force_encoding(@parser.default_encoding)
156
156
 
157
157
  buffer = Parser::Source::Buffer.new(filename)
158
- buffer.source = source
158
+
159
+ if @parser.class.name == 'Parser::Ruby18'
160
+ buffer.raw_source = source
161
+ else
162
+ buffer.source = source
163
+ end
159
164
 
160
165
  process_buffer(buffer)
161
166
  end
@@ -3,20 +3,32 @@
3
3
  module Parser
4
4
  module Source
5
5
 
6
+ ##
7
+ # @api public
8
+ #
6
9
  class Buffer
7
10
  attr_reader :name, :first_line
8
11
 
12
+ ENCODING_RE =
13
+ /\#.*coding\s*[:=]\s*
14
+ (
15
+ # Special-case: there's a UTF8-MAC encoding.
16
+ (utf8-mac)
17
+ |
18
+ # Chew the suffix; it's there for emacs compat.
19
+ ([A-Za-z0-9_-]+?)(-unix|-dos|-mac)
20
+ |
21
+ ([A-Za-z0-9_-]+)
22
+ )
23
+ /x
24
+
9
25
  def self.recognize_encoding(string)
10
26
  return if string.empty?
11
27
 
12
28
  # extract the first two lines in an efficient way
13
- string =~ /(.*)\n?(.*\n)?/
29
+ string =~ /\A(.*)\n?(.*\n)?/
14
30
  first_line, second_line = $1, $2
15
31
 
16
- [first_line, second_line].each do |line|
17
- line.force_encoding(Encoding::ASCII_8BIT) if line
18
- end
19
-
20
32
  if first_line =~ /\A\xef\xbb\xbf/ # BOM
21
33
  return Encoding::UTF_8
22
34
  elsif first_line[0, 2] == '#!'
@@ -25,8 +37,8 @@ module Parser
25
37
  encoding_line = first_line
26
38
  end
27
39
 
28
- if encoding_line =~ /^#.*coding\s*[:=]\s*([A-Za-z0-9_-]+)/
29
- Encoding.find($1)
40
+ if (result = ENCODING_RE.match(encoding_line))
41
+ Encoding.find(result[2] || result[3] || result[5])
30
42
  else
31
43
  nil
32
44
  end
@@ -37,15 +49,16 @@ module Parser
37
49
  # string.
38
50
  #
39
51
  def self.reencode_string(string)
40
- encoding = recognize_encoding(string)
52
+ original_encoding = string.encoding
53
+ detected_encoding = recognize_encoding(string.force_encoding(Encoding::BINARY))
41
54
 
42
- if encoding.nil?
55
+ if detected_encoding.nil?
56
+ string.force_encoding(original_encoding)
57
+ elsif detected_encoding == Encoding::BINARY
43
58
  string
44
- elsif encoding == Encoding::BINARY
45
- string.force_encoding(Encoding::BINARY)
46
59
  else
47
60
  string.
48
- force_encoding(encoding).
61
+ force_encoding(detected_encoding).
49
62
  encode(Encoding::UTF_8)
50
63
  end
51
64
  end
@@ -76,16 +89,20 @@ module Parser
76
89
  end
77
90
 
78
91
  def source=(source)
79
- if @source
80
- raise ArgumentError, 'Source::Buffer is immutable'
81
- end
82
-
83
- if source.respond_to? :encoding
92
+ if defined?(Encoding)
84
93
  source = source.dup if source.frozen?
85
94
  source = self.class.reencode_string(source)
86
95
  end
87
96
 
88
- @source = source.freeze
97
+ self.raw_source = source
98
+ end
99
+
100
+ def raw_source=(source)
101
+ if @source
102
+ raise ArgumentError, 'Source::Buffer is immutable'
103
+ end
104
+
105
+ @source = source.gsub(/\r\n/, "\n").freeze
89
106
  end
90
107
 
91
108
  def decompose_position(position)
@@ -94,12 +111,18 @@ module Parser
94
111
  [ @first_line + line_no, position - line_begin ]
95
112
  end
96
113
 
97
- def source_line(line)
114
+ def source_line(lineno)
98
115
  unless @lines
99
- @lines = @source.lines.map(&:chomp)
116
+ @lines = @source.lines.to_a
117
+ @lines.each { |line| line.gsub!(/\n$/, '') }
118
+
119
+ # Lexer has an "infinite stream of EOF symbols" after the
120
+ # actual EOF, so in some cases (e.g. EOF token of ruby-parse -E)
121
+ # tokens will refer to one line past EOF.
122
+ @lines << ""
100
123
  end
101
124
 
102
- @lines[line - @first_line].dup
125
+ @lines[lineno - @first_line].dup
103
126
  end
104
127
 
105
128
  private