parser 2.3.0.pre.6 → 2.3.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 76b617323cf8143b8935312b8ce39e4c2d847b63
4
- data.tar.gz: 13f93cde47689ad2352602d2aac0555b674f860d
3
+ metadata.gz: 8fcc0b8421a588c61ea9d3969ec4f0e1f13c7d8e
4
+ data.tar.gz: 241d5a150edb29c9f95c7ecd4361d6e2fd4bc4d3
5
5
  SHA512:
6
- metadata.gz: ddac7c4fdf0503ff15acbb45eedb8bdd9713ed38ceefd5f306044b68e8ce6c016919a689e543ad7b9648c0dcfee43870164194ed8f02fcfd90b9e383219ad9bf
7
- data.tar.gz: 7831e67d2cb5ad9e65e1230a86c5117804e17e088e78f01c28632917c1cf0b3cce3abfb46ab889007acf7232f315989467d756a3a89d4b335d71022b53899c2a
6
+ metadata.gz: a4f18eb04354a0230053a4b11db78aef983aebcad4a5edd65688a55b42e3410e9ff73d7f2a1ccb95b249c4e9ed210fdd194168aaa2bb9977a3b4d13eb0521ad2
7
+ data.tar.gz: 3125b274c9f3040d30494112d95c3788800be632508068421b583d22e04851e45b8a886cdc73bc4359648494847d3e76920682fde0d868f26cc7d660335ca840
data/.travis.yml CHANGED
@@ -4,8 +4,9 @@ rvm:
4
4
  - 1.9.2
5
5
  - 1.9.3
6
6
  - 2.0.0
7
- - 2.1
8
- - 2.2
7
+ - 2.1.8
8
+ - 2.2.4
9
+ - 2.3.0
9
10
  - ruby-head
10
11
  - jruby-18mode
11
12
  - jruby-19mode
data/LICENSE.txt CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2013 Peter Zotov <whitequark@whitequark.org>
1
+ Copyright (c) 2013-2016 whitequark <whitequark@whitequark.org>
2
2
 
3
3
  Parts of the source are derived from ruby_parser:
4
4
  Copyright (c) Ryan Davis, seattle.rb
data/README.md CHANGED
@@ -2,7 +2,6 @@
2
2
 
3
3
  [![Gem Version](https://badge.fury.io/rb/parser.png)](https://badge.fury.io/rb/parser)
4
4
  [![Build Status](https://travis-ci.org/whitequark/parser.png?branch=master)](https://travis-ci.org/whitequark/parser)
5
- [![Code Climate](https://codeclimate.com/github/whitequark/parser.png)](https://codeclimate.com/github/whitequark/parser)
6
5
  [![Coverage Status](https://coveralls.io/repos/whitequark/parser/badge.png?branch=master)](https://coveralls.io/r/whitequark/parser)
7
6
 
8
7
  _Parser_ is a production-ready Ruby parser written in pure Ruby. It recognizes as
data/lib/parser.rb CHANGED
@@ -60,6 +60,7 @@ module Parser
60
60
  require 'parser/lexer'
61
61
  require 'parser/lexer/literal'
62
62
  require 'parser/lexer/stack_state'
63
+ require 'parser/lexer/dedenter'
63
64
 
64
65
  module Builders
65
66
  require 'parser/builders/default'
@@ -186,6 +186,30 @@ module Parser
186
186
  string_map(begin_t, parts, end_t))
187
187
  end
188
188
 
189
+ # Indented (interpolated, noninterpolated, executable) strings
190
+
191
+ def dedent_string(node, dedent_level)
192
+ if !dedent_level.nil?
193
+ dedenter = Lexer::Dedenter.new(dedent_level)
194
+
195
+ if node.type == :str
196
+ str = node.children.first
197
+ dedenter.dedent(str)
198
+ elsif node.type == :dstr || node.type == :xstr
199
+ node.children.each do |str_node|
200
+ if str_node.type == :str
201
+ str = str_node.children.first
202
+ dedenter.dedent(str)
203
+ else
204
+ dedenter.interrupt
205
+ end
206
+ end
207
+ end
208
+ end
209
+
210
+ node
211
+ end
212
+
189
213
  # Regular expressions
190
214
 
191
215
  def regexp_options(regopt_t)
@@ -10,48 +10,63 @@ module Parser
10
10
 
11
11
  case RUBY_VERSION
12
12
  when /^1\.8\./
13
- if RUBY_VERSION != '1.8.7'
14
- warn_syntax_deviation 'parser/ruby18', '1.8.7'
13
+ current_version = '1.8.7'
14
+ if RUBY_VERSION != current_version
15
+ warn_syntax_deviation 'parser/ruby18', current_version
15
16
  end
16
17
 
17
18
  require 'parser/ruby18'
18
19
  CurrentRuby = Ruby18
19
20
 
20
21
  when /^1\.9\./
21
- if RUBY_VERSION != '1.9.3'
22
- warn_syntax_deviation 'parser/ruby19', '1.9.3'
22
+ current_version = '1.9.3'
23
+ if RUBY_VERSION != current_version
24
+ warn_syntax_deviation 'parser/ruby19', current_version
23
25
  end
24
26
 
25
27
  require 'parser/ruby19'
26
28
  CurrentRuby = Ruby19
27
29
 
28
30
  when /^2\.0\./
29
- if RUBY_VERSION != '2.0.0'
30
- warn_syntax_deviation 'parser/ruby20', '2.0.0'
31
+ current_version = '2.0.0'
32
+ if RUBY_VERSION != current_version
33
+ warn_syntax_deviation 'parser/ruby20', current_version
31
34
  end
32
35
 
33
36
  require 'parser/ruby20'
34
37
  CurrentRuby = Ruby20
35
38
 
36
39
  when /^2\.1\./
37
- if RUBY_VERSION != '2.1.8'
38
- warn_syntax_deviation 'parser/ruby21', '2.1.8'
40
+ current_version = '2.1.8'
41
+ if RUBY_VERSION != current_version
42
+ warn_syntax_deviation 'parser/ruby21', current_version
39
43
  end
40
44
 
41
45
  require 'parser/ruby21'
42
46
  CurrentRuby = Ruby21
43
47
 
44
48
  when /^2\.2\./
45
- if RUBY_VERSION != '2.2.4'
46
- warn_syntax_deviation 'parser/ruby22', '2.2.4'
49
+ current_version = '2.2.4'
50
+ if RUBY_VERSION != current_version
51
+ warn_syntax_deviation 'parser/ruby22', current_version
47
52
  end
48
53
 
49
54
  require 'parser/ruby22'
50
55
  CurrentRuby = Ruby22
51
56
 
52
57
  when /^2\.3\./
53
- if RUBY_VERSION != '2.3.0'
54
- warn_syntax_deviation 'parser/ruby23', '2.3.0'
58
+ current_version = '2.3.0'
59
+ if RUBY_VERSION != current_version
60
+ warn_syntax_deviation 'parser/ruby23', current_version
61
+ end
62
+
63
+ require 'parser/ruby23'
64
+ CurrentRuby = Ruby23
65
+
66
+ when /^2\.4\./
67
+ current_version = 'HEAD'
68
+ if RUBY_VERSION != current_version
69
+ warn_syntax_deviation 'parser/ruby23', current_version
55
70
  end
56
71
 
57
72
  require 'parser/ruby23'
@@ -59,7 +74,7 @@ module Parser
59
74
 
60
75
  else # :nocov:
61
76
  # Keep this in sync with released Ruby.
62
- warn_syntax_deviation 'parser/ruby22', '2.2.x'
77
+ warn_syntax_deviation 'parser/ruby23', '2.3.x'
63
78
  require 'parser/ruby22'
64
79
  CurrentRuby = Ruby22
65
80
  end
data/lib/parser/lexer.rl CHANGED
@@ -134,7 +134,7 @@ class Parser::Lexer
134
134
  @source = nil # source string
135
135
  @source_pts = nil # @source as a codepoint array
136
136
  @encoding = nil # target encoding for output strings
137
- @need_encode = nil
137
+ @need_encode = nil
138
138
 
139
139
  @p = 0 # stream position (saved manually in #advance)
140
140
  @ts = nil # token start
@@ -161,15 +161,21 @@ class Parser::Lexer
161
161
  @escape_s = nil # starting position of current sequence
162
162
  @escape = nil # last escaped sequence, as string
163
163
 
164
- # See below the section on parsing heredocs.
165
- @heredoc_e = nil
166
- @herebody_s = nil
164
+ @herebody_s = nil # starting position of current heredoc line
167
165
 
168
166
  # Ruby 1.9 ->() lambdas emit a distinct token if do/{ is
169
167
  # encountered after a matching closing parenthesis.
170
168
  @paren_nest = 0
171
169
  @lambda_stack = []
172
170
 
171
+ # After encountering the closing line of <<~SQUIGGLY_HEREDOC,
172
+ # we store the indentation level and give it out to the parser
173
+ # on request. It is not possible to infer indentation level just
174
+ # from the AST because escape sequences such as `\ ` or `\t` are
175
+ # expanded inside the lexer, but count as non-whitespace for
176
+ # indentation purposes.
177
+ @dedent_level = nil
178
+
173
179
  # If the lexer is in `command state' (aka expr_value)
174
180
  # at the entry to #advance, it will transition to expr_cmdarg
175
181
  # instead of expr_arg at certain points.
@@ -184,26 +190,21 @@ class Parser::Lexer
184
190
 
185
191
  if @source_buffer
186
192
  @source = @source_buffer.source
193
+ @need_encode = false
187
194
 
188
- if defined?(Encoding)
195
+ if @has_encode
189
196
  @encoding = @source.encoding
190
-
191
- # This is a workaround for 1.9.2, which (without force_encoding)
192
- # would convert the result to UTF-8 (source encoding of lexer.rl).
193
- @source += "\0".dup.force_encoding(@encoding)
194
- else
195
- @source += "\0"
196
197
  end
197
198
 
198
- if defined?(Encoding) && @source.encoding == Encoding::UTF_8
199
+ if @has_encode && @source.encoding == Encoding::UTF_8
199
200
  @source_pts = @source.unpack('U*')
200
- @need_encode = @has_encode && @encoding != Encoding::UTF_8
201
201
  else
202
202
  @source_pts = @source.unpack('C*')
203
203
  end
204
204
 
205
- if (@source_pts.size > 1_000_000 && @has_encode) ||
206
- @force_utf32
205
+ if @has_encode &&
206
+ (@source_pts.size > 1_000_000 || @force_utf32) &&
207
+ @encoding != Encoding::UTF_32LE
207
208
  # A heuristic: if the buffer is larger than 1M, then
208
209
  # store it in UTF-32 and convert the tokens as they're
209
210
  # going out. If it's smaller, the conversion overhead
@@ -216,7 +217,7 @@ class Parser::Lexer
216
217
  #
217
218
  # Patches accepted.
218
219
  @source = @source.encode(Encoding::UTF_32LE)
219
- @need_encode = @has_encode && @encoding != Encoding::UTF_32LE
220
+ @need_encode = true
220
221
  end
221
222
 
222
223
  if @source_pts[0] == 0xfeff
@@ -275,6 +276,13 @@ class Parser::Lexer
275
276
  @cond = @cond_stack.pop
276
277
  end
277
278
 
279
+ def dedent_level
280
+ # We erase @dedent_level as a precaution to avoid accidentally
281
+ # using a stale value.
282
+ dedent_level, @dedent_level = @dedent_level, nil
283
+ dedent_level
284
+ end
285
+
278
286
  # Return next token: [type, value].
279
287
  def advance
280
288
  if @token_queue.any?
@@ -293,7 +301,7 @@ class Parser::Lexer
293
301
  _lex_from_state_actions = klass.send :_lex_from_state_actions
294
302
  _lex_eof_trans = klass.send :_lex_eof_trans
295
303
 
296
- pe = @source.length + 1
304
+ pe = @source_pts.size + 2
297
305
  p, eof = @p, pe
298
306
 
299
307
  @command_state = (@cs == klass.lex_en_expr_value ||
@@ -309,7 +317,7 @@ class Parser::Lexer
309
317
  elsif @cs == klass.lex_error
310
318
  [ false, [ '$error', range(p - 1, p) ] ]
311
319
  else
312
- eof = @source.length
320
+ eof = @source_pts.size + 1
313
321
  [ false, [ '$eof', range(eof, eof) ] ]
314
322
  end
315
323
  end
@@ -434,6 +442,8 @@ class Parser::Lexer
434
442
  def pop_literal
435
443
  old_literal = @literal_stack.pop
436
444
 
445
+ @dedent_level = old_literal.dedent_level
446
+
437
447
  if old_literal.type == :tREGEXP_BEG
438
448
  # Fetch modifiers.
439
449
  self.class.lex_en_regexp_modifiers
@@ -739,10 +749,10 @@ class Parser::Lexer
739
749
 
740
750
  # %q[\u123] %q[\u{12]
741
751
  | 'u' ( c_any{0,4} -
742
- xdigit{4} - # \u1234 is valid
743
- ( '{' xdigit{1,3} # \u{1 \u{12 \u{123 are valid
744
- | '{' xdigit [ \t}] any # \u{1. \u{1} are valid
745
- | '{' xdigit{2} [ \t}] # \u{12. \u{12} are valid
752
+ xdigit{4} - # \u1234 is valid
753
+ ( '{' xdigit{1,3} # \u{1 \u{12 \u{123 are valid
754
+ | '{' xdigit [ \t}] any? # \u{1. \u{1} are valid
755
+ | '{' xdigit{2} [ \t}] # \u{12. \u{12} are valid
746
756
  )
747
757
  )
748
758
  % {
@@ -818,10 +828,10 @@ class Parser::Lexer
818
828
  # the result is: " i am a heredoc\n"
819
829
  #
820
830
  # To parse them, lexer refers to two kinds (remember, nested heredocs)
821
- # of positions in the input stream, namely @heredoc_e
831
+ # of positions in the input stream, namely heredoc_e
822
832
  # (HEREDOC declaration End) and @herebody_s (HEREdoc BODY line Start).
823
833
  #
824
- # @heredoc_e is simply contained inside the corresponding Literal, and
834
+ # heredoc_e is simply contained inside the corresponding Literal, and
825
835
  # when the heredoc is closed, the lexing is restarted from that position.
826
836
  #
827
837
  # @herebody_s is quite more complex. First, @herebody_s changes after each
@@ -844,7 +854,7 @@ class Parser::Lexer
844
854
  };
845
855
 
846
856
  action extend_string {
847
- string = @source[@ts...@te]
857
+ string = tok
848
858
  string = string.encode(@encoding) if @need_encode
849
859
 
850
860
  # tLABEL_END is only possible in non-cond context on >= 2.2
@@ -950,6 +960,9 @@ class Parser::Lexer
950
960
  p = current_literal.heredoc_e - 1
951
961
  fnext *pop_literal; fbreak;
952
962
  else
963
+ # Calculate indentation level for <<~HEREDOCs.
964
+ current_literal.infer_indent_level(line)
965
+
953
966
  # Ditto.
954
967
  @herebody_s = @te
955
968
  end
@@ -1288,7 +1301,7 @@ class Parser::Lexer
1288
1301
  #
1289
1302
  expr_fname := |*
1290
1303
  keyword
1291
- => { emit(KEYWORDS_BEGIN[tok]);
1304
+ => { emit_table(KEYWORDS_BEGIN);
1292
1305
  fnext expr_endfn; fbreak; };
1293
1306
 
1294
1307
  constant
@@ -1642,21 +1655,21 @@ class Parser::Lexer
1642
1655
  # /=/ (disambiguation with /=)
1643
1656
  '/' c_any
1644
1657
  => {
1645
- type = delimiter = tok[0].chr
1658
+ type = delimiter = @source[@ts].chr
1646
1659
  fhold; fgoto *push_literal(type, delimiter, @ts);
1647
1660
  };
1648
1661
 
1649
1662
  # %<string>
1650
1663
  '%' ( any - [A-Za-z] )
1651
1664
  => {
1652
- type, delimiter = tok[0].chr, tok[-1].chr
1665
+ type, delimiter = @source[@ts].chr, tok[-1].chr
1653
1666
  fgoto *push_literal(type, delimiter, @ts);
1654
1667
  };
1655
1668
 
1656
1669
  # %w(we are the people)
1657
1670
  '%' [A-Za-z]+ c_any
1658
1671
  => {
1659
- type, delimiter = tok[0..-2], tok[-1].chr
1672
+ type, delimiter = tok[0..-2], @source[@te - 1].chr
1660
1673
  fgoto *push_literal(type, delimiter, @ts);
1661
1674
  };
1662
1675
 
@@ -1666,27 +1679,36 @@ class Parser::Lexer
1666
1679
  };
1667
1680
 
1668
1681
  # Heredoc start.
1669
- # <<EOF | <<-END | <<"FOOBAR" | <<-`SMTH`
1670
- '<<' '-'?
1682
+ # <<END | <<'END' | <<"END" | <<`END` |
1683
+ # <<-END | <<-'END' | <<-"END" | <<-`END` |
1684
+ # <<~END | <<~'END' | <<~"END" | <<~`END`
1685
+ '<<' [~\-]?
1671
1686
  ( '"' ( c_line - '"' )* '"'
1672
1687
  | "'" ( c_line - "'" )* "'"
1673
1688
  | "`" ( c_line - "`" )* "`"
1674
- | bareword ) % { @heredoc_e = p }
1689
+ | bareword ) % { heredoc_e = p }
1675
1690
  c_line* c_nl % { new_herebody_s = p }
1676
1691
  => {
1677
- tok(@ts, @heredoc_e) =~ /^<<(-?)(["'`]?)(.*)\2$/
1692
+ tok(@ts, heredoc_e) =~ /^<<(-?)(~?)(["'`]?)(.*)\3$/
1678
1693
 
1679
- indent = !$1.empty?
1680
- type = '<<' + ($2.empty? ? '"' : $2)
1681
- delimiter = $3
1694
+ indent = !$1.empty? || !$2.empty?
1695
+ dedent_body = !$2.empty?
1696
+ type = '<<' + ($3.empty? ? '"' : $3)
1697
+ delimiter = $4
1682
1698
 
1683
- fnext *push_literal(type, delimiter, @ts, @heredoc_e, indent);
1699
+ if dedent_body && version?(18, 19, 20, 21, 22)
1700
+ emit(:tLSHFT, '<<', @ts, @ts + 2)
1701
+ p = @ts + 1
1702
+ fnext expr_beg; fbreak;
1703
+ else
1704
+ fnext *push_literal(type, delimiter, @ts, heredoc_e, indent, dedent_body);
1684
1705
 
1685
- if @herebody_s.nil?
1686
- @herebody_s = new_herebody_s
1687
- end
1706
+ if @herebody_s.nil?
1707
+ @herebody_s = new_herebody_s
1708
+ end
1688
1709
 
1689
- p = @herebody_s - 1
1710
+ p = @herebody_s - 1
1711
+ end
1690
1712
  };
1691
1713
 
1692
1714
  #
@@ -1696,7 +1718,7 @@ class Parser::Lexer
1696
1718
  # :"bar", :'baz'
1697
1719
  ':' ['"] # '
1698
1720
  => {
1699
- type, delimiter = tok, tok[-1].chr
1721
+ type, delimiter = tok, @source[@te - 1].chr
1700
1722
  fgoto *push_literal(type, delimiter, @ts);
1701
1723
  };
1702
1724
 
@@ -1718,7 +1740,9 @@ class Parser::Lexer
1718
1740
  # AMBIGUOUS TERNARY OPERATOR
1719
1741
  #
1720
1742
 
1721
- '?' ( e_bs escape
1743
+ # Character constant, like ?a, ?\n, ?\u1000, and so on
1744
+ # Don't accept \u escape with multiple codepoints, like \u{1 2 3}
1745
+ '?' ( e_bs ( escape - ( '\u{' (xdigit+ [ \t]+)+ xdigit+ '}' ))
1722
1746
  | (c_any - c_space_nl - e_bs) % { @escape = nil }
1723
1747
  )
1724
1748
  => {
@@ -1739,7 +1763,7 @@ class Parser::Lexer
1739
1763
  '?' c_space_nl
1740
1764
  => {
1741
1765
  escape = { " " => '\s', "\r" => '\r', "\n" => '\n', "\t" => '\t',
1742
- "\v" => '\v', "\f" => '\f' }[tok[1]]
1766
+ "\v" => '\v', "\f" => '\f' }[@source[@ts + 1]]
1743
1767
  diagnostic :warning, :invalid_escape_use, { :escape => escape }, range
1744
1768
 
1745
1769
  p = @ts - 1
@@ -1769,16 +1793,19 @@ class Parser::Lexer
1769
1793
  @lambda_stack.pop
1770
1794
  emit(:tLAMBEG)
1771
1795
  else
1772
- emit_table(PUNCTUATION_BEGIN)
1796
+ emit(:tLBRACE)
1773
1797
  end
1774
1798
  fbreak;
1775
1799
  };
1776
1800
 
1777
1801
  # a([1, 2])
1778
- e_lbrack |
1802
+ e_lbrack
1803
+ => { emit(:tLBRACK)
1804
+ fbreak; };
1805
+
1779
1806
  # a()
1780
1807
  e_lparen
1781
- => { emit_table(PUNCTUATION_BEGIN)
1808
+ => { emit(:tLPAREN)
1782
1809
  fbreak; };
1783
1810
 
1784
1811
  # a(+b)
@@ -1789,7 +1816,7 @@ class Parser::Lexer
1789
1816
  # rescue Exception => e: Block rescue.
1790
1817
  # Special because it should transition to expr_mid.
1791
1818
  'rescue' %{ tm = p } '=>'?
1792
- => { emit_table(KEYWORDS_BEGIN, @ts, tm)
1819
+ => { emit(:kRESCUE, tok(@ts, tm), @ts, tm)
1793
1820
  p = tm - 1
1794
1821
  fnext expr_mid; fbreak; };
1795
1822
 
@@ -1809,7 +1836,7 @@ class Parser::Lexer
1809
1836
  if version?(18)
1810
1837
  ident = tok(@ts, @te - 2)
1811
1838
 
1812
- emit((tok[0] =~ /[A-Z]/) ? :tCONSTANT : :tIDENTIFIER,
1839
+ emit((@source[@ts] =~ /[A-Z]/) ? :tCONSTANT : :tIDENTIFIER,
1813
1840
  ident, @ts, @te - 2)
1814
1841
  fhold; # continue as a symbol
1815
1842
 
@@ -1898,7 +1925,7 @@ class Parser::Lexer
1898
1925
  # "bar", 'baz'
1899
1926
  ['"] # '
1900
1927
  => {
1901
- fgoto *push_literal(tok, tok, @ts, nil, false, false);
1928
+ fgoto *push_literal(tok, tok, @ts);
1902
1929
  };
1903
1930
 
1904
1931
  w_space_comment;
@@ -1919,7 +1946,7 @@ class Parser::Lexer
1919
1946
 
1920
1947
  '->'
1921
1948
  => {
1922
- emit_table(PUNCTUATION, @ts, @ts + 2)
1949
+ emit(:tLAMBDA, tok(@ts, @ts + 2), @ts, @ts + 2)
1923
1950
 
1924
1951
  @lambda_stack.push @paren_nest
1925
1952
  fnext expr_endfn; fbreak;
@@ -1937,7 +1964,7 @@ class Parser::Lexer
1937
1964
  end
1938
1965
  else
1939
1966
  if tok == '{'
1940
- emit_table(PUNCTUATION)
1967
+ emit(:tLCURLY)
1941
1968
  else # 'do'
1942
1969
  emit_do
1943
1970
  end
@@ -1989,13 +2016,11 @@ class Parser::Lexer
1989
2016
  if version?(18)
1990
2017
  emit(:tIDENTIFIER)
1991
2018
 
1992
- if !@static_env.nil? && @static_env.declared?(tok)
1993
- fnext expr_end;
1994
- else
2019
+ unless !@static_env.nil? && @static_env.declared?(tok)
1995
2020
  fnext *arg_or_cmdarg;
1996
2021
  end
1997
2022
  else
1998
- emit_table(KEYWORDS)
2023
+ emit(:k__ENCODING__)
1999
2024
  end
2000
2025
  fbreak;
2001
2026
  };
@@ -2093,8 +2118,8 @@ class Parser::Lexer
2093
2118
  # `echo foo`, "bar", 'baz'
2094
2119
  '`' | ['"] # '
2095
2120
  => {
2096
- type, delimiter = tok, tok[-1].chr
2097
- fgoto *push_literal(type, delimiter, @ts, nil, false, true);
2121
+ type, delimiter = tok, @source[@te - 1].chr
2122
+ fgoto *push_literal(type, delimiter, @ts, nil, false, false, true);
2098
2123
  };
2099
2124
 
2100
2125
  #
@@ -2166,11 +2191,11 @@ class Parser::Lexer
2166
2191
  fnext expr_beg; fbreak; };
2167
2192
 
2168
2193
  '?'
2169
- => { emit_table(PUNCTUATION)
2194
+ => { emit(:tEH)
2170
2195
  fnext expr_value; fbreak; };
2171
2196
 
2172
2197
  e_lbrack
2173
- => { emit_table(PUNCTUATION)
2198
+ => { emit(:tLBRACK2)
2174
2199
  fnext expr_beg; fbreak; };
2175
2200
 
2176
2201
  punctuation_end
@@ -2187,7 +2212,7 @@ class Parser::Lexer
2187
2212
  => { fgoto leading_dot; };
2188
2213
 
2189
2214
  ';'
2190
- => { emit_table(PUNCTUATION)
2215
+ => { emit(:tSEMI)
2191
2216
  fnext expr_value; fbreak; };
2192
2217
 
2193
2218
  '\\' c_line {
@@ -0,0 +1,48 @@
1
+ module Parser
2
+
3
+ class Lexer::Dedenter
4
+ def initialize(dedent_level)
5
+ @dedent_level = dedent_level
6
+ @at_line_begin = true
7
+ @indent_level = 0
8
+ end
9
+
10
+ def dedent(string)
11
+ space_begin = space_end = offset = 0
12
+ string.chars.each_with_index do |char, index|
13
+ if @at_line_begin
14
+ if char == ?\n || @indent_level >= @dedent_level
15
+ string.slice!(space_begin...space_end)
16
+ offset += space_end - space_begin - 1
17
+ @at_line_begin = false
18
+ redo if char == ?\n
19
+ end
20
+
21
+ case char
22
+ when ?\s
23
+ @indent_level += 1
24
+ space_end += 1
25
+ when ?\t
26
+ @indent_level += 8 - @indent_level % 8
27
+ space_end += 1
28
+ end
29
+ elsif char == ?\n
30
+ @at_line_begin = true
31
+ @indent_level = 0
32
+ space_begin = space_end = index - offset + 1
33
+ end
34
+ end
35
+
36
+ if @at_line_begin
37
+ string.slice!(space_begin..space_end)
38
+ end
39
+
40
+ nil
41
+ end
42
+
43
+ def interrupt
44
+ @at_line_begin = false
45
+ end
46
+ end
47
+
48
+ end
@@ -34,11 +34,11 @@ module Parser
34
34
  '<<`' => [ :tXSTRING_BEG, true ],
35
35
  }
36
36
 
37
- attr_reader :heredoc_e, :str_s
37
+ attr_reader :heredoc_e, :str_s, :dedent_level
38
38
  attr_accessor :saved_herebody_s
39
39
 
40
40
  def initialize(lexer, str_type, delimiter, str_s, heredoc_e = nil,
41
- indent = false, label_allowed = false)
41
+ indent = false, dedent_body = false, label_allowed = false)
42
42
  @lexer = lexer
43
43
  @nesting = 1
44
44
 
@@ -65,6 +65,9 @@ module Parser
65
65
  @indent = indent
66
66
  @label_allowed = label_allowed
67
67
 
68
+ @dedent_body = dedent_body
69
+ @dedent_level = nil
70
+
68
71
  @interp_braces = 0
69
72
 
70
73
  @space_emitted = true
@@ -150,6 +153,25 @@ module Parser
150
153
  end
151
154
  end
152
155
 
156
+ def infer_indent_level(line)
157
+ return if !@dedent_body
158
+
159
+ indent_level = 0
160
+ line.each_char do |char|
161
+ case char
162
+ when ?\s
163
+ indent_level += 1
164
+ when ?\t
165
+ indent_level += (8 - indent_level % 8)
166
+ else
167
+ if @dedent_level.nil? || @dedent_level > indent_level
168
+ @dedent_level = indent_level
169
+ end
170
+ break
171
+ end
172
+ end
173
+ end
174
+
153
175
  def start_interp_brace
154
176
  @interp_braces += 1
155
177
  end
data/lib/parser/meta.rb CHANGED
@@ -6,7 +6,7 @@ module Parser
6
6
  # will be able to produce every possible node.
7
7
  NODE_TYPES =
8
8
  %w(
9
- true false nil int float str dstr str
9
+ true false nil int float str dstr
10
10
  sym dsym xstr regopt regexp array splat
11
11
  array pair kwsplat hash irange erange self
12
12
  lvar ivar cvar gvar const defined? lvasgn
data/lib/parser/ruby23.y CHANGED
@@ -1676,11 +1676,13 @@ opt_block_args_tail:
1676
1676
 
1677
1677
  string1: tSTRING_BEG string_contents tSTRING_END
1678
1678
  {
1679
- result = @builder.string_compose(val[0], val[1], val[2])
1679
+ string = @builder.string_compose(val[0], val[1], val[2])
1680
+ result = @builder.dedent_string(string, @lexer.dedent_level)
1680
1681
  }
1681
1682
  | tSTRING
1682
1683
  {
1683
- result = @builder.string(val[0])
1684
+ string = @builder.string(val[0])
1685
+ result = @builder.dedent_string(string, @lexer.dedent_level)
1684
1686
  }
1685
1687
  | tCHARACTER
1686
1688
  {
@@ -1689,7 +1691,8 @@ opt_block_args_tail:
1689
1691
 
1690
1692
  xstring: tXSTRING_BEG xstring_contents tSTRING_END
1691
1693
  {
1692
- result = @builder.xstring_compose(val[0], val[1], val[2])
1694
+ string = @builder.xstring_compose(val[0], val[1], val[2])
1695
+ result = @builder.dedent_string(string, @lexer.dedent_level)
1693
1696
  }
1694
1697
 
1695
1698
  regexp: tREGEXP_BEG regexp_contents tSTRING_END tREGEXP_OPT
@@ -190,6 +190,25 @@ module Parser
190
190
  [ @first_line + line_no, position - line_begin ]
191
191
  end
192
192
 
193
+ ##
194
+ # Return an `Array` of source code lines.
195
+ #
196
+ # @return [Array<String>]
197
+ #
198
+ def source_lines
199
+ @lines ||= begin
200
+ lines = @source.lines.to_a
201
+ lines << '' if @source.end_with?("\n")
202
+
203
+ lines.each do |line|
204
+ line.chomp!(NEW_LINE)
205
+ line.freeze
206
+ end
207
+
208
+ lines.freeze
209
+ end
210
+ end
211
+
193
212
  ##
194
213
  # Extract line `lineno` from source, taking `first_line` into account.
195
214
  #
@@ -198,16 +217,7 @@ module Parser
198
217
  # @raise [IndexError] if `lineno` is out of bounds
199
218
  #
200
219
  def source_line(lineno)
201
- unless @lines
202
- @lines = @source.lines.to_a
203
- @lines.each { |line| line.chomp!(NEW_LINE) }
204
-
205
- # If a file ends with a newline, the EOF token will appear
206
- # to be one line further than the end of file.
207
- @lines << ""
208
- end
209
-
210
- @lines.fetch(lineno - @first_line).dup
220
+ source_lines.fetch(lineno - @first_line).dup
211
221
  end
212
222
 
213
223
  ##
@@ -230,6 +240,15 @@ module Parser
230
240
  end
231
241
  end
232
242
 
243
+ ##
244
+ # Number of last line in the buffer
245
+ #
246
+ # @return [Integer]
247
+ #
248
+ def last_line
249
+ line_begins.size + @first_line - 1
250
+ end
251
+
233
252
  private
234
253
 
235
254
  def line_begins
@@ -1,3 +1,3 @@
1
1
  module Parser
2
- VERSION = '2.3.0.pre.6'
2
+ VERSION = '2.3.0.0'
3
3
  end
data/test/test_current.rb CHANGED
@@ -16,6 +16,8 @@ class TestCurrent < Minitest::Test
16
16
  assert_equal Parser::Ruby22, Parser::CurrentRuby
17
17
  when /^2\.3\.\d+/
18
18
  assert_equal Parser::Ruby23, Parser::CurrentRuby
19
+ when /^2\.4\.\d+/
20
+ assert_equal Parser::Ruby23, Parser::CurrentRuby
19
21
  else
20
22
  flunk "Update test_current for #{RUBY_VERSION}"
21
23
  end
data/test/test_lexer.rb CHANGED
@@ -1107,6 +1107,55 @@ class TestLexer < Minitest::Test
1107
1107
  assert_scanned '?\M-\C-a', :tCHARACTER, "\M-\C-a"
1108
1108
  end
1109
1109
 
1110
+ def test_question_eh_escape_u_1_digit
1111
+ setup_lexer 19
1112
+
1113
+ refute_scanned '?\\u1'
1114
+ end
1115
+
1116
+ def test_question_eh_escape_u_2_digits
1117
+ setup_lexer 19
1118
+
1119
+ refute_scanned '?\\u12'
1120
+ end
1121
+
1122
+ def test_question_eh_escape_u_3_digits
1123
+ setup_lexer 19
1124
+
1125
+ refute_scanned '?\\u123'
1126
+ end
1127
+
1128
+ def test_question_eh_escape_u_4_digits
1129
+ if RUBY_VERSION >= '1.9'
1130
+ setup_lexer 19
1131
+ assert_scanned '?\\u0001', :tCHARACTER, "\u0001"
1132
+ end
1133
+ end
1134
+
1135
+ def test_question_eh_single_unicode_point
1136
+ if RUBY_VERSION >= '1.9'
1137
+ setup_lexer 19
1138
+ assert_scanned '?\\u{123}', :tCHARACTER, "\u0123"
1139
+
1140
+ setup_lexer 19
1141
+ assert_scanned '?\\u{a}', :tCHARACTER, "\n"
1142
+ end
1143
+ end
1144
+
1145
+ def test_question_eh_multiple_unicode_points
1146
+ setup_lexer 19
1147
+ refute_scanned '?\\u{1 2 3}'
1148
+
1149
+ setup_lexer 19
1150
+ refute_scanned '?\\u{a b}'
1151
+ end
1152
+
1153
+ def test_question_eh_escape_u_unclosed_bracket
1154
+ setup_lexer 19
1155
+
1156
+ refute_scanned '?\\u{123'
1157
+ end
1158
+
1110
1159
  def test_integer_hex
1111
1160
  assert_scanned "0x2a", :tINTEGER, 42
1112
1161
  end
data/test/test_parser.rb CHANGED
@@ -262,6 +262,159 @@ class TestParser < Minitest::Test
262
262
  | ~~~~ heredoc_end})
263
263
  end
264
264
 
265
+ def test_dedenting_heredoc
266
+ assert_parses(
267
+ s(:begin,
268
+ s(:send,
269
+ s(:send, nil, :p), :<<,
270
+ s(:send,
271
+ s(:const, nil, :E), :~)),
272
+ s(:const, nil, :E)),
273
+ %Q{p <<~E\nE},
274
+ %q{},
275
+ %w(1.8 1.9 2.0 2.1 2.2 ios mac))
276
+
277
+ assert_parses(
278
+ s(:send, nil, :p,
279
+ s(:dstr)),
280
+ %Q{p <<~E\nE},
281
+ %q{},
282
+ ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
283
+
284
+ assert_parses(
285
+ s(:send, nil, :p,
286
+ s(:dstr)),
287
+ %Q{p <<~E\n E},
288
+ %q{},
289
+ ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
290
+
291
+ assert_parses(
292
+ s(:send, nil, :p,
293
+ s(:str, "x\n")),
294
+ %Q{p <<~E\n x\nE},
295
+ %q{},
296
+ ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
297
+
298
+ assert_parses(
299
+ s(:send, nil, :p,
300
+ s(:dstr,
301
+ s(:str, "x\n"),
302
+ s(:str, " y\n"))),
303
+ %Q{p <<~E\n x\n y\nE},
304
+ %q{},
305
+ ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
306
+
307
+ assert_parses(
308
+ s(:send, nil, :p,
309
+ s(:dstr,
310
+ s(:str, "x\n"),
311
+ s(:str, "y\n"))),
312
+ %Q{p <<~E\n\tx\n y\nE},
313
+ %q{},
314
+ ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
315
+
316
+ assert_parses(
317
+ s(:send, nil, :p,
318
+ s(:dstr,
319
+ s(:str, "x\n"),
320
+ s(:str, "y\n"))),
321
+ %Q{p <<~E\n\tx\n y\nE},
322
+ %q{},
323
+ ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
324
+
325
+ assert_parses(
326
+ s(:send, nil, :p,
327
+ s(:dstr,
328
+ s(:str, "x\n"),
329
+ s(:str, "y\n"))),
330
+ %Q{p <<~E\n \tx\n y\nE},
331
+ %q{},
332
+ ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
333
+
334
+ assert_parses(
335
+ s(:send, nil, :p,
336
+ s(:dstr,
337
+ s(:str, "\tx\n"),
338
+ s(:str, "y\n"))),
339
+ %Q{p <<~E\n \tx\n\ty\nE},
340
+ %q{},
341
+ ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
342
+
343
+ assert_parses(
344
+ s(:send, nil, :p,
345
+ s(:dstr,
346
+ s(:str, " x\n"),
347
+ s(:str, "\n"),
348
+ s(:str, "y\n"))),
349
+ %Q{p <<~E\n x\n\ny\nE},
350
+ %q{},
351
+ ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
352
+
353
+ assert_parses(
354
+ s(:send, nil, :p,
355
+ s(:dstr,
356
+ s(:str, "x\n"),
357
+ s(:str, " \n"),
358
+ s(:str, "y\n"))),
359
+ %Q{p <<~E\n x\n \n y\nE},
360
+ %q{},
361
+ ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
362
+
363
+ assert_parses(
364
+ s(:send, nil, :p,
365
+ s(:dstr,
366
+ s(:str, " x\n"),
367
+ s(:str, " y\n"))),
368
+ %Q{p <<~E\n x\n \\ y\nE},
369
+ %q{},
370
+ ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
371
+
372
+ assert_parses(
373
+ s(:send, nil, :p,
374
+ s(:dstr,
375
+ s(:str, " x\n"),
376
+ s(:str, "\ty\n"))),
377
+ %Q{p <<~E\n x\n \\\ty\nE},
378
+ %q{},
379
+ ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
380
+
381
+ assert_parses(
382
+ s(:send, nil, :p,
383
+ s(:dstr,
384
+ s(:str, " x\n"),
385
+ s(:str, ""),
386
+ s(:begin,
387
+ s(:lvar, :foo)),
388
+ s(:str, "\n"))),
389
+ %Q{p <<~"E"\n x\n \#{foo}\nE},
390
+ %q{},
391
+ ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
392
+
393
+ assert_parses(
394
+ s(:send, nil, :p,
395
+ s(:xstr,
396
+ s(:str, " x\n"),
397
+ s(:str, ""),
398
+ s(:begin,
399
+ s(:lvar, :foo)),
400
+ s(:str, "\n"))),
401
+ %Q{p <<~`E`\n x\n \#{foo}\nE},
402
+ %q{},
403
+ ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
404
+
405
+ assert_parses(
406
+ s(:send, nil, :p,
407
+ s(:dstr,
408
+ s(:str, " x\n"),
409
+ s(:str, ""),
410
+ s(:begin,
411
+ s(:str, " y")),
412
+ s(:str, "\n"))),
413
+ %Q{p <<~"E"\n x\n \#{" y"}\nE},
414
+ %q{},
415
+ ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
416
+ end
417
+
265
418
  # Symbols
266
419
 
267
420
  def test_symbol_plain
@@ -5124,6 +5277,28 @@ class TestParser < Minitest::Test
5124
5277
  ALL_VERSIONS - %w(1.8 1.9 mac ios 2.0)) # no 1.9 backport
5125
5278
  end
5126
5279
 
5280
+ # We implement broken behavior, and Ruby is not fixed as of 2016-01-14.
5281
+ def test_ruby_bug_11989
5282
+ assert_parses(
5283
+ s(:send, nil, :p,
5284
+ s(:str, "x\n y\n")),
5285
+ %Q{p <<~"E"\n x\\n y\nE},
5286
+ %q{},
5287
+ ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
5288
+ end
5289
+
5290
+ # We implement correct behavior, but Ruby is not fixed as of 2016-01-14.
5291
+ def test_ruby_bug_11990
5292
+ assert_parses(
5293
+ s(:send, nil, :p,
5294
+ s(:dstr,
5295
+ s(:str, "x\n"),
5296
+ s(:str, " y"))),
5297
+ %Q{p <<~E " y"\n x\nE},
5298
+ %q{},
5299
+ ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
5300
+ end
5301
+
5127
5302
  def test_parser_bug_198
5128
5303
  assert_parses(
5129
5304
  s(:array,
@@ -116,4 +116,29 @@ class TestSourceBuffer < Minitest::Test
116
116
  @buffer.line_range(9)
117
117
  end
118
118
  end
119
+
120
+ def test_last_line
121
+ @buffer.source = "1\nfoo\nbar"
122
+ assert_equal 3, @buffer.last_line
123
+
124
+ @buffer = Parser::Source::Buffer.new('(string)', 5)
125
+ @buffer.source = ""
126
+ assert_equal 5, @buffer.last_line
127
+
128
+ @buffer = Parser::Source::Buffer.new('(string)', 5)
129
+ @buffer.source = "abc\n"
130
+ assert_equal 6, @buffer.last_line
131
+ end
132
+
133
+ def test_source_lines
134
+ @buffer.source = "1\nfoo\nbar\n"
135
+
136
+ assert_equal ['1', 'foo', 'bar', ''], @buffer.source_lines
137
+ assert @buffer.source_lines.frozen?
138
+ assert @buffer.source_lines.all?(&:frozen?)
139
+
140
+ @buffer = Parser::Source::Buffer.new('(string)', 5)
141
+ @buffer.source = "foo\nbar"
142
+ assert_equal ['foo', 'bar'], @buffer.source_lines
143
+ end
119
144
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.3.0.pre.6
4
+ version: 2.3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - whitequark
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-20 00:00:00.000000000 Z
11
+ date: 2016-01-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ast
@@ -260,6 +260,7 @@ files:
260
260
  - lib/parser/diagnostic/engine.rb
261
261
  - lib/parser/lexer.rb
262
262
  - lib/parser/lexer.rl
263
+ - lib/parser/lexer/dedenter.rb
263
264
  - lib/parser/lexer/explanation.rb
264
265
  - lib/parser/lexer/literal.rb
265
266
  - lib/parser/lexer/stack_state.rb
@@ -348,12 +349,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
348
349
  version: '0'
349
350
  required_rubygems_version: !ruby/object:Gem::Requirement
350
351
  requirements:
351
- - - ">"
352
+ - - ">="
352
353
  - !ruby/object:Gem::Version
353
- version: 1.3.1
354
+ version: '0'
354
355
  requirements: []
355
356
  rubyforge_project:
356
- rubygems_version: 2.4.1
357
+ rubygems_version: 2.5.1
357
358
  signing_key:
358
359
  specification_version: 4
359
360
  summary: A Ruby parser written in pure Ruby.