parser 2.3.0.3 → 2.3.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +0 -1
- data/CHANGELOG.md +4 -1
- data/lib/parser/lexer.rl +25 -53
- data/lib/parser/lexer/literal.rb +1 -1
- data/lib/parser/source/buffer.rb +21 -3
- data/lib/parser/source/range.rb +1 -1
- data/lib/parser/version.rb +1 -1
- data/test/test_parser.rb +10 -0
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1cb171255b96bf71809258731c221082841d8af1
|
|
4
|
+
data.tar.gz: 2ef132309fc78e96f665e60bdbfecb389f4bda6d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9f374c086ac8fff2fb0dfc2d0cfb9badc4e6da09b9612992c51e3207bdeff35491ccd7edb13e07b0f7dfe31bd9129df31bf4a4d550f58d9534bc9fdf55c6b064
|
|
7
|
+
data.tar.gz: f1821293d14bf5716f5c9e3d8f9af4d6eb7f84f05f3fed3552521770b33add9fd639502c6949c9c04a61775b275d208a199e3261b01c5b1082cf624c37968532
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
Changelog
|
|
2
2
|
=========
|
|
3
3
|
|
|
4
|
+
v2.3.0.4 (2016-02-09)
|
|
5
|
+
---------------------
|
|
6
|
+
|
|
4
7
|
v2.3.0.3 (2016-02-06)
|
|
5
8
|
---------------------
|
|
6
9
|
|
|
@@ -14,7 +17,7 @@ Bugs fixed:
|
|
|
14
17
|
* Add :csend to Parser::Meta::NODE_TYPES (Markus Schirp)
|
|
15
18
|
* lexer/dedenter: "\<\<x\n y\\n z\nx": don't dedent after escaped newline. (whitequark)
|
|
16
19
|
|
|
17
|
-
v2.3.0.
|
|
20
|
+
v2.3.0.4 (2016-01-16)
|
|
18
21
|
---------------------
|
|
19
22
|
|
|
20
23
|
v2.3.0.1 (2016-01-14)
|
data/lib/parser/lexer.rl
CHANGED
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
# they're pointing to ("current"), plus one. `@ts` contains the index
|
|
20
20
|
# of the corresponding character. The code for extracting matched token is:
|
|
21
21
|
#
|
|
22
|
-
# @
|
|
22
|
+
# @source_buffer.slice(@ts...@te)
|
|
23
23
|
#
|
|
24
24
|
# * If your input is `foooooooobar` and the rule is:
|
|
25
25
|
#
|
|
@@ -109,8 +109,6 @@ class Parser::Lexer
|
|
|
109
109
|
@tokens = nil
|
|
110
110
|
@comments = nil
|
|
111
111
|
|
|
112
|
-
@has_encode = ''.respond_to?(:encode)
|
|
113
|
-
|
|
114
112
|
reset
|
|
115
113
|
end
|
|
116
114
|
|
|
@@ -128,10 +126,7 @@ class Parser::Lexer
|
|
|
128
126
|
|
|
129
127
|
@force_utf32 = false # Set to true by some tests
|
|
130
128
|
|
|
131
|
-
@source = nil # source string
|
|
132
129
|
@source_pts = nil # @source as a codepoint array
|
|
133
|
-
@encoding = nil # target encoding for output strings
|
|
134
|
-
@need_encode = nil
|
|
135
130
|
|
|
136
131
|
@p = 0 # stream position (saved manually in #advance)
|
|
137
132
|
@ts = nil # token start
|
|
@@ -186,33 +181,12 @@ class Parser::Lexer
|
|
|
186
181
|
@source_buffer = source_buffer
|
|
187
182
|
|
|
188
183
|
if @source_buffer
|
|
189
|
-
|
|
190
|
-
@need_encode = false
|
|
191
|
-
|
|
192
|
-
@encoding = @source.encoding if @has_encode
|
|
184
|
+
source = @source_buffer.source
|
|
193
185
|
|
|
194
|
-
if
|
|
195
|
-
@source_pts =
|
|
186
|
+
if source.respond_to?(:encode) && source.encoding == Encoding::UTF_8
|
|
187
|
+
@source_pts = source.unpack('U*')
|
|
196
188
|
else
|
|
197
|
-
@source_pts =
|
|
198
|
-
end
|
|
199
|
-
|
|
200
|
-
if @has_encode &&
|
|
201
|
-
(@source_pts.size > 1_000_000 || @force_utf32) &&
|
|
202
|
-
@encoding != Encoding::UTF_32LE
|
|
203
|
-
# A heuristic: if the buffer is larger than 1M, then
|
|
204
|
-
# store it in UTF-32 and convert the tokens as they're
|
|
205
|
-
# going out. If it's smaller, the conversion overhead
|
|
206
|
-
# dominates runtime and this stops being beneficial.
|
|
207
|
-
#
|
|
208
|
-
# This is not really a good heuristic, as the result
|
|
209
|
-
# heavily depends on token/character ratio. If it's low,
|
|
210
|
-
# say the gem consists mostly of long identifiers and
|
|
211
|
-
# symbols, then storing the source in UTF-8 would be faster.
|
|
212
|
-
#
|
|
213
|
-
# Patches accepted.
|
|
214
|
-
@source = @source.encode(Encoding::UTF_32LE)
|
|
215
|
-
@need_encode = true
|
|
189
|
+
@source_pts = source.unpack('C*')
|
|
216
190
|
end
|
|
217
191
|
|
|
218
192
|
if @source_pts[0] == 0xfeff
|
|
@@ -220,7 +194,6 @@ class Parser::Lexer
|
|
|
220
194
|
@p = 1
|
|
221
195
|
end
|
|
222
196
|
else
|
|
223
|
-
@source = nil
|
|
224
197
|
@source_pts = nil
|
|
225
198
|
end
|
|
226
199
|
end
|
|
@@ -327,24 +300,24 @@ class Parser::Lexer
|
|
|
327
300
|
versions.include?(@version)
|
|
328
301
|
end
|
|
329
302
|
|
|
303
|
+
def stack_pop
|
|
304
|
+
@top -= 1
|
|
305
|
+
@stack[@top]
|
|
306
|
+
end
|
|
307
|
+
|
|
330
308
|
if "".respond_to?(:encode)
|
|
331
309
|
def encode_escape(ord)
|
|
332
|
-
ord.chr.force_encoding(
|
|
310
|
+
ord.chr.force_encoding(source_buffer.source.encoding)
|
|
333
311
|
end
|
|
334
312
|
|
|
335
|
-
def tok(s = @ts, e = @te)
|
|
336
|
-
source = @source[s...e]
|
|
337
|
-
return source unless @need_encode
|
|
338
|
-
source.encode(@encoding)
|
|
339
|
-
end
|
|
340
313
|
else
|
|
341
314
|
def encode_escape(ord)
|
|
342
315
|
ord.chr
|
|
343
316
|
end
|
|
317
|
+
end
|
|
344
318
|
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
end
|
|
319
|
+
def tok(s = @ts, e = @te)
|
|
320
|
+
@source_buffer.slice(s...e)
|
|
348
321
|
end
|
|
349
322
|
|
|
350
323
|
def range(s = @ts, e = @te)
|
|
@@ -707,13 +680,13 @@ class Parser::Lexer
|
|
|
707
680
|
|
|
708
681
|
maybe_escaped_char = (
|
|
709
682
|
'\\' c_any %unescape_char
|
|
710
|
-
| ( c_any - [\\] ) % { @escape = @
|
|
683
|
+
| ( c_any - [\\] ) % { @escape = @source_buffer.slice(p - 1).chr }
|
|
711
684
|
);
|
|
712
685
|
|
|
713
686
|
maybe_escaped_ctrl_char = ( # why?!
|
|
714
687
|
'\\' c_any %unescape_char %slash_c_char
|
|
715
688
|
| '?' % { @escape = "\x7f" }
|
|
716
|
-
| ( c_any - [\\?] ) % { @escape = @
|
|
689
|
+
| ( c_any - [\\?] ) % { @escape = @source_buffer.slice(p - 1).chr } %slash_c_char
|
|
717
690
|
);
|
|
718
691
|
|
|
719
692
|
escape = (
|
|
@@ -840,8 +813,7 @@ class Parser::Lexer
|
|
|
840
813
|
|
|
841
814
|
# tLABEL_END is only possible in non-cond context on >= 2.2
|
|
842
815
|
if @version >= 22 && !@cond.active?
|
|
843
|
-
lookahead = @
|
|
844
|
-
lookahead = lookahead.encode(@encoding) if @need_encode
|
|
816
|
+
lookahead = @source_buffer.slice(@te...@te+2)
|
|
845
817
|
end
|
|
846
818
|
|
|
847
819
|
current_literal = literal
|
|
@@ -863,7 +835,7 @@ class Parser::Lexer
|
|
|
863
835
|
action extend_string_escaped {
|
|
864
836
|
current_literal = literal
|
|
865
837
|
# Get the first character after the backslash.
|
|
866
|
-
escaped_char = @
|
|
838
|
+
escaped_char = @source_buffer.slice(@escape_s).chr
|
|
867
839
|
|
|
868
840
|
if current_literal.munge_escape? escaped_char
|
|
869
841
|
# If this particular literal uses this character as an opening
|
|
@@ -1022,7 +994,7 @@ class Parser::Lexer
|
|
|
1022
994
|
end
|
|
1023
995
|
|
|
1024
996
|
fhold;
|
|
1025
|
-
|
|
997
|
+
fnext *stack_pop;
|
|
1026
998
|
fbreak;
|
|
1027
999
|
end
|
|
1028
1000
|
end
|
|
@@ -1241,7 +1213,7 @@ class Parser::Lexer
|
|
|
1241
1213
|
emit(:tGVAR)
|
|
1242
1214
|
end
|
|
1243
1215
|
|
|
1244
|
-
|
|
1216
|
+
fnext *stack_pop; fbreak;
|
|
1245
1217
|
};
|
|
1246
1218
|
|
|
1247
1219
|
class_var_v
|
|
@@ -1251,7 +1223,7 @@ class Parser::Lexer
|
|
|
1251
1223
|
end
|
|
1252
1224
|
|
|
1253
1225
|
emit(:tCVAR)
|
|
1254
|
-
|
|
1226
|
+
fnext *stack_pop; fbreak;
|
|
1255
1227
|
};
|
|
1256
1228
|
|
|
1257
1229
|
instance_var_v
|
|
@@ -1261,7 +1233,7 @@ class Parser::Lexer
|
|
|
1261
1233
|
end
|
|
1262
1234
|
|
|
1263
1235
|
emit(:tIVAR)
|
|
1264
|
-
|
|
1236
|
+
fnext *stack_pop; fbreak;
|
|
1265
1237
|
};
|
|
1266
1238
|
*|;
|
|
1267
1239
|
|
|
@@ -1637,7 +1609,7 @@ class Parser::Lexer
|
|
|
1637
1609
|
# %<string>
|
|
1638
1610
|
'%' ( any - [A-Za-z] )
|
|
1639
1611
|
=> {
|
|
1640
|
-
type, delimiter = @
|
|
1612
|
+
type, delimiter = @source_buffer.slice(@ts).chr, tok[-1].chr
|
|
1641
1613
|
fgoto *push_literal(type, delimiter, @ts);
|
|
1642
1614
|
};
|
|
1643
1615
|
|
|
@@ -1732,7 +1704,7 @@ class Parser::Lexer
|
|
|
1732
1704
|
'?' c_space_nl
|
|
1733
1705
|
=> {
|
|
1734
1706
|
escape = { " " => '\s', "\r" => '\r', "\n" => '\n', "\t" => '\t',
|
|
1735
|
-
"\v" => '\v', "\f" => '\f' }[@
|
|
1707
|
+
"\v" => '\v', "\f" => '\f' }[@source_buffer.slice(@ts + 1)]
|
|
1736
1708
|
diagnostic :warning, :invalid_escape_use, { :escape => escape }, range
|
|
1737
1709
|
|
|
1738
1710
|
p = @ts - 1
|
|
@@ -1805,7 +1777,7 @@ class Parser::Lexer
|
|
|
1805
1777
|
if version?(18)
|
|
1806
1778
|
ident = tok(@ts, @te - 2)
|
|
1807
1779
|
|
|
1808
|
-
emit((@
|
|
1780
|
+
emit((@source_buffer.slice(@ts) =~ /[A-Z]/) ? :tCONSTANT : :tIDENTIFIER,
|
|
1809
1781
|
ident, @ts, @te - 2)
|
|
1810
1782
|
fhold; # continue as a symbol
|
|
1811
1783
|
|
data/lib/parser/lexer/literal.rb
CHANGED
|
@@ -242,7 +242,7 @@ module Parser
|
|
|
242
242
|
# Prime the buffer with lexer encoding; otherwise,
|
|
243
243
|
# concatenation will produce varying results.
|
|
244
244
|
if defined?(Encoding)
|
|
245
|
-
@buffer.force_encoding(@lexer.encoding)
|
|
245
|
+
@buffer.force_encoding(@lexer.source_buffer.source.encoding)
|
|
246
246
|
end
|
|
247
247
|
|
|
248
248
|
@buffer_s = nil
|
data/lib/parser/source/buffer.rb
CHANGED
|
@@ -107,9 +107,12 @@ module Parser
|
|
|
107
107
|
@lines = nil
|
|
108
108
|
@line_begins = nil
|
|
109
109
|
|
|
110
|
+
# UTF-32-reencoded source for O(1) slicing
|
|
111
|
+
@slice_source = nil
|
|
112
|
+
|
|
110
113
|
# Cache for fast lookup
|
|
111
|
-
@line_for_position
|
|
112
|
-
@
|
|
114
|
+
@line_for_position = {}
|
|
115
|
+
@column_for_position = {}
|
|
113
116
|
end
|
|
114
117
|
|
|
115
118
|
##
|
|
@@ -178,6 +181,21 @@ module Parser
|
|
|
178
181
|
end
|
|
179
182
|
|
|
180
183
|
@source = input.gsub("\r\n".freeze, "\n".freeze).freeze
|
|
184
|
+
|
|
185
|
+
if defined?(Encoding) &&
|
|
186
|
+
!@source.ascii_only? &&
|
|
187
|
+
@source.encoding != Encoding::UTF_32LE &&
|
|
188
|
+
@source.encoding != Encoding::BINARY
|
|
189
|
+
@slice_source = @source.encode(Encoding::UTF_32LE)
|
|
190
|
+
end
|
|
191
|
+
end
|
|
192
|
+
|
|
193
|
+
def slice(range)
|
|
194
|
+
if @slice_source.nil?
|
|
195
|
+
@source[range]
|
|
196
|
+
else
|
|
197
|
+
@slice_source[range].encode(@source.encoding)
|
|
198
|
+
end
|
|
181
199
|
end
|
|
182
200
|
|
|
183
201
|
##
|
|
@@ -214,7 +232,7 @@ module Parser
|
|
|
214
232
|
# @api private
|
|
215
233
|
#
|
|
216
234
|
def column_for_position(position)
|
|
217
|
-
@
|
|
235
|
+
@column_for_position[position] ||= begin
|
|
218
236
|
_, line_begin = line_for(position)
|
|
219
237
|
position - line_begin
|
|
220
238
|
end
|
data/lib/parser/source/range.rb
CHANGED
data/lib/parser/version.rb
CHANGED
data/test/test_parser.rb
CHANGED
|
@@ -5330,6 +5330,16 @@ class TestParser < Minitest::Test
|
|
|
5330
5330
|
ALL_VERSIONS - %w(1.8))
|
|
5331
5331
|
end
|
|
5332
5332
|
|
|
5333
|
+
def test_parser_bug_272
|
|
5334
|
+
assert_parses(
|
|
5335
|
+
s(:block,
|
|
5336
|
+
s(:send, nil, :a,
|
|
5337
|
+
s(:ivar, :@b)),
|
|
5338
|
+
s(:args,
|
|
5339
|
+
s(:arg, :c)), nil),
|
|
5340
|
+
%q{a @b do |c|;end})
|
|
5341
|
+
end
|
|
5342
|
+
|
|
5333
5343
|
def test_bug_lambda_leakage
|
|
5334
5344
|
assert_parses(
|
|
5335
5345
|
s(:begin,
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: parser
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.3.0.3
|
|
4
|
+
version: 2.3.0.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- whitequark
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2016-02-06 00:00:00.000000000 Z
|
|
11
|
+
date: 2016-02-09 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: ast
|