parser 2.3.0.3 → 2.3.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +0 -1
- data/CHANGELOG.md +4 -1
- data/lib/parser/lexer.rl +25 -53
- data/lib/parser/lexer/literal.rb +1 -1
- data/lib/parser/source/buffer.rb +21 -3
- data/lib/parser/source/range.rb +1 -1
- data/lib/parser/version.rb +1 -1
- data/test/test_parser.rb +10 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1cb171255b96bf71809258731c221082841d8af1
|
4
|
+
data.tar.gz: 2ef132309fc78e96f665e60bdbfecb389f4bda6d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9f374c086ac8fff2fb0dfc2d0cfb9badc4e6da09b9612992c51e3207bdeff35491ccd7edb13e07b0f7dfe31bd9129df31bf4a4d550f58d9534bc9fdf55c6b064
|
7
|
+
data.tar.gz: f1821293d14bf5716f5c9e3d8f9af4d6eb7f84f05f3fed3552521770b33add9fd639502c6949c9c04a61775b275d208a199e3261b01c5b1082cf624c37968532
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
Changelog
|
2
2
|
=========
|
3
3
|
|
4
|
+
v2.3.0.4 (2016-02-09)
|
5
|
+
---------------------
|
6
|
+
|
4
7
|
v2.3.0.3 (2016-02-06)
|
5
8
|
---------------------
|
6
9
|
|
@@ -14,7 +17,7 @@ Bugs fixed:
|
|
14
17
|
* Add :csend to Parser::Meta::NODE_TYPES (Markus Schirp)
|
15
18
|
* lexer/dedenter: "\<\<x\n y\\n z\nx": don't dedent after escaped newline. (whitequark)
|
16
19
|
|
17
|
-
v2.3.0.
|
20
|
+
v2.3.0.4 (2016-01-16)
|
18
21
|
---------------------
|
19
22
|
|
20
23
|
v2.3.0.1 (2016-01-14)
|
data/lib/parser/lexer.rl
CHANGED
@@ -19,7 +19,7 @@
|
|
19
19
|
# they're pointing to ("current"), plus one. `@ts` contains the index
|
20
20
|
# of the corresponding character. The code for extracting matched token is:
|
21
21
|
#
|
22
|
-
# @source[@ts...@te]
|
22
|
+
# @source_buffer.slice(@ts...@te)
|
23
23
|
#
|
24
24
|
# * If your input is `foooooooobar` and the rule is:
|
25
25
|
#
|
@@ -109,8 +109,6 @@ class Parser::Lexer
|
|
109
109
|
@tokens = nil
|
110
110
|
@comments = nil
|
111
111
|
|
112
|
-
@has_encode = ''.respond_to?(:encode)
|
113
|
-
|
114
112
|
reset
|
115
113
|
end
|
116
114
|
|
@@ -128,10 +126,7 @@ class Parser::Lexer
|
|
128
126
|
|
129
127
|
@force_utf32 = false # Set to true by some tests
|
130
128
|
|
131
|
-
@source = nil # source string
|
132
129
|
@source_pts = nil # @source as a codepoint array
|
133
|
-
@encoding = nil # target encoding for output strings
|
134
|
-
@need_encode = nil
|
135
130
|
|
136
131
|
@p = 0 # stream position (saved manually in #advance)
|
137
132
|
@ts = nil # token start
|
@@ -186,33 +181,12 @@ class Parser::Lexer
|
|
186
181
|
@source_buffer = source_buffer
|
187
182
|
|
188
183
|
if @source_buffer
|
189
|
-
|
190
|
-
@need_encode = false
|
191
|
-
|
192
|
-
@encoding = @source.encoding if @has_encode
|
184
|
+
source = @source_buffer.source
|
193
185
|
|
194
|
-
if
|
195
|
-
@source_pts = @source.unpack('U*')
|
186
|
+
if source.respond_to?(:encode) && source.encoding == Encoding::UTF_8
|
187
|
+
@source_pts = source.unpack('U*')
|
196
188
|
else
|
197
|
-
@source_pts = @source.unpack('C*')
|
198
|
-
end
|
199
|
-
|
200
|
-
if @has_encode &&
|
201
|
-
(@source_pts.size > 1_000_000 || @force_utf32) &&
|
202
|
-
@encoding != Encoding::UTF_32LE
|
203
|
-
# A heuristic: if the buffer is larger than 1M, then
|
204
|
-
# store it in UTF-32 and convert the tokens as they're
|
205
|
-
# going out. If it's smaller, the conversion overhead
|
206
|
-
# dominates runtime and this stops being beneficial.
|
207
|
-
#
|
208
|
-
# This is not really a good heuristic, as the result
|
209
|
-
# heavily depends on token/character ratio. If it's low,
|
210
|
-
# say the gem consists mostly of long identifiers and
|
211
|
-
# symbols, then storing the source in UTF-8 would be faster.
|
212
|
-
#
|
213
|
-
# Patches accepted.
|
214
|
-
@source = @source.encode(Encoding::UTF_32LE)
|
215
|
-
@need_encode = true
|
189
|
+
@source_pts = source.unpack('C*')
|
216
190
|
end
|
217
191
|
|
218
192
|
if @source_pts[0] == 0xfeff
|
@@ -220,7 +194,6 @@ class Parser::Lexer
|
|
220
194
|
@p = 1
|
221
195
|
end
|
222
196
|
else
|
223
|
-
@source = nil
|
224
197
|
@source_pts = nil
|
225
198
|
end
|
226
199
|
end
|
@@ -327,24 +300,24 @@ class Parser::Lexer
|
|
327
300
|
versions.include?(@version)
|
328
301
|
end
|
329
302
|
|
303
|
+
def stack_pop
|
304
|
+
@top -= 1
|
305
|
+
@stack[@top]
|
306
|
+
end
|
307
|
+
|
330
308
|
if "".respond_to?(:encode)
|
331
309
|
def encode_escape(ord)
|
332
|
-
ord.chr.force_encoding(@encoding)
|
310
|
+
ord.chr.force_encoding(source_buffer.source.encoding)
|
333
311
|
end
|
334
312
|
|
335
|
-
def tok(s = @ts, e = @te)
|
336
|
-
source = @source[s...e]
|
337
|
-
return source unless @need_encode
|
338
|
-
source.encode(@encoding)
|
339
|
-
end
|
340
313
|
else
|
341
314
|
def encode_escape(ord)
|
342
315
|
ord.chr
|
343
316
|
end
|
317
|
+
end
|
344
318
|
|
345
|
-
|
346
|
-
|
347
|
-
end
|
319
|
+
def tok(s = @ts, e = @te)
|
320
|
+
@source_buffer.slice(s...e)
|
348
321
|
end
|
349
322
|
|
350
323
|
def range(s = @ts, e = @te)
|
@@ -707,13 +680,13 @@ class Parser::Lexer
|
|
707
680
|
|
708
681
|
maybe_escaped_char = (
|
709
682
|
'\\' c_any %unescape_char
|
710
|
-
| ( c_any - [\\] ) % { @escape = @source[p - 1].chr }
|
683
|
+
| ( c_any - [\\] ) % { @escape = @source_buffer.slice(p - 1).chr }
|
711
684
|
);
|
712
685
|
|
713
686
|
maybe_escaped_ctrl_char = ( # why?!
|
714
687
|
'\\' c_any %unescape_char %slash_c_char
|
715
688
|
| '?' % { @escape = "\x7f" }
|
716
|
-
| ( c_any - [\\?] ) % { @escape = @source[p - 1].chr } %slash_c_char
|
689
|
+
| ( c_any - [\\?] ) % { @escape = @source_buffer.slice(p - 1).chr } %slash_c_char
|
717
690
|
);
|
718
691
|
|
719
692
|
escape = (
|
@@ -840,8 +813,7 @@ class Parser::Lexer
|
|
840
813
|
|
841
814
|
# tLABEL_END is only possible in non-cond context on >= 2.2
|
842
815
|
if @version >= 22 && !@cond.active?
|
843
|
-
lookahead = @source[@te...@te+2]
|
844
|
-
lookahead = lookahead.encode(@encoding) if @need_encode
|
816
|
+
lookahead = @source_buffer.slice(@te...@te+2)
|
845
817
|
end
|
846
818
|
|
847
819
|
current_literal = literal
|
@@ -863,7 +835,7 @@ class Parser::Lexer
|
|
863
835
|
action extend_string_escaped {
|
864
836
|
current_literal = literal
|
865
837
|
# Get the first character after the backslash.
|
866
|
-
escaped_char = @source[@escape_s].chr
|
838
|
+
escaped_char = @source_buffer.slice(@escape_s).chr
|
867
839
|
|
868
840
|
if current_literal.munge_escape? escaped_char
|
869
841
|
# If this particular literal uses this character as an opening
|
@@ -1022,7 +994,7 @@ class Parser::Lexer
|
|
1022
994
|
end
|
1023
995
|
|
1024
996
|
fhold;
|
1025
|
-
|
997
|
+
fnext *stack_pop;
|
1026
998
|
fbreak;
|
1027
999
|
end
|
1028
1000
|
end
|
@@ -1241,7 +1213,7 @@ class Parser::Lexer
|
|
1241
1213
|
emit(:tGVAR)
|
1242
1214
|
end
|
1243
1215
|
|
1244
|
-
|
1216
|
+
fnext *stack_pop; fbreak;
|
1245
1217
|
};
|
1246
1218
|
|
1247
1219
|
class_var_v
|
@@ -1251,7 +1223,7 @@ class Parser::Lexer
|
|
1251
1223
|
end
|
1252
1224
|
|
1253
1225
|
emit(:tCVAR)
|
1254
|
-
|
1226
|
+
fnext *stack_pop; fbreak;
|
1255
1227
|
};
|
1256
1228
|
|
1257
1229
|
instance_var_v
|
@@ -1261,7 +1233,7 @@ class Parser::Lexer
|
|
1261
1233
|
end
|
1262
1234
|
|
1263
1235
|
emit(:tIVAR)
|
1264
|
-
|
1236
|
+
fnext *stack_pop; fbreak;
|
1265
1237
|
};
|
1266
1238
|
*|;
|
1267
1239
|
|
@@ -1637,7 +1609,7 @@ class Parser::Lexer
|
|
1637
1609
|
# %<string>
|
1638
1610
|
'%' ( any - [A-Za-z] )
|
1639
1611
|
=> {
|
1640
|
-
type, delimiter = @source[@ts].chr, tok[-1].chr
|
1612
|
+
type, delimiter = @source_buffer.slice(@ts).chr, tok[-1].chr
|
1641
1613
|
fgoto *push_literal(type, delimiter, @ts);
|
1642
1614
|
};
|
1643
1615
|
|
@@ -1732,7 +1704,7 @@ class Parser::Lexer
|
|
1732
1704
|
'?' c_space_nl
|
1733
1705
|
=> {
|
1734
1706
|
escape = { " " => '\s', "\r" => '\r', "\n" => '\n', "\t" => '\t',
|
1735
|
-
"\v" => '\v', "\f" => '\f' }[@source[@ts + 1]]
|
1707
|
+
"\v" => '\v', "\f" => '\f' }[@source_buffer.slice(@ts + 1)]
|
1736
1708
|
diagnostic :warning, :invalid_escape_use, { :escape => escape }, range
|
1737
1709
|
|
1738
1710
|
p = @ts - 1
|
@@ -1805,7 +1777,7 @@ class Parser::Lexer
|
|
1805
1777
|
if version?(18)
|
1806
1778
|
ident = tok(@ts, @te - 2)
|
1807
1779
|
|
1808
|
-
emit((@source[@ts] =~ /[A-Z]/) ? :tCONSTANT : :tIDENTIFIER,
|
1780
|
+
emit((@source_buffer.slice(@ts) =~ /[A-Z]/) ? :tCONSTANT : :tIDENTIFIER,
|
1809
1781
|
ident, @ts, @te - 2)
|
1810
1782
|
fhold; # continue as a symbol
|
1811
1783
|
|
data/lib/parser/lexer/literal.rb
CHANGED
@@ -242,7 +242,7 @@ module Parser
|
|
242
242
|
# Prime the buffer with lexer encoding; otherwise,
|
243
243
|
# concatenation will produce varying results.
|
244
244
|
if defined?(Encoding)
|
245
|
-
@buffer.force_encoding(@lexer.encoding)
|
245
|
+
@buffer.force_encoding(@lexer.source_buffer.source.encoding)
|
246
246
|
end
|
247
247
|
|
248
248
|
@buffer_s = nil
|
data/lib/parser/source/buffer.rb
CHANGED
@@ -107,9 +107,12 @@ module Parser
|
|
107
107
|
@lines = nil
|
108
108
|
@line_begins = nil
|
109
109
|
|
110
|
+
# UTF-32-reencoded source for O(1) slicing
|
111
|
+
@slice_source = nil
|
112
|
+
|
110
113
|
# Cache for fast lookup
|
111
|
-
@line_for_position
|
112
|
-
@
|
114
|
+
@line_for_position = {}
|
115
|
+
@column_for_position = {}
|
113
116
|
end
|
114
117
|
|
115
118
|
##
|
@@ -178,6 +181,21 @@ module Parser
|
|
178
181
|
end
|
179
182
|
|
180
183
|
@source = input.gsub("\r\n".freeze, "\n".freeze).freeze
|
184
|
+
|
185
|
+
if defined?(Encoding) &&
|
186
|
+
!@source.ascii_only? &&
|
187
|
+
@source.encoding != Encoding::UTF_32LE &&
|
188
|
+
@source.encoding != Encoding::BINARY
|
189
|
+
@slice_source = @source.encode(Encoding::UTF_32LE)
|
190
|
+
end
|
191
|
+
end
|
192
|
+
|
193
|
+
def slice(range)
|
194
|
+
if @slice_source.nil?
|
195
|
+
@source[range]
|
196
|
+
else
|
197
|
+
@slice_source[range].encode(@source.encoding)
|
198
|
+
end
|
181
199
|
end
|
182
200
|
|
183
201
|
##
|
@@ -214,7 +232,7 @@ module Parser
|
|
214
232
|
# @api private
|
215
233
|
#
|
216
234
|
def column_for_position(position)
|
217
|
-
@
|
235
|
+
@column_for_position[position] ||= begin
|
218
236
|
_, line_begin = line_for(position)
|
219
237
|
position - line_begin
|
220
238
|
end
|
data/lib/parser/source/range.rb
CHANGED
data/lib/parser/version.rb
CHANGED
data/test/test_parser.rb
CHANGED
@@ -5330,6 +5330,16 @@ class TestParser < Minitest::Test
|
|
5330
5330
|
ALL_VERSIONS - %w(1.8))
|
5331
5331
|
end
|
5332
5332
|
|
5333
|
+
def test_parser_bug_272
|
5334
|
+
assert_parses(
|
5335
|
+
s(:block,
|
5336
|
+
s(:send, nil, :a,
|
5337
|
+
s(:ivar, :@b)),
|
5338
|
+
s(:args,
|
5339
|
+
s(:arg, :c)), nil),
|
5340
|
+
%q{a @b do |c|;end})
|
5341
|
+
end
|
5342
|
+
|
5333
5343
|
def test_bug_lambda_leakage
|
5334
5344
|
assert_parses(
|
5335
5345
|
s(:begin,
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.3.0.3
|
4
|
+
version: 2.3.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- whitequark
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-02-06 00:00:00.000000000 Z
|
11
|
+
date: 2016-02-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ast
|