prism 1.7.0 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +31 -1
- data/Makefile +7 -1
- data/config.yml +4 -4
- data/docs/releasing.md +2 -4
- data/docs/ripper_translation.md +8 -17
- data/docs/ruby_api.md +1 -0
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +4 -4
- data/include/prism/version.h +2 -2
- data/lib/prism/compiler.rb +152 -152
- data/lib/prism/lex_compat.rb +133 -150
- data/lib/prism/node.rb +1131 -20
- data/lib/prism/parse_result.rb +9 -0
- data/lib/prism/serialize.rb +1 -1
- data/lib/prism/translation/parser_current.rb +1 -1
- data/lib/prism/translation/parser_versions.rb +36 -0
- data/lib/prism/translation/ripper/filter.rb +53 -0
- data/lib/prism/translation/ripper/lexer.rb +135 -0
- data/lib/prism/translation/ripper.rb +84 -38
- data/lib/prism/translation/ruby_parser.rb +1 -1
- data/lib/prism/translation.rb +5 -5
- data/lib/prism/visitor.rb +152 -152
- data/lib/prism.rb +1 -14
- data/prism.gemspec +5 -11
- data/rbi/prism/node.rbi +3 -0
- data/rbi/prism/translation/parser_versions.rbi +23 -0
- data/rbi/prism.rbi +0 -3
- data/sig/prism/node.rbs +4 -0
- data/sig/prism/parse_result.rbs +1 -0
- data/sig/prism.rbs +54 -40
- data/src/prism.c +48 -27
- metadata +5 -11
- data/lib/prism/translation/parser33.rb +0 -13
- data/lib/prism/translation/parser34.rb +0 -13
- data/lib/prism/translation/parser35.rb +0 -8
- data/lib/prism/translation/parser40.rb +0 -13
- data/lib/prism/translation/parser41.rb +0 -13
- data/rbi/prism/translation/parser33.rbi +0 -6
- data/rbi/prism/translation/parser34.rbi +0 -6
- data/rbi/prism/translation/parser35.rbi +0 -4
- data/rbi/prism/translation/parser40.rbi +0 -6
- data/rbi/prism/translation/parser41.rbi +0 -6
data/lib/prism/lex_compat.rb
CHANGED
|
@@ -1,9 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
# :markup: markdown
|
|
3
3
|
|
|
4
|
-
require "delegate"
|
|
5
|
-
require "ripper"
|
|
6
|
-
|
|
7
4
|
module Prism
|
|
8
5
|
# This class is responsible for lexing the source using prism and then
|
|
9
6
|
# converting those tokens to be compatible with Ripper. In the vast majority
|
|
@@ -202,87 +199,51 @@ module Prism
|
|
|
202
199
|
# When we produce tokens, we produce the same arrays that Ripper does.
|
|
203
200
|
# However, we add a couple of convenience methods onto them to make them a
|
|
204
201
|
# little easier to work with. We delegate all other methods to the array.
|
|
205
|
-
class Token <
|
|
206
|
-
#
|
|
202
|
+
class Token < BasicObject
|
|
203
|
+
# Create a new token object with the given ripper-compatible array.
|
|
204
|
+
def initialize(array)
|
|
205
|
+
@array = array
|
|
206
|
+
end
|
|
207
207
|
|
|
208
208
|
# The location of the token in the source.
|
|
209
209
|
def location
|
|
210
|
-
|
|
210
|
+
@array[0]
|
|
211
211
|
end
|
|
212
212
|
|
|
213
213
|
# The type of the token.
|
|
214
214
|
def event
|
|
215
|
-
|
|
215
|
+
@array[1]
|
|
216
216
|
end
|
|
217
217
|
|
|
218
218
|
# The slice of the source that this token represents.
|
|
219
219
|
def value
|
|
220
|
-
|
|
220
|
+
@array[2]
|
|
221
221
|
end
|
|
222
222
|
|
|
223
223
|
# The state of the lexer when this token was produced.
|
|
224
224
|
def state
|
|
225
|
-
|
|
225
|
+
@array[3]
|
|
226
226
|
end
|
|
227
|
-
end
|
|
228
227
|
|
|
229
|
-
|
|
230
|
-
# trim it down to just the content on the first line when comparing.
|
|
231
|
-
class EndContentToken < Token
|
|
228
|
+
# We want to pretend that this is just an Array.
|
|
232
229
|
def ==(other) # :nodoc:
|
|
233
|
-
|
|
230
|
+
@array == other
|
|
234
231
|
end
|
|
235
|
-
end
|
|
236
|
-
|
|
237
|
-
# Tokens where state should be ignored
|
|
238
|
-
# used for :on_comment, :on_heredoc_end, :on_embexpr_end
|
|
239
|
-
class IgnoreStateToken < Token
|
|
240
|
-
def ==(other) # :nodoc:
|
|
241
|
-
self[0...-1] == other[0...-1]
|
|
242
|
-
end
|
|
243
|
-
end
|
|
244
232
|
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
# through named captures in regular expressions). In that case we don't
|
|
248
|
-
# compare the state.
|
|
249
|
-
class IdentToken < Token
|
|
250
|
-
def ==(other) # :nodoc:
|
|
251
|
-
(self[0...-1] == other[0...-1]) && (
|
|
252
|
-
(other[3] == Ripper::EXPR_LABEL | Ripper::EXPR_END) ||
|
|
253
|
-
(other[3] & Ripper::EXPR_ARG_ANY != 0)
|
|
254
|
-
)
|
|
233
|
+
def respond_to_missing?(name, include_private = false) # :nodoc:
|
|
234
|
+
@array.respond_to?(name, include_private)
|
|
255
235
|
end
|
|
256
|
-
end
|
|
257
|
-
|
|
258
|
-
# Ignored newlines can occasionally have a LABEL state attached to them, so
|
|
259
|
-
# we compare the state differently here.
|
|
260
|
-
class IgnoredNewlineToken < Token
|
|
261
|
-
def ==(other) # :nodoc:
|
|
262
|
-
return false unless self[0...-1] == other[0...-1]
|
|
263
236
|
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
else
|
|
267
|
-
self[3] == other[3]
|
|
268
|
-
end
|
|
237
|
+
def method_missing(name, ...) # :nodoc:
|
|
238
|
+
@array.send(name, ...)
|
|
269
239
|
end
|
|
270
240
|
end
|
|
271
241
|
|
|
272
|
-
#
|
|
273
|
-
#
|
|
274
|
-
|
|
275
|
-
#
|
|
276
|
-
# then Ripper will mark bar as END|LABEL if there is a local in a parent
|
|
277
|
-
# scope named bar because it hasn't pushed the local table yet. We do this
|
|
278
|
-
# more accurately, so we need to allow comparing against both END and
|
|
279
|
-
# END|LABEL.
|
|
280
|
-
class ParamToken < Token
|
|
242
|
+
# Tokens where state should be ignored
|
|
243
|
+
# used for :on_sp, :on_comment, :on_heredoc_end, :on_embexpr_end
|
|
244
|
+
class IgnoreStateToken < Token
|
|
281
245
|
def ==(other) # :nodoc:
|
|
282
|
-
|
|
283
|
-
(other[3] == Ripper::EXPR_END) ||
|
|
284
|
-
(other[3] == Ripper::EXPR_END | Ripper::EXPR_LABEL)
|
|
285
|
-
)
|
|
246
|
+
self[0...-1] == other[0...-1]
|
|
286
247
|
end
|
|
287
248
|
end
|
|
288
249
|
|
|
@@ -615,10 +576,15 @@ module Prism
|
|
|
615
576
|
|
|
616
577
|
private_constant :Heredoc
|
|
617
578
|
|
|
618
|
-
|
|
579
|
+
# In previous versions of Ruby, Ripper wouldn't flush the bom before the
|
|
580
|
+
# first token, so we had to have a hack in place to account for that.
|
|
581
|
+
BOM_FLUSHED = RUBY_VERSION >= "3.3.0"
|
|
582
|
+
private_constant :BOM_FLUSHED
|
|
619
583
|
|
|
620
|
-
|
|
621
|
-
|
|
584
|
+
attr_reader :options
|
|
585
|
+
|
|
586
|
+
def initialize(code, **options)
|
|
587
|
+
@code = code
|
|
622
588
|
@options = options
|
|
623
589
|
end
|
|
624
590
|
|
|
@@ -628,16 +594,14 @@ module Prism
|
|
|
628
594
|
state = :default
|
|
629
595
|
heredoc_stack = [[]] #: Array[Array[Heredoc::PlainHeredoc | Heredoc::DashHeredoc | Heredoc::DedentingHeredoc]]
|
|
630
596
|
|
|
631
|
-
result = Prism.lex(
|
|
597
|
+
result = Prism.lex(@code, **options)
|
|
598
|
+
source = result.source
|
|
632
599
|
result_value = result.value
|
|
633
|
-
previous_state = nil #:
|
|
600
|
+
previous_state = nil #: State?
|
|
634
601
|
last_heredoc_end = nil #: Integer?
|
|
602
|
+
eof_token = nil
|
|
635
603
|
|
|
636
|
-
|
|
637
|
-
# first token, so we had to have a hack in place to account for that. This
|
|
638
|
-
# checks for that behavior.
|
|
639
|
-
bom_flushed = Ripper.lex("\xEF\xBB\xBF# test")[0][0][1] == 0
|
|
640
|
-
bom = source.byteslice(0..2) == "\xEF\xBB\xBF"
|
|
604
|
+
bom = source.slice(0, 3) == "\xEF\xBB\xBF"
|
|
641
605
|
|
|
642
606
|
result_value.each_with_index do |(token, lex_state), index|
|
|
643
607
|
lineno = token.location.start_line
|
|
@@ -651,7 +615,7 @@ module Prism
|
|
|
651
615
|
if bom && lineno == 1
|
|
652
616
|
column -= 3
|
|
653
617
|
|
|
654
|
-
if index == 0 && column == 0 && !
|
|
618
|
+
if index == 0 && column == 0 && !BOM_FLUSHED
|
|
655
619
|
flushed =
|
|
656
620
|
case token.type
|
|
657
621
|
when :BACK_REFERENCE, :INSTANCE_VARIABLE, :CLASS_VARIABLE,
|
|
@@ -675,12 +639,15 @@ module Prism
|
|
|
675
639
|
|
|
676
640
|
event = RIPPER.fetch(token.type)
|
|
677
641
|
value = token.value
|
|
678
|
-
lex_state = Ripper::Lexer::State.
|
|
642
|
+
lex_state = Translation::Ripper::Lexer::State.cached(lex_state)
|
|
679
643
|
|
|
680
644
|
token =
|
|
681
645
|
case event
|
|
682
646
|
when :on___end__
|
|
683
|
-
|
|
647
|
+
# Ripper doesn't include the rest of the token in the event, so we need to
|
|
648
|
+
# trim it down to just the content on the first line.
|
|
649
|
+
value = value[0..value.index("\n")]
|
|
650
|
+
Token.new([[lineno, column], event, value, lex_state])
|
|
684
651
|
when :on_comment
|
|
685
652
|
IgnoreStateToken.new([[lineno, column], event, value, lex_state])
|
|
686
653
|
when :on_heredoc_end
|
|
@@ -688,33 +655,18 @@ module Prism
|
|
|
688
655
|
# want to bother comparing the state on them.
|
|
689
656
|
last_heredoc_end = token.location.end_offset
|
|
690
657
|
IgnoreStateToken.new([[lineno, column], event, value, lex_state])
|
|
691
|
-
when :on_ident
|
|
692
|
-
if lex_state == Ripper::EXPR_END
|
|
693
|
-
# If we have an identifier that follows a method name like:
|
|
694
|
-
#
|
|
695
|
-
# def foo bar
|
|
696
|
-
#
|
|
697
|
-
# then Ripper will mark bar as END|LABEL if there is a local in a
|
|
698
|
-
# parent scope named bar because it hasn't pushed the local table
|
|
699
|
-
# yet. We do this more accurately, so we need to allow comparing
|
|
700
|
-
# against both END and END|LABEL.
|
|
701
|
-
ParamToken.new([[lineno, column], event, value, lex_state])
|
|
702
|
-
elsif lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL
|
|
703
|
-
# In the event that we're comparing identifiers, we're going to
|
|
704
|
-
# allow a little divergence. Ripper doesn't account for local
|
|
705
|
-
# variables introduced through named captures in regexes, and we
|
|
706
|
-
# do, which accounts for this difference.
|
|
707
|
-
IdentToken.new([[lineno, column], event, value, lex_state])
|
|
708
|
-
else
|
|
709
|
-
Token.new([[lineno, column], event, value, lex_state])
|
|
710
|
-
end
|
|
711
658
|
when :on_embexpr_end
|
|
712
659
|
IgnoreStateToken.new([[lineno, column], event, value, lex_state])
|
|
713
|
-
when :
|
|
714
|
-
#
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
660
|
+
when :on_words_sep
|
|
661
|
+
# Ripper emits one token each per line.
|
|
662
|
+
value.each_line.with_index do |line, index|
|
|
663
|
+
if index > 0
|
|
664
|
+
lineno += 1
|
|
665
|
+
column = 0
|
|
666
|
+
end
|
|
667
|
+
tokens << Token.new([[lineno, column], event, line, lex_state])
|
|
668
|
+
end
|
|
669
|
+
tokens.pop
|
|
718
670
|
when :on_regexp_end
|
|
719
671
|
# On regex end, Ripper scans and then sets end state, so the ripper
|
|
720
672
|
# lexed output is begin, when it should be end. prism sets lex state
|
|
@@ -739,13 +691,14 @@ module Prism
|
|
|
739
691
|
counter += { on_embexpr_beg: -1, on_embexpr_end: 1 }[current_event] || 0
|
|
740
692
|
end
|
|
741
693
|
|
|
742
|
-
Ripper::Lexer::State.
|
|
694
|
+
Translation::Ripper::Lexer::State.cached(result_value[current_index][1])
|
|
743
695
|
else
|
|
744
696
|
previous_state
|
|
745
697
|
end
|
|
746
698
|
|
|
747
699
|
Token.new([[lineno, column], event, value, lex_state])
|
|
748
700
|
when :on_eof
|
|
701
|
+
eof_token = token
|
|
749
702
|
previous_token = result_value[index - 1][0]
|
|
750
703
|
|
|
751
704
|
# If we're at the end of the file and the previous token was a
|
|
@@ -768,7 +721,7 @@ module Prism
|
|
|
768
721
|
end_offset += 3
|
|
769
722
|
end
|
|
770
723
|
|
|
771
|
-
tokens << Token.new([[lineno, 0], :on_nl, source.
|
|
724
|
+
tokens << Token.new([[lineno, 0], :on_nl, source.slice(start_offset, end_offset - start_offset), lex_state])
|
|
772
725
|
end
|
|
773
726
|
end
|
|
774
727
|
|
|
@@ -859,70 +812,100 @@ module Prism
|
|
|
859
812
|
# Drop the EOF token from the list
|
|
860
813
|
tokens = tokens[0...-1]
|
|
861
814
|
|
|
862
|
-
# We sort by location
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
private_constant :LexCompat
|
|
815
|
+
# We sort by location because Ripper.lex sorts.
|
|
816
|
+
# Manually implemented instead of `sort_by!(&:location)` for performance.
|
|
817
|
+
tokens.sort_by! do |token|
|
|
818
|
+
line, column = token.location
|
|
819
|
+
source.byte_offset(line, column)
|
|
820
|
+
end
|
|
870
821
|
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
class LexRipper # :nodoc:
|
|
874
|
-
attr_reader :source
|
|
822
|
+
# Add :on_sp tokens
|
|
823
|
+
tokens = add_on_sp_tokens(tokens, source, result.data_loc, bom, eof_token)
|
|
875
824
|
|
|
876
|
-
|
|
877
|
-
@source = source
|
|
825
|
+
Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, source)
|
|
878
826
|
end
|
|
879
827
|
|
|
880
|
-
def
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
828
|
+
def add_on_sp_tokens(tokens, source, data_loc, bom, eof_token)
|
|
829
|
+
new_tokens = []
|
|
830
|
+
|
|
831
|
+
prev_token_state = Translation::Ripper::Lexer::State.cached(Translation::Ripper::EXPR_BEG)
|
|
832
|
+
prev_token_end = bom ? 3 : 0
|
|
833
|
+
|
|
834
|
+
tokens.each do |token|
|
|
835
|
+
line, column = token.location
|
|
836
|
+
start_offset = source.byte_offset(line, column)
|
|
837
|
+
|
|
838
|
+
# Ripper reports columns on line 1 without counting the BOM, so we
|
|
839
|
+
# adjust to get the real offset
|
|
840
|
+
start_offset += 3 if line == 1 && bom
|
|
841
|
+
|
|
842
|
+
if start_offset > prev_token_end
|
|
843
|
+
sp_value = source.slice(prev_token_end, start_offset - prev_token_end)
|
|
844
|
+
sp_line = source.line(prev_token_end)
|
|
845
|
+
sp_column = source.column(prev_token_end)
|
|
846
|
+
# Ripper reports columns on line 1 without counting the BOM
|
|
847
|
+
sp_column -= 3 if sp_line == 1 && bom
|
|
848
|
+
continuation_index = sp_value.byteindex("\\")
|
|
849
|
+
|
|
850
|
+
# ripper emits up to three :on_sp tokens when line continuations are used
|
|
851
|
+
if continuation_index
|
|
852
|
+
next_whitespace_index = continuation_index + 1
|
|
853
|
+
next_whitespace_index += 1 if sp_value.byteslice(next_whitespace_index) == "\r"
|
|
854
|
+
next_whitespace_index += 1
|
|
855
|
+
first_whitespace = sp_value[0...continuation_index]
|
|
856
|
+
continuation = sp_value[continuation_index...next_whitespace_index]
|
|
857
|
+
second_whitespace = sp_value[next_whitespace_index..]
|
|
858
|
+
|
|
859
|
+
new_tokens << IgnoreStateToken.new([
|
|
860
|
+
[sp_line, sp_column],
|
|
861
|
+
:on_sp,
|
|
862
|
+
first_whitespace,
|
|
863
|
+
prev_token_state
|
|
864
|
+
]) unless first_whitespace.empty?
|
|
865
|
+
|
|
866
|
+
new_tokens << IgnoreStateToken.new([
|
|
867
|
+
[sp_line, sp_column + continuation_index],
|
|
868
|
+
:on_sp,
|
|
869
|
+
continuation,
|
|
870
|
+
prev_token_state
|
|
871
|
+
])
|
|
872
|
+
|
|
873
|
+
new_tokens << IgnoreStateToken.new([
|
|
874
|
+
[sp_line + 1, 0],
|
|
875
|
+
:on_sp,
|
|
876
|
+
second_whitespace,
|
|
877
|
+
prev_token_state
|
|
878
|
+
]) unless second_whitespace.empty?
|
|
891
879
|
else
|
|
892
|
-
|
|
893
|
-
|
|
880
|
+
new_tokens << IgnoreStateToken.new([
|
|
881
|
+
[sp_line, sp_column],
|
|
882
|
+
:on_sp,
|
|
883
|
+
sp_value,
|
|
884
|
+
prev_token_state
|
|
885
|
+
])
|
|
894
886
|
end
|
|
895
|
-
when :on_words_sep
|
|
896
|
-
if previous[1] == :on_words_sep
|
|
897
|
-
previous[2] << token[2]
|
|
898
|
-
else
|
|
899
|
-
results << token
|
|
900
|
-
previous = token
|
|
901
|
-
end
|
|
902
|
-
else
|
|
903
|
-
results << token
|
|
904
|
-
previous = token
|
|
905
887
|
end
|
|
906
|
-
end
|
|
907
|
-
|
|
908
|
-
results
|
|
909
|
-
end
|
|
910
|
-
|
|
911
|
-
private
|
|
912
888
|
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
889
|
+
new_tokens << token
|
|
890
|
+
prev_token_state = token.state
|
|
891
|
+
prev_token_end = start_offset + token.value.bytesize
|
|
916
892
|
end
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
893
|
+
|
|
894
|
+
unless data_loc # no trailing :on_sp with __END__ as it is always preceded by :on_nl
|
|
895
|
+
end_offset = eof_token.location.end_offset
|
|
896
|
+
if prev_token_end < end_offset
|
|
897
|
+
new_tokens << IgnoreStateToken.new([
|
|
898
|
+
[source.line(prev_token_end), source.column(prev_token_end)],
|
|
899
|
+
:on_sp,
|
|
900
|
+
source.slice(prev_token_end, end_offset - prev_token_end),
|
|
901
|
+
prev_token_state
|
|
902
|
+
])
|
|
922
903
|
end
|
|
923
904
|
end
|
|
905
|
+
|
|
906
|
+
new_tokens
|
|
924
907
|
end
|
|
925
908
|
end
|
|
926
909
|
|
|
927
|
-
private_constant :
|
|
910
|
+
private_constant :LexCompat
|
|
928
911
|
end
|