prism 0.23.0 → 0.25.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/BSDmakefile +58 -0
- data/CHANGELOG.md +65 -1
- data/Makefile +5 -2
- data/README.md +45 -6
- data/config.yml +499 -4
- data/docs/build_system.md +31 -0
- data/docs/configuration.md +2 -0
- data/docs/cruby_compilation.md +1 -1
- data/docs/parser_translation.md +14 -9
- data/docs/releasing.md +3 -3
- data/docs/ripper_translation.md +50 -0
- data/docs/ruby_api.md +1 -0
- data/docs/serialization.md +26 -5
- data/ext/prism/api_node.c +2342 -1801
- data/ext/prism/api_pack.c +9 -0
- data/ext/prism/extconf.rb +27 -11
- data/ext/prism/extension.c +313 -66
- data/ext/prism/extension.h +5 -4
- data/include/prism/ast.h +213 -64
- data/include/prism/defines.h +106 -2
- data/include/prism/diagnostic.h +134 -71
- data/include/prism/encoding.h +22 -4
- data/include/prism/node.h +93 -0
- data/include/prism/options.h +82 -7
- data/include/prism/pack.h +11 -0
- data/include/prism/parser.h +198 -53
- data/include/prism/prettyprint.h +8 -0
- data/include/prism/static_literals.h +118 -0
- data/include/prism/util/pm_buffer.h +65 -2
- data/include/prism/util/pm_constant_pool.h +18 -1
- data/include/prism/util/pm_integer.h +119 -0
- data/include/prism/util/pm_list.h +1 -1
- data/include/prism/util/pm_newline_list.h +12 -3
- data/include/prism/util/pm_string.h +26 -2
- data/include/prism/version.h +2 -2
- data/include/prism.h +59 -1
- data/lib/prism/compiler.rb +8 -1
- data/lib/prism/debug.rb +46 -3
- data/lib/prism/desugar_compiler.rb +225 -80
- data/lib/prism/dispatcher.rb +29 -0
- data/lib/prism/dot_visitor.rb +87 -16
- data/lib/prism/dsl.rb +315 -300
- data/lib/prism/ffi.rb +165 -84
- data/lib/prism/lex_compat.rb +17 -15
- data/lib/prism/mutation_compiler.rb +11 -0
- data/lib/prism/node.rb +4857 -3750
- data/lib/prism/node_ext.rb +77 -29
- data/lib/prism/pack.rb +4 -0
- data/lib/prism/parse_result/comments.rb +34 -17
- data/lib/prism/parse_result/newlines.rb +3 -1
- data/lib/prism/parse_result.rb +88 -34
- data/lib/prism/pattern.rb +16 -4
- data/lib/prism/polyfill/string.rb +12 -0
- data/lib/prism/serialize.rb +960 -327
- data/lib/prism/translation/parser/compiler.rb +152 -50
- data/lib/prism/translation/parser/lexer.rb +103 -22
- data/lib/prism/translation/parser/rubocop.rb +47 -11
- data/lib/prism/translation/parser.rb +134 -10
- data/lib/prism/translation/parser33.rb +12 -0
- data/lib/prism/translation/parser34.rb +12 -0
- data/lib/prism/translation/ripper/sexp.rb +125 -0
- data/lib/prism/translation/ripper/shim.rb +5 -0
- data/lib/prism/translation/ripper.rb +3248 -379
- data/lib/prism/translation/ruby_parser.rb +35 -18
- data/lib/prism/translation.rb +3 -1
- data/lib/prism/visitor.rb +10 -0
- data/lib/prism.rb +8 -2
- data/prism.gemspec +35 -4
- data/rbi/prism/compiler.rbi +14 -0
- data/rbi/prism/desugar_compiler.rbi +5 -0
- data/rbi/prism/mutation_compiler.rbi +5 -0
- data/rbi/prism/node.rbi +8221 -0
- data/rbi/prism/node_ext.rbi +102 -0
- data/rbi/prism/parse_result.rbi +304 -0
- data/rbi/prism/translation/parser/compiler.rbi +13 -0
- data/rbi/prism/translation/ripper/ripper_compiler.rbi +5 -0
- data/rbi/prism/translation/ripper.rbi +25 -0
- data/rbi/prism/translation/ruby_parser.rbi +11 -0
- data/rbi/prism/visitor.rbi +470 -0
- data/rbi/prism.rbi +39 -7749
- data/sig/prism/compiler.rbs +9 -0
- data/sig/prism/dispatcher.rbs +16 -0
- data/sig/prism/dot_visitor.rbs +6 -0
- data/sig/prism/dsl.rbs +462 -0
- data/sig/prism/mutation_compiler.rbs +158 -0
- data/sig/prism/node.rbs +3529 -0
- data/sig/prism/node_ext.rbs +78 -0
- data/sig/prism/pack.rbs +43 -0
- data/sig/prism/parse_result.rbs +127 -0
- data/sig/prism/pattern.rbs +13 -0
- data/sig/prism/serialize.rbs +7 -0
- data/sig/prism/visitor.rbs +168 -0
- data/sig/prism.rbs +188 -4767
- data/src/diagnostic.c +575 -230
- data/src/encoding.c +211 -108
- data/src/node.c +7526 -447
- data/src/options.c +36 -12
- data/src/pack.c +33 -17
- data/src/prettyprint.c +1297 -1388
- data/src/prism.c +3665 -1121
- data/src/regexp.c +17 -2
- data/src/serialize.c +47 -28
- data/src/static_literals.c +552 -0
- data/src/token_type.c +1 -0
- data/src/util/pm_buffer.c +147 -20
- data/src/util/pm_char.c +4 -4
- data/src/util/pm_constant_pool.c +35 -11
- data/src/util/pm_integer.c +629 -0
- data/src/util/pm_list.c +1 -1
- data/src/util/pm_newline_list.c +20 -8
- data/src/util/pm_string.c +134 -5
- data/src/util/pm_string_list.c +2 -2
- metadata +37 -6
- data/docs/ripper.md +0 -36
- data/rbi/prism_static.rbi +0 -207
- data/sig/prism_static.rbs +0 -201
@@ -116,7 +116,14 @@ module Prism
|
|
116
116
|
builder.pair_keyword([node.key.unescaped, srange(node.key.location)], visit(node.value))
|
117
117
|
end
|
118
118
|
elsif node.value.is_a?(ImplicitNode)
|
119
|
-
|
119
|
+
if (value = node.value.value).is_a?(LocalVariableReadNode)
|
120
|
+
builder.pair_keyword(
|
121
|
+
[node.key.unescaped, srange(node.key)],
|
122
|
+
builder.ident([value.name, srange(node.key.value_loc)]).updated(:lvar)
|
123
|
+
)
|
124
|
+
else
|
125
|
+
builder.pair_label([node.key.unescaped, srange(node.key.location)])
|
126
|
+
end
|
120
127
|
elsif node.operator_loc
|
121
128
|
builder.pair(visit(node.key), token(node.operator_loc), visit(node.value))
|
122
129
|
elsif node.key.is_a?(SymbolNode) && node.key.opening_loc.nil?
|
@@ -247,18 +254,30 @@ module Prism
|
|
247
254
|
|
248
255
|
if node.call_operator_loc.nil?
|
249
256
|
case name
|
257
|
+
when :-@
|
258
|
+
case (receiver = node.receiver).type
|
259
|
+
when :integer_node, :float_node, :rational_node, :imaginary_node
|
260
|
+
return visit(numeric_negate(node.message_loc, receiver))
|
261
|
+
end
|
250
262
|
when :!
|
251
263
|
return visit_block(builder.not_op(token(node.message_loc), token(node.opening_loc), visit(node.receiver), token(node.closing_loc)), block)
|
264
|
+
when :=~
|
265
|
+
if (receiver = node.receiver).is_a?(RegularExpressionNode)
|
266
|
+
return builder.match_op(visit(receiver), token(node.message_loc), visit(node.arguments.arguments.first))
|
267
|
+
end
|
252
268
|
when :[]
|
253
269
|
return visit_block(builder.index(visit(node.receiver), token(node.opening_loc), visit_all(arguments), token(node.closing_loc)), block)
|
254
270
|
when :[]=
|
255
271
|
if node.message != "[]=" && node.arguments && block.nil? && !node.safe_navigation?
|
272
|
+
arguments = node.arguments.arguments[...-1]
|
273
|
+
arguments << node.block if node.block
|
274
|
+
|
256
275
|
return visit_block(
|
257
276
|
builder.assign(
|
258
277
|
builder.index_asgn(
|
259
278
|
visit(node.receiver),
|
260
279
|
token(node.opening_loc),
|
261
|
-
visit_all(
|
280
|
+
visit_all(arguments),
|
262
281
|
token(node.closing_loc),
|
263
282
|
),
|
264
283
|
srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, ["="]),
|
@@ -387,9 +406,6 @@ module Prism
|
|
387
406
|
|
388
407
|
# @@foo = 1
|
389
408
|
# ^^^^^^^^^
|
390
|
-
#
|
391
|
-
# @@foo, @@bar = 1
|
392
|
-
# ^^^^^ ^^^^^
|
393
409
|
def visit_class_variable_write_node(node)
|
394
410
|
builder.assign(
|
395
411
|
builder.assignable(builder.cvar(token(node.name_loc))),
|
@@ -682,9 +698,6 @@ module Prism
|
|
682
698
|
|
683
699
|
# $foo = 1
|
684
700
|
# ^^^^^^^^
|
685
|
-
#
|
686
|
-
# $foo, $bar = 1
|
687
|
-
# ^^^^ ^^^^
|
688
701
|
def visit_global_variable_write_node(node)
|
689
702
|
builder.assign(
|
690
703
|
builder.assignable(builder.gvar(token(node.name_loc))),
|
@@ -788,8 +801,9 @@ module Prism
|
|
788
801
|
end
|
789
802
|
|
790
803
|
# 1i
|
804
|
+
# ^^
|
791
805
|
def visit_imaginary_node(node)
|
792
|
-
visit_numeric(node, builder.complex([node
|
806
|
+
visit_numeric(node, builder.complex([imaginary_value(node), srange(node.location)]))
|
793
807
|
end
|
794
808
|
|
795
809
|
# { foo: }
|
@@ -875,9 +889,6 @@ module Prism
|
|
875
889
|
|
876
890
|
# @foo = 1
|
877
891
|
# ^^^^^^^^
|
878
|
-
#
|
879
|
-
# @foo, @bar = 1
|
880
|
-
# ^^^^ ^^^^
|
881
892
|
def visit_instance_variable_write_node(node)
|
882
893
|
builder.assign(
|
883
894
|
builder.assignable(builder.ivar(token(node.name_loc))),
|
@@ -934,16 +945,37 @@ module Prism
|
|
934
945
|
# "foo #{bar}"
|
935
946
|
# ^^^^^^^^^^^^
|
936
947
|
def visit_interpolated_string_node(node)
|
937
|
-
if node.
|
948
|
+
if node.heredoc?
|
938
949
|
children, closing = visit_heredoc(node)
|
939
|
-
|
950
|
+
|
951
|
+
return builder.string_compose(token(node.opening_loc), children, closing)
|
952
|
+
end
|
953
|
+
|
954
|
+
parts = if node.parts.one? { |part| part.type == :string_node }
|
955
|
+
node.parts.flat_map do |node|
|
956
|
+
if node.type == :string_node && node.unescaped.lines.count >= 2
|
957
|
+
start_offset = node.content_loc.start_offset
|
958
|
+
|
959
|
+
node.unescaped.lines.map do |line|
|
960
|
+
end_offset = start_offset + line.length
|
961
|
+
offsets = srange_offsets(start_offset, end_offset)
|
962
|
+
start_offset = end_offset
|
963
|
+
|
964
|
+
builder.string_internal([line, offsets])
|
965
|
+
end
|
966
|
+
else
|
967
|
+
visit(node)
|
968
|
+
end
|
969
|
+
end
|
940
970
|
else
|
941
|
-
|
942
|
-
token(node.opening_loc),
|
943
|
-
visit_all(node.parts),
|
944
|
-
token(node.closing_loc)
|
945
|
-
)
|
971
|
+
visit_all(node.parts)
|
946
972
|
end
|
973
|
+
|
974
|
+
builder.string_compose(
|
975
|
+
token(node.opening_loc),
|
976
|
+
parts,
|
977
|
+
token(node.closing_loc)
|
978
|
+
)
|
947
979
|
end
|
948
980
|
|
949
981
|
# :"foo #{bar}"
|
@@ -959,7 +991,7 @@ module Prism
|
|
959
991
|
# `foo #{bar}`
|
960
992
|
# ^^^^^^^^^^^^
|
961
993
|
def visit_interpolated_x_string_node(node)
|
962
|
-
if node.
|
994
|
+
if node.heredoc?
|
963
995
|
children, closing = visit_heredoc(node)
|
964
996
|
builder.xstring_compose(token(node.opening_loc), children, closing)
|
965
997
|
else
|
@@ -990,6 +1022,7 @@ module Prism
|
|
990
1022
|
end
|
991
1023
|
|
992
1024
|
# -> {}
|
1025
|
+
# ^^^^^
|
993
1026
|
def visit_lambda_node(node)
|
994
1027
|
parameters = node.parameters
|
995
1028
|
|
@@ -1021,9 +1054,6 @@ module Prism
|
|
1021
1054
|
|
1022
1055
|
# foo = 1
|
1023
1056
|
# ^^^^^^^
|
1024
|
-
#
|
1025
|
-
# foo, bar = 1
|
1026
|
-
# ^^^ ^^^
|
1027
1057
|
def visit_local_variable_write_node(node)
|
1028
1058
|
builder.assign(
|
1029
1059
|
builder.assignable(builder.ident(token(node.name_loc))),
|
@@ -1062,22 +1092,12 @@ module Prism
|
|
1062
1092
|
|
1063
1093
|
# foo in bar
|
1064
1094
|
# ^^^^^^^^^^
|
1065
|
-
|
1066
|
-
|
1067
|
-
|
1068
|
-
|
1069
|
-
|
1070
|
-
|
1071
|
-
)
|
1072
|
-
end
|
1073
|
-
else
|
1074
|
-
def visit_match_predicate_node(node)
|
1075
|
-
builder.match_pattern(
|
1076
|
-
visit(node.value),
|
1077
|
-
token(node.operator_loc),
|
1078
|
-
within_pattern { |compiler| node.pattern.accept(compiler) }
|
1079
|
-
)
|
1080
|
-
end
|
1095
|
+
def visit_match_predicate_node(node)
|
1096
|
+
builder.match_pattern_p(
|
1097
|
+
visit(node.value),
|
1098
|
+
token(node.operator_loc),
|
1099
|
+
within_pattern { |compiler| node.pattern.accept(compiler) }
|
1100
|
+
)
|
1081
1101
|
end
|
1082
1102
|
|
1083
1103
|
# foo => bar
|
@@ -1263,7 +1283,8 @@ module Prism
|
|
1263
1283
|
# foo => ^(bar)
|
1264
1284
|
# ^^^^^^
|
1265
1285
|
def visit_pinned_expression_node(node)
|
1266
|
-
builder.
|
1286
|
+
expression = builder.begin(token(node.lparen_loc), visit(node.expression), token(node.rparen_loc))
|
1287
|
+
builder.pin(token(node.operator_loc), expression)
|
1267
1288
|
end
|
1268
1289
|
|
1269
1290
|
# foo = 1 and bar => ^foo
|
@@ -1322,7 +1343,7 @@ module Prism
|
|
1322
1343
|
# 1r
|
1323
1344
|
# ^^
|
1324
1345
|
def visit_rational_node(node)
|
1325
|
-
visit_numeric(node, builder.rational([node
|
1346
|
+
visit_numeric(node, builder.rational([rational_value(node), srange(node.location)]))
|
1326
1347
|
end
|
1327
1348
|
|
1328
1349
|
# redo
|
@@ -1418,6 +1439,11 @@ module Prism
|
|
1418
1439
|
builder.self(token(node.location))
|
1419
1440
|
end
|
1420
1441
|
|
1442
|
+
# A shareable constant.
|
1443
|
+
def visit_shareable_constant_node(node)
|
1444
|
+
visit(node.write)
|
1445
|
+
end
|
1446
|
+
|
1421
1447
|
# class << self; end
|
1422
1448
|
# ^^^^^^^^^^^^^^^^^^
|
1423
1449
|
def visit_singleton_class_node(node)
|
@@ -1476,15 +1502,29 @@ module Prism
|
|
1476
1502
|
# "foo"
|
1477
1503
|
# ^^^^^
|
1478
1504
|
def visit_string_node(node)
|
1479
|
-
if node.
|
1480
|
-
children, closing = visit_heredoc(
|
1505
|
+
if node.heredoc?
|
1506
|
+
children, closing = visit_heredoc(node.to_interpolated)
|
1481
1507
|
builder.string_compose(token(node.opening_loc), children, closing)
|
1482
1508
|
elsif node.opening == "?"
|
1483
1509
|
builder.character([node.unescaped, srange(node.location)])
|
1484
1510
|
else
|
1511
|
+
parts = if node.content.lines.count <= 1 || node.unescaped.lines.count <= 1
|
1512
|
+
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
|
1513
|
+
else
|
1514
|
+
start_offset = node.content_loc.start_offset
|
1515
|
+
|
1516
|
+
[node.content.lines, node.unescaped.lines].transpose.map do |content_line, unescaped_line|
|
1517
|
+
end_offset = start_offset + content_line.length
|
1518
|
+
offsets = srange_offsets(start_offset, end_offset)
|
1519
|
+
start_offset = end_offset
|
1520
|
+
|
1521
|
+
builder.string_internal([unescaped_line, offsets])
|
1522
|
+
end
|
1523
|
+
end
|
1524
|
+
|
1485
1525
|
builder.string_compose(
|
1486
1526
|
token(node.opening_loc),
|
1487
|
-
|
1527
|
+
parts,
|
1488
1528
|
token(node.closing_loc)
|
1489
1529
|
)
|
1490
1530
|
end
|
@@ -1523,9 +1563,23 @@ module Prism
|
|
1523
1563
|
builder.symbol([node.unescaped, srange(node.location)])
|
1524
1564
|
end
|
1525
1565
|
else
|
1566
|
+
parts = if node.value.lines.one?
|
1567
|
+
[builder.string_internal([node.unescaped, srange(node.value_loc)])]
|
1568
|
+
else
|
1569
|
+
start_offset = node.value_loc.start_offset
|
1570
|
+
|
1571
|
+
node.value.lines.map do |line|
|
1572
|
+
end_offset = start_offset + line.length
|
1573
|
+
offsets = srange_offsets(start_offset, end_offset)
|
1574
|
+
start_offset = end_offset
|
1575
|
+
|
1576
|
+
builder.string_internal([line, offsets])
|
1577
|
+
end
|
1578
|
+
end
|
1579
|
+
|
1526
1580
|
builder.symbol_compose(
|
1527
1581
|
token(node.opening_loc),
|
1528
|
-
|
1582
|
+
parts,
|
1529
1583
|
token(node.closing_loc)
|
1530
1584
|
)
|
1531
1585
|
end
|
@@ -1604,7 +1658,11 @@ module Prism
|
|
1604
1658
|
builder.when(
|
1605
1659
|
token(node.keyword_loc),
|
1606
1660
|
visit_all(node.conditions),
|
1607
|
-
|
1661
|
+
if node.then_keyword_loc
|
1662
|
+
token(node.then_keyword_loc)
|
1663
|
+
else
|
1664
|
+
srange_find(node.conditions.last.location.end_offset, node.statements&.location&.start_offset || (node.conditions.last.location.end_offset + 1), [";"])
|
1665
|
+
end,
|
1608
1666
|
visit(node.statements)
|
1609
1667
|
)
|
1610
1668
|
end
|
@@ -1637,13 +1695,27 @@ module Prism
|
|
1637
1695
|
# `foo`
|
1638
1696
|
# ^^^^^
|
1639
1697
|
def visit_x_string_node(node)
|
1640
|
-
if node.
|
1641
|
-
children, closing = visit_heredoc(
|
1698
|
+
if node.heredoc?
|
1699
|
+
children, closing = visit_heredoc(node.to_interpolated)
|
1642
1700
|
builder.xstring_compose(token(node.opening_loc), children, closing)
|
1643
1701
|
else
|
1702
|
+
parts = if node.unescaped.lines.one?
|
1703
|
+
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
|
1704
|
+
else
|
1705
|
+
start_offset = node.content_loc.start_offset
|
1706
|
+
|
1707
|
+
node.unescaped.lines.map do |line|
|
1708
|
+
end_offset = start_offset + line.length
|
1709
|
+
offsets = srange_offsets(start_offset, end_offset)
|
1710
|
+
start_offset = end_offset
|
1711
|
+
|
1712
|
+
builder.string_internal([line, offsets])
|
1713
|
+
end
|
1714
|
+
end
|
1715
|
+
|
1644
1716
|
builder.xstring_compose(
|
1645
1717
|
token(node.opening_loc),
|
1646
|
-
|
1718
|
+
parts,
|
1647
1719
|
token(node.closing_loc)
|
1648
1720
|
)
|
1649
1721
|
end
|
@@ -1687,6 +1759,26 @@ module Prism
|
|
1687
1759
|
forwarding
|
1688
1760
|
end
|
1689
1761
|
|
1762
|
+
# Because we have mutated the AST to allow for newlines in the middle of
|
1763
|
+
# a rational, we need to manually handle the value here.
|
1764
|
+
def imaginary_value(node)
|
1765
|
+
Complex(0, node.numeric.is_a?(RationalNode) ? rational_value(node.numeric) : node.numeric.value)
|
1766
|
+
end
|
1767
|
+
|
1768
|
+
# Negate the value of a numeric node. This is a special case where you
|
1769
|
+
# have a negative sign on one line and then a number on the next line.
|
1770
|
+
# In normal Ruby, this will always be a method call. The parser gem,
|
1771
|
+
# however, marks this as a numeric literal. We have to massage the tree
|
1772
|
+
# here to get it into the correct form.
|
1773
|
+
def numeric_negate(message_loc, receiver)
|
1774
|
+
case receiver.type
|
1775
|
+
when :integer_node, :float_node
|
1776
|
+
receiver.copy(value: -receiver.value, location: message_loc.join(receiver.location))
|
1777
|
+
when :rational_node, :imaginary_node
|
1778
|
+
receiver.copy(numeric: numeric_negate(message_loc, receiver.numeric), location: message_loc.join(receiver.location))
|
1779
|
+
end
|
1780
|
+
end
|
1781
|
+
|
1690
1782
|
# Blocks can have a special set of parameters that automatically expand
|
1691
1783
|
# when given arrays if they have a single required parameter and no
|
1692
1784
|
# other parameters.
|
@@ -1701,6 +1793,16 @@ module Prism
|
|
1701
1793
|
parameters.block.nil?
|
1702
1794
|
end
|
1703
1795
|
|
1796
|
+
# Because we have mutated the AST to allow for newlines in the middle of
|
1797
|
+
# a rational, we need to manually handle the value here.
|
1798
|
+
def rational_value(node)
|
1799
|
+
if node.numeric.is_a?(IntegerNode)
|
1800
|
+
Rational(node.numeric.value)
|
1801
|
+
else
|
1802
|
+
Rational(node.slice.gsub(/\s/, "").chomp("r"))
|
1803
|
+
end
|
1804
|
+
end
|
1805
|
+
|
1704
1806
|
# Locations in the parser gem AST are generated using this class. We
|
1705
1807
|
# store a reference to its constant to make it slightly faster to look
|
1706
1808
|
# up.
|
@@ -1767,7 +1869,7 @@ module Prism
|
|
1767
1869
|
|
1768
1870
|
# Visit a heredoc that can be either a string or an xstring.
|
1769
1871
|
def visit_heredoc(node)
|
1770
|
-
children =
|
1872
|
+
children = Array.new
|
1771
1873
|
node.parts.each do |part|
|
1772
1874
|
pushing =
|
1773
1875
|
if part.is_a?(StringNode) && part.unescaped.include?("\n")
|
@@ -167,7 +167,7 @@ module Prism
|
|
167
167
|
TILDE: :tTILDE,
|
168
168
|
UAMPERSAND: :tAMPER,
|
169
169
|
UCOLON_COLON: :tCOLON3,
|
170
|
-
UDOT_DOT: :
|
170
|
+
UDOT_DOT: :tBDOT2,
|
171
171
|
UDOT_DOT_DOT: :tBDOT3,
|
172
172
|
UMINUS: :tUMINUS,
|
173
173
|
UMINUS_NUM: :tUNARY_NUM,
|
@@ -177,12 +177,23 @@ module Prism
|
|
177
177
|
WORDS_SEP: :tSPACE
|
178
178
|
}
|
179
179
|
|
180
|
-
|
180
|
+
# These constants represent flags in our lex state. We really, really
|
181
|
+
# don't want to be using them and we really, really don't want to be
|
182
|
+
# exposing them as part of our public API. Unfortunately, we don't have
|
183
|
+
# another way of matching the exact tokens that the parser gem expects
|
184
|
+
# without them. We should find another way to do this, but in the
|
185
|
+
# meantime we'll hide them from the documentation and mark them as
|
186
|
+
# private constants.
|
187
|
+
EXPR_BEG = 0x1 # :nodoc:
|
188
|
+
EXPR_LABEL = 0x400 # :nodoc:
|
189
|
+
|
190
|
+
private_constant :TYPES, :EXPR_BEG, :EXPR_LABEL
|
181
191
|
|
182
192
|
# The Parser::Source::Buffer that the tokens were lexed from.
|
183
193
|
attr_reader :source_buffer
|
184
194
|
|
185
|
-
# An array of prism tokens
|
195
|
+
# An array of tuples that contain prism tokens and their associated lex
|
196
|
+
# state when they were lexed.
|
186
197
|
attr_reader :lexed
|
187
198
|
|
188
199
|
# A hash that maps offsets in bytes to offsets in characters.
|
@@ -202,12 +213,16 @@ module Prism
|
|
202
213
|
# Convert the prism tokens into the expected format for the parser gem.
|
203
214
|
def to_a
|
204
215
|
tokens = []
|
216
|
+
|
205
217
|
index = 0
|
218
|
+
length = lexed.length
|
219
|
+
|
220
|
+
heredoc_identifier_stack = []
|
206
221
|
|
207
|
-
while index <
|
208
|
-
token, = lexed[index]
|
222
|
+
while index < length
|
223
|
+
token, state = lexed[index]
|
209
224
|
index += 1
|
210
|
-
next if
|
225
|
+
next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type)
|
211
226
|
|
212
227
|
type = TYPES.fetch(token.type)
|
213
228
|
value = token.value
|
@@ -218,14 +233,18 @@ module Prism
|
|
218
233
|
value.delete_prefix!("?")
|
219
234
|
when :tCOMMENT
|
220
235
|
if token.type == :EMBDOC_BEGIN
|
221
|
-
|
236
|
+
start_index = index
|
237
|
+
|
238
|
+
while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1)
|
222
239
|
value += next_token.value
|
223
240
|
index += 1
|
224
241
|
end
|
225
242
|
|
226
|
-
|
227
|
-
|
228
|
-
|
243
|
+
if start_index != index
|
244
|
+
value += next_token.value
|
245
|
+
location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index][0].location.end_offset])
|
246
|
+
index += 1
|
247
|
+
end
|
229
248
|
else
|
230
249
|
value.chomp!
|
231
250
|
location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - 1])
|
@@ -233,7 +252,7 @@ module Prism
|
|
233
252
|
when :tNL
|
234
253
|
value = nil
|
235
254
|
when :tFLOAT
|
236
|
-
value =
|
255
|
+
value = parse_float(value)
|
237
256
|
when :tIMAGINARY
|
238
257
|
value = parse_complex(value)
|
239
258
|
when :tINTEGER
|
@@ -242,13 +261,15 @@ module Prism
|
|
242
261
|
location = Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])
|
243
262
|
end
|
244
263
|
|
245
|
-
value =
|
264
|
+
value = parse_integer(value)
|
246
265
|
when :tLABEL
|
247
266
|
value.chomp!(":")
|
248
267
|
when :tLABEL_END
|
249
268
|
value.chomp!(":")
|
269
|
+
when :tLCURLY
|
270
|
+
type = :tLBRACE if state == EXPR_BEG | EXPR_LABEL
|
250
271
|
when :tNTH_REF
|
251
|
-
value =
|
272
|
+
value = parse_integer(value.delete_prefix("$"))
|
252
273
|
when :tOP_ASGN
|
253
274
|
value.chomp!("=")
|
254
275
|
when :tRATIONAL
|
@@ -256,31 +277,69 @@ module Prism
|
|
256
277
|
when :tSPACE
|
257
278
|
value = nil
|
258
279
|
when :tSTRING_BEG
|
259
|
-
if
|
280
|
+
if token.type == :HEREDOC_START
|
281
|
+
heredoc_identifier_stack.push(value.match(/<<[-~]?["'`]?(?<heredoc_identifier>.*?)["'`]?\z/)[:heredoc_identifier])
|
282
|
+
end
|
283
|
+
if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END
|
260
284
|
next_location = token.location.join(next_token.location)
|
261
285
|
type = :tSTRING
|
262
286
|
value = ""
|
263
287
|
location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
|
264
288
|
index += 1
|
265
|
-
elsif ["\"", "'"].include?(value) && (next_token = lexed[index]) && next_token.type == :STRING_CONTENT && (next_next_token = lexed[index + 1]) && next_next_token.type == :STRING_END
|
289
|
+
elsif ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_CONTENT && next_token.value.lines.count <= 1 && (next_next_token = lexed[index + 1][0]) && next_next_token.type == :STRING_END
|
266
290
|
next_location = token.location.join(next_next_token.location)
|
267
291
|
type = :tSTRING
|
268
|
-
value = next_token.value
|
292
|
+
value = next_token.value.gsub("\\\\", "\\")
|
269
293
|
location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
|
270
294
|
index += 2
|
271
295
|
elsif value.start_with?("<<")
|
272
296
|
quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
|
273
|
-
|
297
|
+
if quote == "`"
|
298
|
+
type = :tXSTRING_BEG
|
299
|
+
value = "<<`"
|
300
|
+
else
|
301
|
+
value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
|
302
|
+
end
|
303
|
+
end
|
304
|
+
when :tSTRING_CONTENT
|
305
|
+
unless (lines = token.value.lines).one?
|
306
|
+
start_offset = offset_cache[token.location.start_offset]
|
307
|
+
lines.map do |line|
|
308
|
+
newline = line.end_with?("\r\n") ? "\r\n" : "\n"
|
309
|
+
chomped_line = line.chomp
|
310
|
+
if match = chomped_line.match(/(?<backslashes>\\+)\z/)
|
311
|
+
adjustment = match[:backslashes].size / 2
|
312
|
+
adjusted_line = chomped_line.delete_suffix("\\" * adjustment)
|
313
|
+
if match[:backslashes].size.odd?
|
314
|
+
adjusted_line.delete_suffix!("\\")
|
315
|
+
adjustment += 2
|
316
|
+
else
|
317
|
+
adjusted_line << newline
|
318
|
+
end
|
319
|
+
else
|
320
|
+
adjusted_line = line
|
321
|
+
adjustment = 0
|
322
|
+
end
|
323
|
+
|
324
|
+
end_offset = start_offset + adjusted_line.length + adjustment
|
325
|
+
tokens << [:tSTRING_CONTENT, [adjusted_line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]]
|
326
|
+
start_offset = end_offset
|
327
|
+
end
|
328
|
+
next
|
274
329
|
end
|
275
330
|
when :tSTRING_DVAR
|
276
331
|
value = nil
|
277
332
|
when :tSTRING_END
|
278
|
-
if token.type == :
|
333
|
+
if token.type == :HEREDOC_END && value.end_with?("\n")
|
334
|
+
newline_length = value.end_with?("\r\n") ? 2 : 1
|
335
|
+
value = heredoc_identifier_stack.pop
|
336
|
+
location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - newline_length])
|
337
|
+
elsif token.type == :REGEXP_END
|
279
338
|
value = value[0]
|
280
339
|
location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])
|
281
340
|
end
|
282
341
|
when :tSYMBEG
|
283
|
-
if (next_token = lexed[index]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
|
342
|
+
if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
|
284
343
|
next_location = token.location.join(next_token.location)
|
285
344
|
type = :tSYMBOL
|
286
345
|
value = next_token.value
|
@@ -289,9 +348,13 @@ module Prism
|
|
289
348
|
index += 1
|
290
349
|
end
|
291
350
|
when :tFID
|
292
|
-
if tokens
|
351
|
+
if !tokens.empty? && tokens.dig(-1, 0) == :kDEF
|
293
352
|
type = :tIDENTIFIER
|
294
353
|
end
|
354
|
+
when :tXSTRING_BEG
|
355
|
+
if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :STRING_END
|
356
|
+
type = :tBACK_REF2
|
357
|
+
end
|
295
358
|
end
|
296
359
|
|
297
360
|
tokens << [type, [value, location]]
|
@@ -306,6 +369,20 @@ module Prism
|
|
306
369
|
|
307
370
|
private
|
308
371
|
|
372
|
+
# Parse an integer from the string representation.
|
373
|
+
def parse_integer(value)
|
374
|
+
Integer(value)
|
375
|
+
rescue ArgumentError
|
376
|
+
0
|
377
|
+
end
|
378
|
+
|
379
|
+
# Parse a float from the string representation.
|
380
|
+
def parse_float(value)
|
381
|
+
Float(value)
|
382
|
+
rescue ArgumentError
|
383
|
+
0.0
|
384
|
+
end
|
385
|
+
|
309
386
|
# Parse a complex from the string representation.
|
310
387
|
def parse_complex(value)
|
311
388
|
value.chomp!("i")
|
@@ -313,10 +390,12 @@ module Prism
|
|
313
390
|
if value.end_with?("r")
|
314
391
|
Complex(0, parse_rational(value))
|
315
392
|
elsif value.start_with?(/0[BbOoDdXx]/)
|
316
|
-
Complex(0,
|
393
|
+
Complex(0, parse_integer(value))
|
317
394
|
else
|
318
395
|
Complex(0, value)
|
319
396
|
end
|
397
|
+
rescue ArgumentError
|
398
|
+
0i
|
320
399
|
end
|
321
400
|
|
322
401
|
# Parse a rational from the string representation.
|
@@ -324,10 +403,12 @@ module Prism
|
|
324
403
|
value.chomp!("r")
|
325
404
|
|
326
405
|
if value.start_with?(/0[BbOoDdXx]/)
|
327
|
-
Rational(
|
406
|
+
Rational(parse_integer(value))
|
328
407
|
else
|
329
408
|
Rational(value)
|
330
409
|
end
|
410
|
+
rescue ArgumentError
|
411
|
+
0r
|
331
412
|
end
|
332
413
|
end
|
333
414
|
end
|