prism 0.23.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/BSDmakefile +58 -0
- data/CHANGELOG.md +65 -1
- data/Makefile +5 -2
- data/README.md +45 -6
- data/config.yml +499 -4
- data/docs/build_system.md +31 -0
- data/docs/configuration.md +2 -0
- data/docs/cruby_compilation.md +1 -1
- data/docs/parser_translation.md +14 -9
- data/docs/releasing.md +3 -3
- data/docs/ripper_translation.md +50 -0
- data/docs/ruby_api.md +1 -0
- data/docs/serialization.md +26 -5
- data/ext/prism/api_node.c +2342 -1801
- data/ext/prism/api_pack.c +9 -0
- data/ext/prism/extconf.rb +27 -11
- data/ext/prism/extension.c +313 -66
- data/ext/prism/extension.h +5 -4
- data/include/prism/ast.h +213 -64
- data/include/prism/defines.h +106 -2
- data/include/prism/diagnostic.h +134 -71
- data/include/prism/encoding.h +22 -4
- data/include/prism/node.h +93 -0
- data/include/prism/options.h +82 -7
- data/include/prism/pack.h +11 -0
- data/include/prism/parser.h +198 -53
- data/include/prism/prettyprint.h +8 -0
- data/include/prism/static_literals.h +118 -0
- data/include/prism/util/pm_buffer.h +65 -2
- data/include/prism/util/pm_constant_pool.h +18 -1
- data/include/prism/util/pm_integer.h +119 -0
- data/include/prism/util/pm_list.h +1 -1
- data/include/prism/util/pm_newline_list.h +12 -3
- data/include/prism/util/pm_string.h +26 -2
- data/include/prism/version.h +2 -2
- data/include/prism.h +59 -1
- data/lib/prism/compiler.rb +8 -1
- data/lib/prism/debug.rb +46 -3
- data/lib/prism/desugar_compiler.rb +225 -80
- data/lib/prism/dispatcher.rb +29 -0
- data/lib/prism/dot_visitor.rb +87 -16
- data/lib/prism/dsl.rb +315 -300
- data/lib/prism/ffi.rb +165 -84
- data/lib/prism/lex_compat.rb +17 -15
- data/lib/prism/mutation_compiler.rb +11 -0
- data/lib/prism/node.rb +4857 -3750
- data/lib/prism/node_ext.rb +77 -29
- data/lib/prism/pack.rb +4 -0
- data/lib/prism/parse_result/comments.rb +34 -17
- data/lib/prism/parse_result/newlines.rb +3 -1
- data/lib/prism/parse_result.rb +88 -34
- data/lib/prism/pattern.rb +16 -4
- data/lib/prism/polyfill/string.rb +12 -0
- data/lib/prism/serialize.rb +960 -327
- data/lib/prism/translation/parser/compiler.rb +152 -50
- data/lib/prism/translation/parser/lexer.rb +103 -22
- data/lib/prism/translation/parser/rubocop.rb +47 -11
- data/lib/prism/translation/parser.rb +134 -10
- data/lib/prism/translation/parser33.rb +12 -0
- data/lib/prism/translation/parser34.rb +12 -0
- data/lib/prism/translation/ripper/sexp.rb +125 -0
- data/lib/prism/translation/ripper/shim.rb +5 -0
- data/lib/prism/translation/ripper.rb +3248 -379
- data/lib/prism/translation/ruby_parser.rb +35 -18
- data/lib/prism/translation.rb +3 -1
- data/lib/prism/visitor.rb +10 -0
- data/lib/prism.rb +8 -2
- data/prism.gemspec +35 -4
- data/rbi/prism/compiler.rbi +14 -0
- data/rbi/prism/desugar_compiler.rbi +5 -0
- data/rbi/prism/mutation_compiler.rbi +5 -0
- data/rbi/prism/node.rbi +8221 -0
- data/rbi/prism/node_ext.rbi +102 -0
- data/rbi/prism/parse_result.rbi +304 -0
- data/rbi/prism/translation/parser/compiler.rbi +13 -0
- data/rbi/prism/translation/ripper/ripper_compiler.rbi +5 -0
- data/rbi/prism/translation/ripper.rbi +25 -0
- data/rbi/prism/translation/ruby_parser.rbi +11 -0
- data/rbi/prism/visitor.rbi +470 -0
- data/rbi/prism.rbi +39 -7749
- data/sig/prism/compiler.rbs +9 -0
- data/sig/prism/dispatcher.rbs +16 -0
- data/sig/prism/dot_visitor.rbs +6 -0
- data/sig/prism/dsl.rbs +462 -0
- data/sig/prism/mutation_compiler.rbs +158 -0
- data/sig/prism/node.rbs +3529 -0
- data/sig/prism/node_ext.rbs +78 -0
- data/sig/prism/pack.rbs +43 -0
- data/sig/prism/parse_result.rbs +127 -0
- data/sig/prism/pattern.rbs +13 -0
- data/sig/prism/serialize.rbs +7 -0
- data/sig/prism/visitor.rbs +168 -0
- data/sig/prism.rbs +188 -4767
- data/src/diagnostic.c +575 -230
- data/src/encoding.c +211 -108
- data/src/node.c +7526 -447
- data/src/options.c +36 -12
- data/src/pack.c +33 -17
- data/src/prettyprint.c +1297 -1388
- data/src/prism.c +3665 -1121
- data/src/regexp.c +17 -2
- data/src/serialize.c +47 -28
- data/src/static_literals.c +552 -0
- data/src/token_type.c +1 -0
- data/src/util/pm_buffer.c +147 -20
- data/src/util/pm_char.c +4 -4
- data/src/util/pm_constant_pool.c +35 -11
- data/src/util/pm_integer.c +629 -0
- data/src/util/pm_list.c +1 -1
- data/src/util/pm_newline_list.c +20 -8
- data/src/util/pm_string.c +134 -5
- data/src/util/pm_string_list.c +2 -2
- metadata +37 -6
- data/docs/ripper.md +0 -36
- data/rbi/prism_static.rbi +0 -207
- data/sig/prism_static.rbs +0 -201
@@ -116,7 +116,14 @@ module Prism
|
|
116
116
|
builder.pair_keyword([node.key.unescaped, srange(node.key.location)], visit(node.value))
|
117
117
|
end
|
118
118
|
elsif node.value.is_a?(ImplicitNode)
|
119
|
-
|
119
|
+
if (value = node.value.value).is_a?(LocalVariableReadNode)
|
120
|
+
builder.pair_keyword(
|
121
|
+
[node.key.unescaped, srange(node.key)],
|
122
|
+
builder.ident([value.name, srange(node.key.value_loc)]).updated(:lvar)
|
123
|
+
)
|
124
|
+
else
|
125
|
+
builder.pair_label([node.key.unescaped, srange(node.key.location)])
|
126
|
+
end
|
120
127
|
elsif node.operator_loc
|
121
128
|
builder.pair(visit(node.key), token(node.operator_loc), visit(node.value))
|
122
129
|
elsif node.key.is_a?(SymbolNode) && node.key.opening_loc.nil?
|
@@ -247,18 +254,30 @@ module Prism
|
|
247
254
|
|
248
255
|
if node.call_operator_loc.nil?
|
249
256
|
case name
|
257
|
+
when :-@
|
258
|
+
case (receiver = node.receiver).type
|
259
|
+
when :integer_node, :float_node, :rational_node, :imaginary_node
|
260
|
+
return visit(numeric_negate(node.message_loc, receiver))
|
261
|
+
end
|
250
262
|
when :!
|
251
263
|
return visit_block(builder.not_op(token(node.message_loc), token(node.opening_loc), visit(node.receiver), token(node.closing_loc)), block)
|
264
|
+
when :=~
|
265
|
+
if (receiver = node.receiver).is_a?(RegularExpressionNode)
|
266
|
+
return builder.match_op(visit(receiver), token(node.message_loc), visit(node.arguments.arguments.first))
|
267
|
+
end
|
252
268
|
when :[]
|
253
269
|
return visit_block(builder.index(visit(node.receiver), token(node.opening_loc), visit_all(arguments), token(node.closing_loc)), block)
|
254
270
|
when :[]=
|
255
271
|
if node.message != "[]=" && node.arguments && block.nil? && !node.safe_navigation?
|
272
|
+
arguments = node.arguments.arguments[...-1]
|
273
|
+
arguments << node.block if node.block
|
274
|
+
|
256
275
|
return visit_block(
|
257
276
|
builder.assign(
|
258
277
|
builder.index_asgn(
|
259
278
|
visit(node.receiver),
|
260
279
|
token(node.opening_loc),
|
261
|
-
visit_all(
|
280
|
+
visit_all(arguments),
|
262
281
|
token(node.closing_loc),
|
263
282
|
),
|
264
283
|
srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, ["="]),
|
@@ -387,9 +406,6 @@ module Prism
|
|
387
406
|
|
388
407
|
# @@foo = 1
|
389
408
|
# ^^^^^^^^^
|
390
|
-
#
|
391
|
-
# @@foo, @@bar = 1
|
392
|
-
# ^^^^^ ^^^^^
|
393
409
|
def visit_class_variable_write_node(node)
|
394
410
|
builder.assign(
|
395
411
|
builder.assignable(builder.cvar(token(node.name_loc))),
|
@@ -682,9 +698,6 @@ module Prism
|
|
682
698
|
|
683
699
|
# $foo = 1
|
684
700
|
# ^^^^^^^^
|
685
|
-
#
|
686
|
-
# $foo, $bar = 1
|
687
|
-
# ^^^^ ^^^^
|
688
701
|
def visit_global_variable_write_node(node)
|
689
702
|
builder.assign(
|
690
703
|
builder.assignable(builder.gvar(token(node.name_loc))),
|
@@ -788,8 +801,9 @@ module Prism
|
|
788
801
|
end
|
789
802
|
|
790
803
|
# 1i
|
804
|
+
# ^^
|
791
805
|
def visit_imaginary_node(node)
|
792
|
-
visit_numeric(node, builder.complex([node
|
806
|
+
visit_numeric(node, builder.complex([imaginary_value(node), srange(node.location)]))
|
793
807
|
end
|
794
808
|
|
795
809
|
# { foo: }
|
@@ -875,9 +889,6 @@ module Prism
|
|
875
889
|
|
876
890
|
# @foo = 1
|
877
891
|
# ^^^^^^^^
|
878
|
-
#
|
879
|
-
# @foo, @bar = 1
|
880
|
-
# ^^^^ ^^^^
|
881
892
|
def visit_instance_variable_write_node(node)
|
882
893
|
builder.assign(
|
883
894
|
builder.assignable(builder.ivar(token(node.name_loc))),
|
@@ -934,16 +945,37 @@ module Prism
|
|
934
945
|
# "foo #{bar}"
|
935
946
|
# ^^^^^^^^^^^^
|
936
947
|
def visit_interpolated_string_node(node)
|
937
|
-
if node.
|
948
|
+
if node.heredoc?
|
938
949
|
children, closing = visit_heredoc(node)
|
939
|
-
|
950
|
+
|
951
|
+
return builder.string_compose(token(node.opening_loc), children, closing)
|
952
|
+
end
|
953
|
+
|
954
|
+
parts = if node.parts.one? { |part| part.type == :string_node }
|
955
|
+
node.parts.flat_map do |node|
|
956
|
+
if node.type == :string_node && node.unescaped.lines.count >= 2
|
957
|
+
start_offset = node.content_loc.start_offset
|
958
|
+
|
959
|
+
node.unescaped.lines.map do |line|
|
960
|
+
end_offset = start_offset + line.length
|
961
|
+
offsets = srange_offsets(start_offset, end_offset)
|
962
|
+
start_offset = end_offset
|
963
|
+
|
964
|
+
builder.string_internal([line, offsets])
|
965
|
+
end
|
966
|
+
else
|
967
|
+
visit(node)
|
968
|
+
end
|
969
|
+
end
|
940
970
|
else
|
941
|
-
|
942
|
-
token(node.opening_loc),
|
943
|
-
visit_all(node.parts),
|
944
|
-
token(node.closing_loc)
|
945
|
-
)
|
971
|
+
visit_all(node.parts)
|
946
972
|
end
|
973
|
+
|
974
|
+
builder.string_compose(
|
975
|
+
token(node.opening_loc),
|
976
|
+
parts,
|
977
|
+
token(node.closing_loc)
|
978
|
+
)
|
947
979
|
end
|
948
980
|
|
949
981
|
# :"foo #{bar}"
|
@@ -959,7 +991,7 @@ module Prism
|
|
959
991
|
# `foo #{bar}`
|
960
992
|
# ^^^^^^^^^^^^
|
961
993
|
def visit_interpolated_x_string_node(node)
|
962
|
-
if node.
|
994
|
+
if node.heredoc?
|
963
995
|
children, closing = visit_heredoc(node)
|
964
996
|
builder.xstring_compose(token(node.opening_loc), children, closing)
|
965
997
|
else
|
@@ -990,6 +1022,7 @@ module Prism
|
|
990
1022
|
end
|
991
1023
|
|
992
1024
|
# -> {}
|
1025
|
+
# ^^^^^
|
993
1026
|
def visit_lambda_node(node)
|
994
1027
|
parameters = node.parameters
|
995
1028
|
|
@@ -1021,9 +1054,6 @@ module Prism
|
|
1021
1054
|
|
1022
1055
|
# foo = 1
|
1023
1056
|
# ^^^^^^^
|
1024
|
-
#
|
1025
|
-
# foo, bar = 1
|
1026
|
-
# ^^^ ^^^
|
1027
1057
|
def visit_local_variable_write_node(node)
|
1028
1058
|
builder.assign(
|
1029
1059
|
builder.assignable(builder.ident(token(node.name_loc))),
|
@@ -1062,22 +1092,12 @@ module Prism
|
|
1062
1092
|
|
1063
1093
|
# foo in bar
|
1064
1094
|
# ^^^^^^^^^^
|
1065
|
-
|
1066
|
-
|
1067
|
-
|
1068
|
-
|
1069
|
-
|
1070
|
-
|
1071
|
-
)
|
1072
|
-
end
|
1073
|
-
else
|
1074
|
-
def visit_match_predicate_node(node)
|
1075
|
-
builder.match_pattern(
|
1076
|
-
visit(node.value),
|
1077
|
-
token(node.operator_loc),
|
1078
|
-
within_pattern { |compiler| node.pattern.accept(compiler) }
|
1079
|
-
)
|
1080
|
-
end
|
1095
|
+
def visit_match_predicate_node(node)
|
1096
|
+
builder.match_pattern_p(
|
1097
|
+
visit(node.value),
|
1098
|
+
token(node.operator_loc),
|
1099
|
+
within_pattern { |compiler| node.pattern.accept(compiler) }
|
1100
|
+
)
|
1081
1101
|
end
|
1082
1102
|
|
1083
1103
|
# foo => bar
|
@@ -1263,7 +1283,8 @@ module Prism
|
|
1263
1283
|
# foo => ^(bar)
|
1264
1284
|
# ^^^^^^
|
1265
1285
|
def visit_pinned_expression_node(node)
|
1266
|
-
builder.
|
1286
|
+
expression = builder.begin(token(node.lparen_loc), visit(node.expression), token(node.rparen_loc))
|
1287
|
+
builder.pin(token(node.operator_loc), expression)
|
1267
1288
|
end
|
1268
1289
|
|
1269
1290
|
# foo = 1 and bar => ^foo
|
@@ -1322,7 +1343,7 @@ module Prism
|
|
1322
1343
|
# 1r
|
1323
1344
|
# ^^
|
1324
1345
|
def visit_rational_node(node)
|
1325
|
-
visit_numeric(node, builder.rational([node
|
1346
|
+
visit_numeric(node, builder.rational([rational_value(node), srange(node.location)]))
|
1326
1347
|
end
|
1327
1348
|
|
1328
1349
|
# redo
|
@@ -1418,6 +1439,11 @@ module Prism
|
|
1418
1439
|
builder.self(token(node.location))
|
1419
1440
|
end
|
1420
1441
|
|
1442
|
+
# A shareable constant.
|
1443
|
+
def visit_shareable_constant_node(node)
|
1444
|
+
visit(node.write)
|
1445
|
+
end
|
1446
|
+
|
1421
1447
|
# class << self; end
|
1422
1448
|
# ^^^^^^^^^^^^^^^^^^
|
1423
1449
|
def visit_singleton_class_node(node)
|
@@ -1476,15 +1502,29 @@ module Prism
|
|
1476
1502
|
# "foo"
|
1477
1503
|
# ^^^^^
|
1478
1504
|
def visit_string_node(node)
|
1479
|
-
if node.
|
1480
|
-
children, closing = visit_heredoc(
|
1505
|
+
if node.heredoc?
|
1506
|
+
children, closing = visit_heredoc(node.to_interpolated)
|
1481
1507
|
builder.string_compose(token(node.opening_loc), children, closing)
|
1482
1508
|
elsif node.opening == "?"
|
1483
1509
|
builder.character([node.unescaped, srange(node.location)])
|
1484
1510
|
else
|
1511
|
+
parts = if node.content.lines.count <= 1 || node.unescaped.lines.count <= 1
|
1512
|
+
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
|
1513
|
+
else
|
1514
|
+
start_offset = node.content_loc.start_offset
|
1515
|
+
|
1516
|
+
[node.content.lines, node.unescaped.lines].transpose.map do |content_line, unescaped_line|
|
1517
|
+
end_offset = start_offset + content_line.length
|
1518
|
+
offsets = srange_offsets(start_offset, end_offset)
|
1519
|
+
start_offset = end_offset
|
1520
|
+
|
1521
|
+
builder.string_internal([unescaped_line, offsets])
|
1522
|
+
end
|
1523
|
+
end
|
1524
|
+
|
1485
1525
|
builder.string_compose(
|
1486
1526
|
token(node.opening_loc),
|
1487
|
-
|
1527
|
+
parts,
|
1488
1528
|
token(node.closing_loc)
|
1489
1529
|
)
|
1490
1530
|
end
|
@@ -1523,9 +1563,23 @@ module Prism
|
|
1523
1563
|
builder.symbol([node.unescaped, srange(node.location)])
|
1524
1564
|
end
|
1525
1565
|
else
|
1566
|
+
parts = if node.value.lines.one?
|
1567
|
+
[builder.string_internal([node.unescaped, srange(node.value_loc)])]
|
1568
|
+
else
|
1569
|
+
start_offset = node.value_loc.start_offset
|
1570
|
+
|
1571
|
+
node.value.lines.map do |line|
|
1572
|
+
end_offset = start_offset + line.length
|
1573
|
+
offsets = srange_offsets(start_offset, end_offset)
|
1574
|
+
start_offset = end_offset
|
1575
|
+
|
1576
|
+
builder.string_internal([line, offsets])
|
1577
|
+
end
|
1578
|
+
end
|
1579
|
+
|
1526
1580
|
builder.symbol_compose(
|
1527
1581
|
token(node.opening_loc),
|
1528
|
-
|
1582
|
+
parts,
|
1529
1583
|
token(node.closing_loc)
|
1530
1584
|
)
|
1531
1585
|
end
|
@@ -1604,7 +1658,11 @@ module Prism
|
|
1604
1658
|
builder.when(
|
1605
1659
|
token(node.keyword_loc),
|
1606
1660
|
visit_all(node.conditions),
|
1607
|
-
|
1661
|
+
if node.then_keyword_loc
|
1662
|
+
token(node.then_keyword_loc)
|
1663
|
+
else
|
1664
|
+
srange_find(node.conditions.last.location.end_offset, node.statements&.location&.start_offset || (node.conditions.last.location.end_offset + 1), [";"])
|
1665
|
+
end,
|
1608
1666
|
visit(node.statements)
|
1609
1667
|
)
|
1610
1668
|
end
|
@@ -1637,13 +1695,27 @@ module Prism
|
|
1637
1695
|
# `foo`
|
1638
1696
|
# ^^^^^
|
1639
1697
|
def visit_x_string_node(node)
|
1640
|
-
if node.
|
1641
|
-
children, closing = visit_heredoc(
|
1698
|
+
if node.heredoc?
|
1699
|
+
children, closing = visit_heredoc(node.to_interpolated)
|
1642
1700
|
builder.xstring_compose(token(node.opening_loc), children, closing)
|
1643
1701
|
else
|
1702
|
+
parts = if node.unescaped.lines.one?
|
1703
|
+
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
|
1704
|
+
else
|
1705
|
+
start_offset = node.content_loc.start_offset
|
1706
|
+
|
1707
|
+
node.unescaped.lines.map do |line|
|
1708
|
+
end_offset = start_offset + line.length
|
1709
|
+
offsets = srange_offsets(start_offset, end_offset)
|
1710
|
+
start_offset = end_offset
|
1711
|
+
|
1712
|
+
builder.string_internal([line, offsets])
|
1713
|
+
end
|
1714
|
+
end
|
1715
|
+
|
1644
1716
|
builder.xstring_compose(
|
1645
1717
|
token(node.opening_loc),
|
1646
|
-
|
1718
|
+
parts,
|
1647
1719
|
token(node.closing_loc)
|
1648
1720
|
)
|
1649
1721
|
end
|
@@ -1687,6 +1759,26 @@ module Prism
|
|
1687
1759
|
forwarding
|
1688
1760
|
end
|
1689
1761
|
|
1762
|
+
# Because we have mutated the AST to allow for newlines in the middle of
|
1763
|
+
# a rational, we need to manually handle the value here.
|
1764
|
+
def imaginary_value(node)
|
1765
|
+
Complex(0, node.numeric.is_a?(RationalNode) ? rational_value(node.numeric) : node.numeric.value)
|
1766
|
+
end
|
1767
|
+
|
1768
|
+
# Negate the value of a numeric node. This is a special case where you
|
1769
|
+
# have a negative sign on one line and then a number on the next line.
|
1770
|
+
# In normal Ruby, this will always be a method call. The parser gem,
|
1771
|
+
# however, marks this as a numeric literal. We have to massage the tree
|
1772
|
+
# here to get it into the correct form.
|
1773
|
+
def numeric_negate(message_loc, receiver)
|
1774
|
+
case receiver.type
|
1775
|
+
when :integer_node, :float_node
|
1776
|
+
receiver.copy(value: -receiver.value, location: message_loc.join(receiver.location))
|
1777
|
+
when :rational_node, :imaginary_node
|
1778
|
+
receiver.copy(numeric: numeric_negate(message_loc, receiver.numeric), location: message_loc.join(receiver.location))
|
1779
|
+
end
|
1780
|
+
end
|
1781
|
+
|
1690
1782
|
# Blocks can have a special set of parameters that automatically expand
|
1691
1783
|
# when given arrays if they have a single required parameter and no
|
1692
1784
|
# other parameters.
|
@@ -1701,6 +1793,16 @@ module Prism
|
|
1701
1793
|
parameters.block.nil?
|
1702
1794
|
end
|
1703
1795
|
|
1796
|
+
# Because we have mutated the AST to allow for newlines in the middle of
|
1797
|
+
# a rational, we need to manually handle the value here.
|
1798
|
+
def rational_value(node)
|
1799
|
+
if node.numeric.is_a?(IntegerNode)
|
1800
|
+
Rational(node.numeric.value)
|
1801
|
+
else
|
1802
|
+
Rational(node.slice.gsub(/\s/, "").chomp("r"))
|
1803
|
+
end
|
1804
|
+
end
|
1805
|
+
|
1704
1806
|
# Locations in the parser gem AST are generated using this class. We
|
1705
1807
|
# store a reference to its constant to make it slightly faster to look
|
1706
1808
|
# up.
|
@@ -1767,7 +1869,7 @@ module Prism
|
|
1767
1869
|
|
1768
1870
|
# Visit a heredoc that can be either a string or an xstring.
|
1769
1871
|
def visit_heredoc(node)
|
1770
|
-
children =
|
1872
|
+
children = Array.new
|
1771
1873
|
node.parts.each do |part|
|
1772
1874
|
pushing =
|
1773
1875
|
if part.is_a?(StringNode) && part.unescaped.include?("\n")
|
@@ -167,7 +167,7 @@ module Prism
|
|
167
167
|
TILDE: :tTILDE,
|
168
168
|
UAMPERSAND: :tAMPER,
|
169
169
|
UCOLON_COLON: :tCOLON3,
|
170
|
-
UDOT_DOT: :
|
170
|
+
UDOT_DOT: :tBDOT2,
|
171
171
|
UDOT_DOT_DOT: :tBDOT3,
|
172
172
|
UMINUS: :tUMINUS,
|
173
173
|
UMINUS_NUM: :tUNARY_NUM,
|
@@ -177,12 +177,23 @@ module Prism
|
|
177
177
|
WORDS_SEP: :tSPACE
|
178
178
|
}
|
179
179
|
|
180
|
-
|
180
|
+
# These constants represent flags in our lex state. We really, really
|
181
|
+
# don't want to be using them and we really, really don't want to be
|
182
|
+
# exposing them as part of our public API. Unfortunately, we don't have
|
183
|
+
# another way of matching the exact tokens that the parser gem expects
|
184
|
+
# without them. We should find another way to do this, but in the
|
185
|
+
# meantime we'll hide them from the documentation and mark them as
|
186
|
+
# private constants.
|
187
|
+
EXPR_BEG = 0x1 # :nodoc:
|
188
|
+
EXPR_LABEL = 0x400 # :nodoc:
|
189
|
+
|
190
|
+
private_constant :TYPES, :EXPR_BEG, :EXPR_LABEL
|
181
191
|
|
182
192
|
# The Parser::Source::Buffer that the tokens were lexed from.
|
183
193
|
attr_reader :source_buffer
|
184
194
|
|
185
|
-
# An array of prism tokens
|
195
|
+
# An array of tuples that contain prism tokens and their associated lex
|
196
|
+
# state when they were lexed.
|
186
197
|
attr_reader :lexed
|
187
198
|
|
188
199
|
# A hash that maps offsets in bytes to offsets in characters.
|
@@ -202,12 +213,16 @@ module Prism
|
|
202
213
|
# Convert the prism tokens into the expected format for the parser gem.
|
203
214
|
def to_a
|
204
215
|
tokens = []
|
216
|
+
|
205
217
|
index = 0
|
218
|
+
length = lexed.length
|
219
|
+
|
220
|
+
heredoc_identifier_stack = []
|
206
221
|
|
207
|
-
while index <
|
208
|
-
token, = lexed[index]
|
222
|
+
while index < length
|
223
|
+
token, state = lexed[index]
|
209
224
|
index += 1
|
210
|
-
next if
|
225
|
+
next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type)
|
211
226
|
|
212
227
|
type = TYPES.fetch(token.type)
|
213
228
|
value = token.value
|
@@ -218,14 +233,18 @@ module Prism
|
|
218
233
|
value.delete_prefix!("?")
|
219
234
|
when :tCOMMENT
|
220
235
|
if token.type == :EMBDOC_BEGIN
|
221
|
-
|
236
|
+
start_index = index
|
237
|
+
|
238
|
+
while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1)
|
222
239
|
value += next_token.value
|
223
240
|
index += 1
|
224
241
|
end
|
225
242
|
|
226
|
-
|
227
|
-
|
228
|
-
|
243
|
+
if start_index != index
|
244
|
+
value += next_token.value
|
245
|
+
location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index][0].location.end_offset])
|
246
|
+
index += 1
|
247
|
+
end
|
229
248
|
else
|
230
249
|
value.chomp!
|
231
250
|
location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - 1])
|
@@ -233,7 +252,7 @@ module Prism
|
|
233
252
|
when :tNL
|
234
253
|
value = nil
|
235
254
|
when :tFLOAT
|
236
|
-
value =
|
255
|
+
value = parse_float(value)
|
237
256
|
when :tIMAGINARY
|
238
257
|
value = parse_complex(value)
|
239
258
|
when :tINTEGER
|
@@ -242,13 +261,15 @@ module Prism
|
|
242
261
|
location = Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])
|
243
262
|
end
|
244
263
|
|
245
|
-
value =
|
264
|
+
value = parse_integer(value)
|
246
265
|
when :tLABEL
|
247
266
|
value.chomp!(":")
|
248
267
|
when :tLABEL_END
|
249
268
|
value.chomp!(":")
|
269
|
+
when :tLCURLY
|
270
|
+
type = :tLBRACE if state == EXPR_BEG | EXPR_LABEL
|
250
271
|
when :tNTH_REF
|
251
|
-
value =
|
272
|
+
value = parse_integer(value.delete_prefix("$"))
|
252
273
|
when :tOP_ASGN
|
253
274
|
value.chomp!("=")
|
254
275
|
when :tRATIONAL
|
@@ -256,31 +277,69 @@ module Prism
|
|
256
277
|
when :tSPACE
|
257
278
|
value = nil
|
258
279
|
when :tSTRING_BEG
|
259
|
-
if
|
280
|
+
if token.type == :HEREDOC_START
|
281
|
+
heredoc_identifier_stack.push(value.match(/<<[-~]?["'`]?(?<heredoc_identifier>.*?)["'`]?\z/)[:heredoc_identifier])
|
282
|
+
end
|
283
|
+
if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END
|
260
284
|
next_location = token.location.join(next_token.location)
|
261
285
|
type = :tSTRING
|
262
286
|
value = ""
|
263
287
|
location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
|
264
288
|
index += 1
|
265
|
-
elsif ["\"", "'"].include?(value) && (next_token = lexed[index]) && next_token.type == :STRING_CONTENT && (next_next_token = lexed[index + 1]) && next_next_token.type == :STRING_END
|
289
|
+
elsif ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_CONTENT && next_token.value.lines.count <= 1 && (next_next_token = lexed[index + 1][0]) && next_next_token.type == :STRING_END
|
266
290
|
next_location = token.location.join(next_next_token.location)
|
267
291
|
type = :tSTRING
|
268
|
-
value = next_token.value
|
292
|
+
value = next_token.value.gsub("\\\\", "\\")
|
269
293
|
location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
|
270
294
|
index += 2
|
271
295
|
elsif value.start_with?("<<")
|
272
296
|
quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
|
273
|
-
|
297
|
+
if quote == "`"
|
298
|
+
type = :tXSTRING_BEG
|
299
|
+
value = "<<`"
|
300
|
+
else
|
301
|
+
value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
|
302
|
+
end
|
303
|
+
end
|
304
|
+
when :tSTRING_CONTENT
|
305
|
+
unless (lines = token.value.lines).one?
|
306
|
+
start_offset = offset_cache[token.location.start_offset]
|
307
|
+
lines.map do |line|
|
308
|
+
newline = line.end_with?("\r\n") ? "\r\n" : "\n"
|
309
|
+
chomped_line = line.chomp
|
310
|
+
if match = chomped_line.match(/(?<backslashes>\\+)\z/)
|
311
|
+
adjustment = match[:backslashes].size / 2
|
312
|
+
adjusted_line = chomped_line.delete_suffix("\\" * adjustment)
|
313
|
+
if match[:backslashes].size.odd?
|
314
|
+
adjusted_line.delete_suffix!("\\")
|
315
|
+
adjustment += 2
|
316
|
+
else
|
317
|
+
adjusted_line << newline
|
318
|
+
end
|
319
|
+
else
|
320
|
+
adjusted_line = line
|
321
|
+
adjustment = 0
|
322
|
+
end
|
323
|
+
|
324
|
+
end_offset = start_offset + adjusted_line.length + adjustment
|
325
|
+
tokens << [:tSTRING_CONTENT, [adjusted_line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]]
|
326
|
+
start_offset = end_offset
|
327
|
+
end
|
328
|
+
next
|
274
329
|
end
|
275
330
|
when :tSTRING_DVAR
|
276
331
|
value = nil
|
277
332
|
when :tSTRING_END
|
278
|
-
if token.type == :
|
333
|
+
if token.type == :HEREDOC_END && value.end_with?("\n")
|
334
|
+
newline_length = value.end_with?("\r\n") ? 2 : 1
|
335
|
+
value = heredoc_identifier_stack.pop
|
336
|
+
location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - newline_length])
|
337
|
+
elsif token.type == :REGEXP_END
|
279
338
|
value = value[0]
|
280
339
|
location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])
|
281
340
|
end
|
282
341
|
when :tSYMBEG
|
283
|
-
if (next_token = lexed[index]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
|
342
|
+
if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
|
284
343
|
next_location = token.location.join(next_token.location)
|
285
344
|
type = :tSYMBOL
|
286
345
|
value = next_token.value
|
@@ -289,9 +348,13 @@ module Prism
|
|
289
348
|
index += 1
|
290
349
|
end
|
291
350
|
when :tFID
|
292
|
-
if tokens
|
351
|
+
if !tokens.empty? && tokens.dig(-1, 0) == :kDEF
|
293
352
|
type = :tIDENTIFIER
|
294
353
|
end
|
354
|
+
when :tXSTRING_BEG
|
355
|
+
if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :STRING_END
|
356
|
+
type = :tBACK_REF2
|
357
|
+
end
|
295
358
|
end
|
296
359
|
|
297
360
|
tokens << [type, [value, location]]
|
@@ -306,6 +369,20 @@ module Prism
|
|
306
369
|
|
307
370
|
private
|
308
371
|
|
372
|
+
# Parse an integer from the string representation.
|
373
|
+
def parse_integer(value)
|
374
|
+
Integer(value)
|
375
|
+
rescue ArgumentError
|
376
|
+
0
|
377
|
+
end
|
378
|
+
|
379
|
+
# Parse a float from the string representation.
|
380
|
+
def parse_float(value)
|
381
|
+
Float(value)
|
382
|
+
rescue ArgumentError
|
383
|
+
0.0
|
384
|
+
end
|
385
|
+
|
309
386
|
# Parse a complex from the string representation.
|
310
387
|
def parse_complex(value)
|
311
388
|
value.chomp!("i")
|
@@ -313,10 +390,12 @@ module Prism
|
|
313
390
|
if value.end_with?("r")
|
314
391
|
Complex(0, parse_rational(value))
|
315
392
|
elsif value.start_with?(/0[BbOoDdXx]/)
|
316
|
-
Complex(0,
|
393
|
+
Complex(0, parse_integer(value))
|
317
394
|
else
|
318
395
|
Complex(0, value)
|
319
396
|
end
|
397
|
+
rescue ArgumentError
|
398
|
+
0i
|
320
399
|
end
|
321
400
|
|
322
401
|
# Parse a rational from the string representation.
|
@@ -324,10 +403,12 @@ module Prism
|
|
324
403
|
value.chomp!("r")
|
325
404
|
|
326
405
|
if value.start_with?(/0[BbOoDdXx]/)
|
327
|
-
Rational(
|
406
|
+
Rational(parse_integer(value))
|
328
407
|
else
|
329
408
|
Rational(value)
|
330
409
|
end
|
410
|
+
rescue ArgumentError
|
411
|
+
0r
|
331
412
|
end
|
332
413
|
end
|
333
414
|
end
|