prism 0.23.0 → 0.25.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (117) hide show
  1. checksums.yaml +4 -4
  2. data/BSDmakefile +58 -0
  3. data/CHANGELOG.md +65 -1
  4. data/Makefile +5 -2
  5. data/README.md +45 -6
  6. data/config.yml +499 -4
  7. data/docs/build_system.md +31 -0
  8. data/docs/configuration.md +2 -0
  9. data/docs/cruby_compilation.md +1 -1
  10. data/docs/parser_translation.md +14 -9
  11. data/docs/releasing.md +3 -3
  12. data/docs/ripper_translation.md +50 -0
  13. data/docs/ruby_api.md +1 -0
  14. data/docs/serialization.md +26 -5
  15. data/ext/prism/api_node.c +2342 -1801
  16. data/ext/prism/api_pack.c +9 -0
  17. data/ext/prism/extconf.rb +27 -11
  18. data/ext/prism/extension.c +313 -66
  19. data/ext/prism/extension.h +5 -4
  20. data/include/prism/ast.h +213 -64
  21. data/include/prism/defines.h +106 -2
  22. data/include/prism/diagnostic.h +134 -71
  23. data/include/prism/encoding.h +22 -4
  24. data/include/prism/node.h +93 -0
  25. data/include/prism/options.h +82 -7
  26. data/include/prism/pack.h +11 -0
  27. data/include/prism/parser.h +198 -53
  28. data/include/prism/prettyprint.h +8 -0
  29. data/include/prism/static_literals.h +118 -0
  30. data/include/prism/util/pm_buffer.h +65 -2
  31. data/include/prism/util/pm_constant_pool.h +18 -1
  32. data/include/prism/util/pm_integer.h +119 -0
  33. data/include/prism/util/pm_list.h +1 -1
  34. data/include/prism/util/pm_newline_list.h +12 -3
  35. data/include/prism/util/pm_string.h +26 -2
  36. data/include/prism/version.h +2 -2
  37. data/include/prism.h +59 -1
  38. data/lib/prism/compiler.rb +8 -1
  39. data/lib/prism/debug.rb +46 -3
  40. data/lib/prism/desugar_compiler.rb +225 -80
  41. data/lib/prism/dispatcher.rb +29 -0
  42. data/lib/prism/dot_visitor.rb +87 -16
  43. data/lib/prism/dsl.rb +315 -300
  44. data/lib/prism/ffi.rb +165 -84
  45. data/lib/prism/lex_compat.rb +17 -15
  46. data/lib/prism/mutation_compiler.rb +11 -0
  47. data/lib/prism/node.rb +4857 -3750
  48. data/lib/prism/node_ext.rb +77 -29
  49. data/lib/prism/pack.rb +4 -0
  50. data/lib/prism/parse_result/comments.rb +34 -17
  51. data/lib/prism/parse_result/newlines.rb +3 -1
  52. data/lib/prism/parse_result.rb +88 -34
  53. data/lib/prism/pattern.rb +16 -4
  54. data/lib/prism/polyfill/string.rb +12 -0
  55. data/lib/prism/serialize.rb +960 -327
  56. data/lib/prism/translation/parser/compiler.rb +152 -50
  57. data/lib/prism/translation/parser/lexer.rb +103 -22
  58. data/lib/prism/translation/parser/rubocop.rb +47 -11
  59. data/lib/prism/translation/parser.rb +134 -10
  60. data/lib/prism/translation/parser33.rb +12 -0
  61. data/lib/prism/translation/parser34.rb +12 -0
  62. data/lib/prism/translation/ripper/sexp.rb +125 -0
  63. data/lib/prism/translation/ripper/shim.rb +5 -0
  64. data/lib/prism/translation/ripper.rb +3248 -379
  65. data/lib/prism/translation/ruby_parser.rb +35 -18
  66. data/lib/prism/translation.rb +3 -1
  67. data/lib/prism/visitor.rb +10 -0
  68. data/lib/prism.rb +8 -2
  69. data/prism.gemspec +35 -4
  70. data/rbi/prism/compiler.rbi +14 -0
  71. data/rbi/prism/desugar_compiler.rbi +5 -0
  72. data/rbi/prism/mutation_compiler.rbi +5 -0
  73. data/rbi/prism/node.rbi +8221 -0
  74. data/rbi/prism/node_ext.rbi +102 -0
  75. data/rbi/prism/parse_result.rbi +304 -0
  76. data/rbi/prism/translation/parser/compiler.rbi +13 -0
  77. data/rbi/prism/translation/ripper/ripper_compiler.rbi +5 -0
  78. data/rbi/prism/translation/ripper.rbi +25 -0
  79. data/rbi/prism/translation/ruby_parser.rbi +11 -0
  80. data/rbi/prism/visitor.rbi +470 -0
  81. data/rbi/prism.rbi +39 -7749
  82. data/sig/prism/compiler.rbs +9 -0
  83. data/sig/prism/dispatcher.rbs +16 -0
  84. data/sig/prism/dot_visitor.rbs +6 -0
  85. data/sig/prism/dsl.rbs +462 -0
  86. data/sig/prism/mutation_compiler.rbs +158 -0
  87. data/sig/prism/node.rbs +3529 -0
  88. data/sig/prism/node_ext.rbs +78 -0
  89. data/sig/prism/pack.rbs +43 -0
  90. data/sig/prism/parse_result.rbs +127 -0
  91. data/sig/prism/pattern.rbs +13 -0
  92. data/sig/prism/serialize.rbs +7 -0
  93. data/sig/prism/visitor.rbs +168 -0
  94. data/sig/prism.rbs +188 -4767
  95. data/src/diagnostic.c +575 -230
  96. data/src/encoding.c +211 -108
  97. data/src/node.c +7526 -447
  98. data/src/options.c +36 -12
  99. data/src/pack.c +33 -17
  100. data/src/prettyprint.c +1297 -1388
  101. data/src/prism.c +3665 -1121
  102. data/src/regexp.c +17 -2
  103. data/src/serialize.c +47 -28
  104. data/src/static_literals.c +552 -0
  105. data/src/token_type.c +1 -0
  106. data/src/util/pm_buffer.c +147 -20
  107. data/src/util/pm_char.c +4 -4
  108. data/src/util/pm_constant_pool.c +35 -11
  109. data/src/util/pm_integer.c +629 -0
  110. data/src/util/pm_list.c +1 -1
  111. data/src/util/pm_newline_list.c +20 -8
  112. data/src/util/pm_string.c +134 -5
  113. data/src/util/pm_string_list.c +2 -2
  114. metadata +37 -6
  115. data/docs/ripper.md +0 -36
  116. data/rbi/prism_static.rbi +0 -207
  117. data/sig/prism_static.rbs +0 -201
@@ -116,7 +116,14 @@ module Prism
116
116
  builder.pair_keyword([node.key.unescaped, srange(node.key.location)], visit(node.value))
117
117
  end
118
118
  elsif node.value.is_a?(ImplicitNode)
119
- builder.pair_label([node.key.unescaped, srange(node.key.location)])
119
+ if (value = node.value.value).is_a?(LocalVariableReadNode)
120
+ builder.pair_keyword(
121
+ [node.key.unescaped, srange(node.key)],
122
+ builder.ident([value.name, srange(node.key.value_loc)]).updated(:lvar)
123
+ )
124
+ else
125
+ builder.pair_label([node.key.unescaped, srange(node.key.location)])
126
+ end
120
127
  elsif node.operator_loc
121
128
  builder.pair(visit(node.key), token(node.operator_loc), visit(node.value))
122
129
  elsif node.key.is_a?(SymbolNode) && node.key.opening_loc.nil?
@@ -247,18 +254,30 @@ module Prism
247
254
 
248
255
  if node.call_operator_loc.nil?
249
256
  case name
257
+ when :-@
258
+ case (receiver = node.receiver).type
259
+ when :integer_node, :float_node, :rational_node, :imaginary_node
260
+ return visit(numeric_negate(node.message_loc, receiver))
261
+ end
250
262
  when :!
251
263
  return visit_block(builder.not_op(token(node.message_loc), token(node.opening_loc), visit(node.receiver), token(node.closing_loc)), block)
264
+ when :=~
265
+ if (receiver = node.receiver).is_a?(RegularExpressionNode)
266
+ return builder.match_op(visit(receiver), token(node.message_loc), visit(node.arguments.arguments.first))
267
+ end
252
268
  when :[]
253
269
  return visit_block(builder.index(visit(node.receiver), token(node.opening_loc), visit_all(arguments), token(node.closing_loc)), block)
254
270
  when :[]=
255
271
  if node.message != "[]=" && node.arguments && block.nil? && !node.safe_navigation?
272
+ arguments = node.arguments.arguments[...-1]
273
+ arguments << node.block if node.block
274
+
256
275
  return visit_block(
257
276
  builder.assign(
258
277
  builder.index_asgn(
259
278
  visit(node.receiver),
260
279
  token(node.opening_loc),
261
- visit_all(node.arguments.arguments[...-1]),
280
+ visit_all(arguments),
262
281
  token(node.closing_loc),
263
282
  ),
264
283
  srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, ["="]),
@@ -387,9 +406,6 @@ module Prism
387
406
 
388
407
  # @@foo = 1
389
408
  # ^^^^^^^^^
390
- #
391
- # @@foo, @@bar = 1
392
- # ^^^^^ ^^^^^
393
409
  def visit_class_variable_write_node(node)
394
410
  builder.assign(
395
411
  builder.assignable(builder.cvar(token(node.name_loc))),
@@ -682,9 +698,6 @@ module Prism
682
698
 
683
699
  # $foo = 1
684
700
  # ^^^^^^^^
685
- #
686
- # $foo, $bar = 1
687
- # ^^^^ ^^^^
688
701
  def visit_global_variable_write_node(node)
689
702
  builder.assign(
690
703
  builder.assignable(builder.gvar(token(node.name_loc))),
@@ -788,8 +801,9 @@ module Prism
788
801
  end
789
802
 
790
803
  # 1i
804
+ # ^^
791
805
  def visit_imaginary_node(node)
792
- visit_numeric(node, builder.complex([node.value, srange(node.location)]))
806
+ visit_numeric(node, builder.complex([imaginary_value(node), srange(node.location)]))
793
807
  end
794
808
 
795
809
  # { foo: }
@@ -875,9 +889,6 @@ module Prism
875
889
 
876
890
  # @foo = 1
877
891
  # ^^^^^^^^
878
- #
879
- # @foo, @bar = 1
880
- # ^^^^ ^^^^
881
892
  def visit_instance_variable_write_node(node)
882
893
  builder.assign(
883
894
  builder.assignable(builder.ivar(token(node.name_loc))),
@@ -934,16 +945,37 @@ module Prism
934
945
  # "foo #{bar}"
935
946
  # ^^^^^^^^^^^^
936
947
  def visit_interpolated_string_node(node)
937
- if node.opening&.start_with?("<<")
948
+ if node.heredoc?
938
949
  children, closing = visit_heredoc(node)
939
- builder.string_compose(token(node.opening_loc), children, closing)
950
+
951
+ return builder.string_compose(token(node.opening_loc), children, closing)
952
+ end
953
+
954
+ parts = if node.parts.one? { |part| part.type == :string_node }
955
+ node.parts.flat_map do |node|
956
+ if node.type == :string_node && node.unescaped.lines.count >= 2
957
+ start_offset = node.content_loc.start_offset
958
+
959
+ node.unescaped.lines.map do |line|
960
+ end_offset = start_offset + line.length
961
+ offsets = srange_offsets(start_offset, end_offset)
962
+ start_offset = end_offset
963
+
964
+ builder.string_internal([line, offsets])
965
+ end
966
+ else
967
+ visit(node)
968
+ end
969
+ end
940
970
  else
941
- builder.string_compose(
942
- token(node.opening_loc),
943
- visit_all(node.parts),
944
- token(node.closing_loc)
945
- )
971
+ visit_all(node.parts)
946
972
  end
973
+
974
+ builder.string_compose(
975
+ token(node.opening_loc),
976
+ parts,
977
+ token(node.closing_loc)
978
+ )
947
979
  end
948
980
 
949
981
  # :"foo #{bar}"
@@ -959,7 +991,7 @@ module Prism
959
991
  # `foo #{bar}`
960
992
  # ^^^^^^^^^^^^
961
993
  def visit_interpolated_x_string_node(node)
962
- if node.opening.start_with?("<<")
994
+ if node.heredoc?
963
995
  children, closing = visit_heredoc(node)
964
996
  builder.xstring_compose(token(node.opening_loc), children, closing)
965
997
  else
@@ -990,6 +1022,7 @@ module Prism
990
1022
  end
991
1023
 
992
1024
  # -> {}
1025
+ # ^^^^^
993
1026
  def visit_lambda_node(node)
994
1027
  parameters = node.parameters
995
1028
 
@@ -1021,9 +1054,6 @@ module Prism
1021
1054
 
1022
1055
  # foo = 1
1023
1056
  # ^^^^^^^
1024
- #
1025
- # foo, bar = 1
1026
- # ^^^ ^^^
1027
1057
  def visit_local_variable_write_node(node)
1028
1058
  builder.assign(
1029
1059
  builder.assignable(builder.ident(token(node.name_loc))),
@@ -1062,22 +1092,12 @@ module Prism
1062
1092
 
1063
1093
  # foo in bar
1064
1094
  # ^^^^^^^^^^
1065
- if RUBY_VERSION >= "3.0"
1066
- def visit_match_predicate_node(node)
1067
- builder.match_pattern_p(
1068
- visit(node.value),
1069
- token(node.operator_loc),
1070
- within_pattern { |compiler| node.pattern.accept(compiler) }
1071
- )
1072
- end
1073
- else
1074
- def visit_match_predicate_node(node)
1075
- builder.match_pattern(
1076
- visit(node.value),
1077
- token(node.operator_loc),
1078
- within_pattern { |compiler| node.pattern.accept(compiler) }
1079
- )
1080
- end
1095
+ def visit_match_predicate_node(node)
1096
+ builder.match_pattern_p(
1097
+ visit(node.value),
1098
+ token(node.operator_loc),
1099
+ within_pattern { |compiler| node.pattern.accept(compiler) }
1100
+ )
1081
1101
  end
1082
1102
 
1083
1103
  # foo => bar
@@ -1263,7 +1283,8 @@ module Prism
1263
1283
  # foo => ^(bar)
1264
1284
  # ^^^^^^
1265
1285
  def visit_pinned_expression_node(node)
1266
- builder.pin(token(node.operator_loc), visit(node.expression))
1286
+ expression = builder.begin(token(node.lparen_loc), visit(node.expression), token(node.rparen_loc))
1287
+ builder.pin(token(node.operator_loc), expression)
1267
1288
  end
1268
1289
 
1269
1290
  # foo = 1 and bar => ^foo
@@ -1322,7 +1343,7 @@ module Prism
1322
1343
  # 1r
1323
1344
  # ^^
1324
1345
  def visit_rational_node(node)
1325
- visit_numeric(node, builder.rational([node.value, srange(node.location)]))
1346
+ visit_numeric(node, builder.rational([rational_value(node), srange(node.location)]))
1326
1347
  end
1327
1348
 
1328
1349
  # redo
@@ -1418,6 +1439,11 @@ module Prism
1418
1439
  builder.self(token(node.location))
1419
1440
  end
1420
1441
 
1442
+ # A shareable constant.
1443
+ def visit_shareable_constant_node(node)
1444
+ visit(node.write)
1445
+ end
1446
+
1421
1447
  # class << self; end
1422
1448
  # ^^^^^^^^^^^^^^^^^^
1423
1449
  def visit_singleton_class_node(node)
@@ -1476,15 +1502,29 @@ module Prism
1476
1502
  # "foo"
1477
1503
  # ^^^^^
1478
1504
  def visit_string_node(node)
1479
- if node.opening&.start_with?("<<")
1480
- children, closing = visit_heredoc(InterpolatedStringNode.new(node.opening_loc, [node.copy(opening_loc: nil, closing_loc: nil, location: node.content_loc)], node.closing_loc, node.location))
1505
+ if node.heredoc?
1506
+ children, closing = visit_heredoc(node.to_interpolated)
1481
1507
  builder.string_compose(token(node.opening_loc), children, closing)
1482
1508
  elsif node.opening == "?"
1483
1509
  builder.character([node.unescaped, srange(node.location)])
1484
1510
  else
1511
+ parts = if node.content.lines.count <= 1 || node.unescaped.lines.count <= 1
1512
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1513
+ else
1514
+ start_offset = node.content_loc.start_offset
1515
+
1516
+ [node.content.lines, node.unescaped.lines].transpose.map do |content_line, unescaped_line|
1517
+ end_offset = start_offset + content_line.length
1518
+ offsets = srange_offsets(start_offset, end_offset)
1519
+ start_offset = end_offset
1520
+
1521
+ builder.string_internal([unescaped_line, offsets])
1522
+ end
1523
+ end
1524
+
1485
1525
  builder.string_compose(
1486
1526
  token(node.opening_loc),
1487
- [builder.string_internal([node.unescaped, srange(node.content_loc)])],
1527
+ parts,
1488
1528
  token(node.closing_loc)
1489
1529
  )
1490
1530
  end
@@ -1523,9 +1563,23 @@ module Prism
1523
1563
  builder.symbol([node.unescaped, srange(node.location)])
1524
1564
  end
1525
1565
  else
1566
+ parts = if node.value.lines.one?
1567
+ [builder.string_internal([node.unescaped, srange(node.value_loc)])]
1568
+ else
1569
+ start_offset = node.value_loc.start_offset
1570
+
1571
+ node.value.lines.map do |line|
1572
+ end_offset = start_offset + line.length
1573
+ offsets = srange_offsets(start_offset, end_offset)
1574
+ start_offset = end_offset
1575
+
1576
+ builder.string_internal([line, offsets])
1577
+ end
1578
+ end
1579
+
1526
1580
  builder.symbol_compose(
1527
1581
  token(node.opening_loc),
1528
- [builder.string_internal([node.unescaped, srange(node.value_loc)])],
1582
+ parts,
1529
1583
  token(node.closing_loc)
1530
1584
  )
1531
1585
  end
@@ -1604,7 +1658,11 @@ module Prism
1604
1658
  builder.when(
1605
1659
  token(node.keyword_loc),
1606
1660
  visit_all(node.conditions),
1607
- srange_find(node.conditions.last.location.end_offset, node.statements&.location&.start_offset || (node.conditions.last.location.end_offset + 1), [";", "then"]),
1661
+ if node.then_keyword_loc
1662
+ token(node.then_keyword_loc)
1663
+ else
1664
+ srange_find(node.conditions.last.location.end_offset, node.statements&.location&.start_offset || (node.conditions.last.location.end_offset + 1), [";"])
1665
+ end,
1608
1666
  visit(node.statements)
1609
1667
  )
1610
1668
  end
@@ -1637,13 +1695,27 @@ module Prism
1637
1695
  # `foo`
1638
1696
  # ^^^^^
1639
1697
  def visit_x_string_node(node)
1640
- if node.opening&.start_with?("<<")
1641
- children, closing = visit_heredoc(InterpolatedXStringNode.new(node.opening_loc, [StringNode.new(0, nil, node.content_loc, nil, node.unescaped, node.content_loc)], node.closing_loc, node.location))
1698
+ if node.heredoc?
1699
+ children, closing = visit_heredoc(node.to_interpolated)
1642
1700
  builder.xstring_compose(token(node.opening_loc), children, closing)
1643
1701
  else
1702
+ parts = if node.unescaped.lines.one?
1703
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1704
+ else
1705
+ start_offset = node.content_loc.start_offset
1706
+
1707
+ node.unescaped.lines.map do |line|
1708
+ end_offset = start_offset + line.length
1709
+ offsets = srange_offsets(start_offset, end_offset)
1710
+ start_offset = end_offset
1711
+
1712
+ builder.string_internal([line, offsets])
1713
+ end
1714
+ end
1715
+
1644
1716
  builder.xstring_compose(
1645
1717
  token(node.opening_loc),
1646
- [builder.string_internal([node.unescaped, srange(node.content_loc)])],
1718
+ parts,
1647
1719
  token(node.closing_loc)
1648
1720
  )
1649
1721
  end
@@ -1687,6 +1759,26 @@ module Prism
1687
1759
  forwarding
1688
1760
  end
1689
1761
 
1762
+ # Because we have mutated the AST to allow for newlines in the middle of
1763
+ # a rational, we need to manually handle the value here.
1764
+ def imaginary_value(node)
1765
+ Complex(0, node.numeric.is_a?(RationalNode) ? rational_value(node.numeric) : node.numeric.value)
1766
+ end
1767
+
1768
+ # Negate the value of a numeric node. This is a special case where you
1769
+ # have a negative sign on one line and then a number on the next line.
1770
+ # In normal Ruby, this will always be a method call. The parser gem,
1771
+ # however, marks this as a numeric literal. We have to massage the tree
1772
+ # here to get it into the correct form.
1773
+ def numeric_negate(message_loc, receiver)
1774
+ case receiver.type
1775
+ when :integer_node, :float_node
1776
+ receiver.copy(value: -receiver.value, location: message_loc.join(receiver.location))
1777
+ when :rational_node, :imaginary_node
1778
+ receiver.copy(numeric: numeric_negate(message_loc, receiver.numeric), location: message_loc.join(receiver.location))
1779
+ end
1780
+ end
1781
+
1690
1782
  # Blocks can have a special set of parameters that automatically expand
1691
1783
  # when given arrays if they have a single required parameter and no
1692
1784
  # other parameters.
@@ -1701,6 +1793,16 @@ module Prism
1701
1793
  parameters.block.nil?
1702
1794
  end
1703
1795
 
1796
+ # Because we have mutated the AST to allow for newlines in the middle of
1797
+ # a rational, we need to manually handle the value here.
1798
+ def rational_value(node)
1799
+ if node.numeric.is_a?(IntegerNode)
1800
+ Rational(node.numeric.value)
1801
+ else
1802
+ Rational(node.slice.gsub(/\s/, "").chomp("r"))
1803
+ end
1804
+ end
1805
+
1704
1806
  # Locations in the parser gem AST are generated using this class. We
1705
1807
  # store a reference to its constant to make it slightly faster to look
1706
1808
  # up.
@@ -1767,7 +1869,7 @@ module Prism
1767
1869
 
1768
1870
  # Visit a heredoc that can be either a string or an xstring.
1769
1871
  def visit_heredoc(node)
1770
- children = []
1872
+ children = Array.new
1771
1873
  node.parts.each do |part|
1772
1874
  pushing =
1773
1875
  if part.is_a?(StringNode) && part.unescaped.include?("\n")
@@ -167,7 +167,7 @@ module Prism
167
167
  TILDE: :tTILDE,
168
168
  UAMPERSAND: :tAMPER,
169
169
  UCOLON_COLON: :tCOLON3,
170
- UDOT_DOT: :tDOT2,
170
+ UDOT_DOT: :tBDOT2,
171
171
  UDOT_DOT_DOT: :tBDOT3,
172
172
  UMINUS: :tUMINUS,
173
173
  UMINUS_NUM: :tUNARY_NUM,
@@ -177,12 +177,23 @@ module Prism
177
177
  WORDS_SEP: :tSPACE
178
178
  }
179
179
 
180
- private_constant :TYPES
180
+ # These constants represent flags in our lex state. We really, really
181
+ # don't want to be using them and we really, really don't want to be
182
+ # exposing them as part of our public API. Unfortunately, we don't have
183
+ # another way of matching the exact tokens that the parser gem expects
184
+ # without them. We should find another way to do this, but in the
185
+ # meantime we'll hide them from the documentation and mark them as
186
+ # private constants.
187
+ EXPR_BEG = 0x1 # :nodoc:
188
+ EXPR_LABEL = 0x400 # :nodoc:
189
+
190
+ private_constant :TYPES, :EXPR_BEG, :EXPR_LABEL
181
191
 
182
192
  # The Parser::Source::Buffer that the tokens were lexed from.
183
193
  attr_reader :source_buffer
184
194
 
185
- # An array of prism tokens that we lexed.
195
+ # An array of tuples that contain prism tokens and their associated lex
196
+ # state when they were lexed.
186
197
  attr_reader :lexed
187
198
 
188
199
  # A hash that maps offsets in bytes to offsets in characters.
@@ -202,12 +213,16 @@ module Prism
202
213
  # Convert the prism tokens into the expected format for the parser gem.
203
214
  def to_a
204
215
  tokens = []
216
+
205
217
  index = 0
218
+ length = lexed.length
219
+
220
+ heredoc_identifier_stack = []
206
221
 
207
- while index < lexed.length
208
- token, = lexed[index]
222
+ while index < length
223
+ token, state = lexed[index]
209
224
  index += 1
210
- next if token.type == :IGNORED_NEWLINE || token.type == :EOF
225
+ next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type)
211
226
 
212
227
  type = TYPES.fetch(token.type)
213
228
  value = token.value
@@ -218,14 +233,18 @@ module Prism
218
233
  value.delete_prefix!("?")
219
234
  when :tCOMMENT
220
235
  if token.type == :EMBDOC_BEGIN
221
- until (next_token = lexed[index]) && next_token.type == :EMBDOC_END
236
+ start_index = index
237
+
238
+ while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1)
222
239
  value += next_token.value
223
240
  index += 1
224
241
  end
225
242
 
226
- value += next_token.value
227
- location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index].location.end_offset])
228
- index += 1
243
+ if start_index != index
244
+ value += next_token.value
245
+ location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index][0].location.end_offset])
246
+ index += 1
247
+ end
229
248
  else
230
249
  value.chomp!
231
250
  location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - 1])
@@ -233,7 +252,7 @@ module Prism
233
252
  when :tNL
234
253
  value = nil
235
254
  when :tFLOAT
236
- value = Float(value)
255
+ value = parse_float(value)
237
256
  when :tIMAGINARY
238
257
  value = parse_complex(value)
239
258
  when :tINTEGER
@@ -242,13 +261,15 @@ module Prism
242
261
  location = Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])
243
262
  end
244
263
 
245
- value = Integer(value)
264
+ value = parse_integer(value)
246
265
  when :tLABEL
247
266
  value.chomp!(":")
248
267
  when :tLABEL_END
249
268
  value.chomp!(":")
269
+ when :tLCURLY
270
+ type = :tLBRACE if state == EXPR_BEG | EXPR_LABEL
250
271
  when :tNTH_REF
251
- value = Integer(value.delete_prefix("$"))
272
+ value = parse_integer(value.delete_prefix("$"))
252
273
  when :tOP_ASGN
253
274
  value.chomp!("=")
254
275
  when :tRATIONAL
@@ -256,31 +277,69 @@ module Prism
256
277
  when :tSPACE
257
278
  value = nil
258
279
  when :tSTRING_BEG
259
- if ["\"", "'"].include?(value) && (next_token = lexed[index]) && next_token.type == :STRING_END
280
+ if token.type == :HEREDOC_START
281
+ heredoc_identifier_stack.push(value.match(/<<[-~]?["'`]?(?<heredoc_identifier>.*?)["'`]?\z/)[:heredoc_identifier])
282
+ end
283
+ if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END
260
284
  next_location = token.location.join(next_token.location)
261
285
  type = :tSTRING
262
286
  value = ""
263
287
  location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
264
288
  index += 1
265
- elsif ["\"", "'"].include?(value) && (next_token = lexed[index]) && next_token.type == :STRING_CONTENT && (next_next_token = lexed[index + 1]) && next_next_token.type == :STRING_END
289
+ elsif ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_CONTENT && next_token.value.lines.count <= 1 && (next_next_token = lexed[index + 1][0]) && next_next_token.type == :STRING_END
266
290
  next_location = token.location.join(next_next_token.location)
267
291
  type = :tSTRING
268
- value = next_token.value
292
+ value = next_token.value.gsub("\\\\", "\\")
269
293
  location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
270
294
  index += 2
271
295
  elsif value.start_with?("<<")
272
296
  quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
273
- value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
297
+ if quote == "`"
298
+ type = :tXSTRING_BEG
299
+ value = "<<`"
300
+ else
301
+ value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
302
+ end
303
+ end
304
+ when :tSTRING_CONTENT
305
+ unless (lines = token.value.lines).one?
306
+ start_offset = offset_cache[token.location.start_offset]
307
+ lines.map do |line|
308
+ newline = line.end_with?("\r\n") ? "\r\n" : "\n"
309
+ chomped_line = line.chomp
310
+ if match = chomped_line.match(/(?<backslashes>\\+)\z/)
311
+ adjustment = match[:backslashes].size / 2
312
+ adjusted_line = chomped_line.delete_suffix("\\" * adjustment)
313
+ if match[:backslashes].size.odd?
314
+ adjusted_line.delete_suffix!("\\")
315
+ adjustment += 2
316
+ else
317
+ adjusted_line << newline
318
+ end
319
+ else
320
+ adjusted_line = line
321
+ adjustment = 0
322
+ end
323
+
324
+ end_offset = start_offset + adjusted_line.length + adjustment
325
+ tokens << [:tSTRING_CONTENT, [adjusted_line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]]
326
+ start_offset = end_offset
327
+ end
328
+ next
274
329
  end
275
330
  when :tSTRING_DVAR
276
331
  value = nil
277
332
  when :tSTRING_END
278
- if token.type == :REGEXP_END
333
+ if token.type == :HEREDOC_END && value.end_with?("\n")
334
+ newline_length = value.end_with?("\r\n") ? 2 : 1
335
+ value = heredoc_identifier_stack.pop
336
+ location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - newline_length])
337
+ elsif token.type == :REGEXP_END
279
338
  value = value[0]
280
339
  location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])
281
340
  end
282
341
  when :tSYMBEG
283
- if (next_token = lexed[index]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
342
+ if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
284
343
  next_location = token.location.join(next_token.location)
285
344
  type = :tSYMBOL
286
345
  value = next_token.value
@@ -289,9 +348,13 @@ module Prism
289
348
  index += 1
290
349
  end
291
350
  when :tFID
292
- if tokens[-1][0] == :kDEF
351
+ if !tokens.empty? && tokens.dig(-1, 0) == :kDEF
293
352
  type = :tIDENTIFIER
294
353
  end
354
+ when :tXSTRING_BEG
355
+ if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :STRING_END
356
+ type = :tBACK_REF2
357
+ end
295
358
  end
296
359
 
297
360
  tokens << [type, [value, location]]
@@ -306,6 +369,20 @@ module Prism
306
369
 
307
370
  private
308
371
 
372
+ # Parse an integer from the string representation.
373
+ def parse_integer(value)
374
+ Integer(value)
375
+ rescue ArgumentError
376
+ 0
377
+ end
378
+
379
+ # Parse a float from the string representation.
380
+ def parse_float(value)
381
+ Float(value)
382
+ rescue ArgumentError
383
+ 0.0
384
+ end
385
+
309
386
  # Parse a complex from the string representation.
310
387
  def parse_complex(value)
311
388
  value.chomp!("i")
@@ -313,10 +390,12 @@ module Prism
313
390
  if value.end_with?("r")
314
391
  Complex(0, parse_rational(value))
315
392
  elsif value.start_with?(/0[BbOoDdXx]/)
316
- Complex(0, Integer(value))
393
+ Complex(0, parse_integer(value))
317
394
  else
318
395
  Complex(0, value)
319
396
  end
397
+ rescue ArgumentError
398
+ 0i
320
399
  end
321
400
 
322
401
  # Parse a rational from the string representation.
@@ -324,10 +403,12 @@ module Prism
324
403
  value.chomp!("r")
325
404
 
326
405
  if value.start_with?(/0[BbOoDdXx]/)
327
- Rational(Integer(value))
406
+ Rational(parse_integer(value))
328
407
  else
329
408
  Rational(value)
330
409
  end
410
+ rescue ArgumentError
411
+ 0r
331
412
  end
332
413
  end
333
414
  end