prism 0.29.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +77 -1
  3. data/CONTRIBUTING.md +0 -4
  4. data/README.md +4 -0
  5. data/config.yml +498 -145
  6. data/docs/fuzzing.md +1 -1
  7. data/docs/parsing_rules.md +4 -1
  8. data/docs/ripper_translation.md +22 -0
  9. data/docs/serialization.md +3 -0
  10. data/ext/prism/api_node.c +2858 -2082
  11. data/ext/prism/extconf.rb +1 -1
  12. data/ext/prism/extension.c +203 -421
  13. data/ext/prism/extension.h +2 -2
  14. data/include/prism/ast.h +1732 -453
  15. data/include/prism/defines.h +36 -0
  16. data/include/prism/diagnostic.h +23 -6
  17. data/include/prism/node.h +0 -21
  18. data/include/prism/options.h +94 -3
  19. data/include/prism/parser.h +57 -28
  20. data/include/prism/regexp.h +18 -8
  21. data/include/prism/static_literals.h +3 -2
  22. data/include/prism/util/pm_char.h +1 -2
  23. data/include/prism/util/pm_constant_pool.h +0 -8
  24. data/include/prism/util/pm_integer.h +22 -15
  25. data/include/prism/util/pm_newline_list.h +11 -0
  26. data/include/prism/util/pm_string.h +28 -12
  27. data/include/prism/version.h +3 -3
  28. data/include/prism.h +0 -11
  29. data/lib/prism/compiler.rb +3 -0
  30. data/lib/prism/desugar_compiler.rb +111 -74
  31. data/lib/prism/dispatcher.rb +16 -1
  32. data/lib/prism/dot_visitor.rb +45 -34
  33. data/lib/prism/dsl.rb +660 -468
  34. data/lib/prism/ffi.rb +64 -6
  35. data/lib/prism/inspect_visitor.rb +294 -64
  36. data/lib/prism/lex_compat.rb +1 -1
  37. data/lib/prism/mutation_compiler.rb +11 -6
  38. data/lib/prism/node.rb +2469 -4973
  39. data/lib/prism/node_ext.rb +91 -14
  40. data/lib/prism/parse_result/comments.rb +0 -7
  41. data/lib/prism/parse_result/errors.rb +65 -0
  42. data/lib/prism/parse_result/newlines.rb +101 -11
  43. data/lib/prism/parse_result.rb +43 -3
  44. data/lib/prism/reflection.rb +10 -8
  45. data/lib/prism/serialize.rb +484 -609
  46. data/lib/prism/translation/parser/compiler.rb +152 -132
  47. data/lib/prism/translation/parser/lexer.rb +26 -4
  48. data/lib/prism/translation/parser.rb +9 -4
  49. data/lib/prism/translation/ripper.rb +22 -20
  50. data/lib/prism/translation/ruby_parser.rb +73 -13
  51. data/lib/prism/visitor.rb +3 -0
  52. data/lib/prism.rb +0 -4
  53. data/prism.gemspec +3 -5
  54. data/rbi/prism/dsl.rbi +521 -0
  55. data/rbi/prism/node.rbi +744 -4837
  56. data/rbi/prism/visitor.rbi +3 -0
  57. data/rbi/prism.rbi +36 -30
  58. data/sig/prism/dsl.rbs +190 -303
  59. data/sig/prism/mutation_compiler.rbs +1 -0
  60. data/sig/prism/node.rbs +759 -628
  61. data/sig/prism/parse_result.rbs +2 -0
  62. data/sig/prism/visitor.rbs +1 -0
  63. data/sig/prism.rbs +103 -64
  64. data/src/diagnostic.c +62 -28
  65. data/src/node.c +499 -1754
  66. data/src/options.c +76 -27
  67. data/src/prettyprint.c +156 -112
  68. data/src/prism.c +2773 -2081
  69. data/src/regexp.c +202 -69
  70. data/src/serialize.c +170 -50
  71. data/src/static_literals.c +63 -84
  72. data/src/token_type.c +4 -4
  73. data/src/util/pm_constant_pool.c +0 -8
  74. data/src/util/pm_integer.c +53 -25
  75. data/src/util/pm_newline_list.c +29 -0
  76. data/src/util/pm_string.c +130 -80
  77. data/src/util/pm_strpbrk.c +32 -6
  78. metadata +4 -6
  79. data/include/prism/util/pm_string_list.h +0 -44
  80. data/lib/prism/debug.rb +0 -249
  81. data/lib/prism/translation/parser/rubocop.rb +0 -73
  82. data/src/util/pm_string_list.c +0 -28
@@ -90,7 +90,11 @@ module Prism
90
90
  end
91
91
 
92
92
  if node.constant
93
- builder.const_pattern(visit(node.constant), token(node.opening_loc), builder.array_pattern(nil, visited, nil), token(node.closing_loc))
93
+ if visited.empty?
94
+ builder.const_pattern(visit(node.constant), token(node.opening_loc), builder.array_pattern(token(node.opening_loc), visited, token(node.closing_loc)), token(node.closing_loc))
95
+ else
96
+ builder.const_pattern(visit(node.constant), token(node.opening_loc), builder.array_pattern(nil, visited, nil), token(node.closing_loc))
97
+ end
94
98
  else
95
99
  builder.array_pattern(token(node.opening_loc), visited, token(node.closing_loc))
96
100
  end
@@ -105,38 +109,46 @@ module Prism
105
109
  # { a: 1 }
106
110
  # ^^^^
107
111
  def visit_assoc_node(node)
112
+ key = node.key
113
+
108
114
  if in_pattern
109
115
  if node.value.is_a?(ImplicitNode)
110
- if node.key.is_a?(SymbolNode)
111
- builder.match_hash_var([node.key.unescaped, srange(node.key.location)])
116
+ if key.is_a?(SymbolNode)
117
+ if key.opening.nil?
118
+ builder.match_hash_var([key.unescaped, srange(key.location)])
119
+ else
120
+ builder.match_hash_var_from_str(token(key.opening_loc), [builder.string_internal([key.unescaped, srange(key.value_loc)])], token(key.closing_loc))
121
+ end
112
122
  else
113
- builder.match_hash_var_from_str(token(node.key.opening_loc), visit_all(node.key.parts), token(node.key.closing_loc))
123
+ builder.match_hash_var_from_str(token(key.opening_loc), visit_all(key.parts), token(key.closing_loc))
114
124
  end
125
+ elsif key.opening.nil?
126
+ builder.pair_keyword([key.unescaped, srange(key.location)], visit(node.value))
115
127
  else
116
- builder.pair_keyword([node.key.unescaped, srange(node.key.location)], visit(node.value))
128
+ builder.pair_quoted(token(key.opening_loc), [builder.string_internal([key.unescaped, srange(key.value_loc)])], token(key.closing_loc), visit(node.value))
117
129
  end
118
130
  elsif node.value.is_a?(ImplicitNode)
119
131
  if (value = node.value.value).is_a?(LocalVariableReadNode)
120
132
  builder.pair_keyword(
121
- [node.key.unescaped, srange(node.key)],
122
- builder.ident([value.name, srange(node.key.value_loc)]).updated(:lvar)
133
+ [key.unescaped, srange(key)],
134
+ builder.ident([value.name, srange(key.value_loc)]).updated(:lvar)
123
135
  )
124
136
  else
125
- builder.pair_label([node.key.unescaped, srange(node.key.location)])
137
+ builder.pair_label([key.unescaped, srange(key.location)])
126
138
  end
127
139
  elsif node.operator_loc
128
- builder.pair(visit(node.key), token(node.operator_loc), visit(node.value))
129
- elsif node.key.is_a?(SymbolNode) && node.key.opening_loc.nil?
130
- builder.pair_keyword([node.key.unescaped, srange(node.key.location)], visit(node.value))
140
+ builder.pair(visit(key), token(node.operator_loc), visit(node.value))
141
+ elsif key.is_a?(SymbolNode) && key.opening_loc.nil?
142
+ builder.pair_keyword([key.unescaped, srange(key.location)], visit(node.value))
131
143
  else
132
144
  parts =
133
- if node.key.is_a?(SymbolNode)
134
- [builder.string_internal([node.key.unescaped, srange(node.key.value_loc)])]
145
+ if key.is_a?(SymbolNode)
146
+ [builder.string_internal([key.unescaped, srange(key.value_loc)])]
135
147
  else
136
- visit_all(node.key.parts)
148
+ visit_all(key.parts)
137
149
  end
138
150
 
139
- builder.pair_quoted(token(node.key.opening_loc), parts, token(node.key.closing_loc), visit(node.value))
151
+ builder.pair_quoted(token(key.opening_loc), parts, token(key.closing_loc), visit(node.value))
140
152
  end
141
153
  end
142
154
 
@@ -146,7 +158,9 @@ module Prism
146
158
  # { **foo }
147
159
  # ^^^^^
148
160
  def visit_assoc_splat_node(node)
149
- if node.value.nil? && forwarding.include?(:**)
161
+ if in_pattern
162
+ builder.match_rest(token(node.operator_loc), token(node.value&.location))
163
+ elsif node.value.nil? && forwarding.include?(:**)
150
164
  builder.forwarded_kwrestarg(token(node.operator_loc))
151
165
  else
152
166
  builder.kwsplat(token(node.operator_loc), visit(node.value))
@@ -167,7 +181,7 @@ module Prism
167
181
  if (rescue_clause = node.rescue_clause)
168
182
  begin
169
183
  find_start_offset = (rescue_clause.reference&.location || rescue_clause.exceptions.last&.location || rescue_clause.keyword_loc).end_offset
170
- find_end_offset = (rescue_clause.statements&.location&.start_offset || rescue_clause.consequent&.location&.start_offset || (find_start_offset + 1))
184
+ find_end_offset = (rescue_clause.statements&.location&.start_offset || rescue_clause.subsequent&.location&.start_offset || (find_start_offset + 1))
171
185
 
172
186
  rescue_bodies << builder.rescue_body(
173
187
  token(rescue_clause.keyword_loc),
@@ -177,7 +191,7 @@ module Prism
177
191
  srange_find(find_start_offset, find_end_offset, [";"]),
178
192
  visit(rescue_clause.statements)
179
193
  )
180
- end until (rescue_clause = rescue_clause.consequent).nil?
194
+ end until (rescue_clause = rescue_clause.subsequent).nil?
181
195
  end
182
196
 
183
197
  begin_body =
@@ -396,8 +410,8 @@ module Prism
396
410
  token(node.case_keyword_loc),
397
411
  visit(node.predicate),
398
412
  visit_all(node.conditions),
399
- token(node.consequent&.else_keyword_loc),
400
- visit(node.consequent),
413
+ token(node.else_clause&.else_keyword_loc),
414
+ visit(node.else_clause),
401
415
  token(node.end_keyword_loc)
402
416
  )
403
417
  end
@@ -409,8 +423,8 @@ module Prism
409
423
  token(node.case_keyword_loc),
410
424
  visit(node.predicate),
411
425
  visit_all(node.conditions),
412
- token(node.consequent&.else_keyword_loc),
413
- visit(node.consequent),
426
+ token(node.else_clause&.else_keyword_loc),
427
+ visit(node.else_clause),
414
428
  token(node.end_keyword_loc)
415
429
  )
416
430
  end
@@ -844,8 +858,8 @@ module Prism
844
858
  visit(node.predicate),
845
859
  token(node.then_keyword_loc),
846
860
  visit(node.statements),
847
- token(node.consequent.else_keyword_loc),
848
- visit(node.consequent)
861
+ token(node.subsequent.else_keyword_loc),
862
+ visit(node.subsequent)
849
863
  )
850
864
  elsif node.if_keyword_loc.start_offset == node.location.start_offset
851
865
  builder.condition(
@@ -854,16 +868,16 @@ module Prism
854
868
  if node.then_keyword_loc
855
869
  token(node.then_keyword_loc)
856
870
  else
857
- srange_find(node.predicate.location.end_offset, (node.statements&.location || node.consequent&.location || node.end_keyword_loc).start_offset, [";"])
871
+ srange_find(node.predicate.location.end_offset, (node.statements&.location || node.subsequent&.location || node.end_keyword_loc).start_offset, [";"])
858
872
  end,
859
873
  visit(node.statements),
860
- case node.consequent
874
+ case node.subsequent
861
875
  when IfNode
862
- token(node.consequent.if_keyword_loc)
876
+ token(node.subsequent.if_keyword_loc)
863
877
  when ElseNode
864
- token(node.consequent.else_keyword_loc)
878
+ token(node.subsequent.else_keyword_loc)
865
879
  end,
866
- visit(node.consequent),
880
+ visit(node.subsequent),
867
881
  if node.if_keyword != "elsif"
868
882
  token(node.end_keyword_loc)
869
883
  end
@@ -871,7 +885,7 @@ module Prism
871
885
  else
872
886
  builder.condition_mod(
873
887
  visit(node.statements),
874
- visit(node.consequent),
888
+ visit(node.subsequent),
875
889
  token(node.if_keyword_loc),
876
890
  visit(node.predicate)
877
891
  )
@@ -881,7 +895,7 @@ module Prism
881
895
  # 1i
882
896
  # ^^
883
897
  def visit_imaginary_node(node)
884
- visit_numeric(node, builder.complex([imaginary_value(node), srange(node.location)]))
898
+ visit_numeric(node, builder.complex([Complex(0, node.numeric.value), srange(node.location)]))
885
899
  end
886
900
 
887
901
  # { foo: }
@@ -1064,36 +1078,7 @@ module Prism
1064
1078
  # ^^^^^^^^^^^^
1065
1079
  def visit_interpolated_string_node(node)
1066
1080
  if node.heredoc?
1067
- children, closing = visit_heredoc(node)
1068
- opening = token(node.opening_loc)
1069
-
1070
- start_offset = node.opening_loc.end_offset + 1
1071
- end_offset = node.parts.first.location.start_offset
1072
-
1073
- # In the below case, the offsets should be the same:
1074
- #
1075
- # <<~HEREDOC
1076
- # a #{b}
1077
- # HEREDOC
1078
- #
1079
- # But in this case, the end_offset would be greater than the start_offset:
1080
- #
1081
- # <<~HEREDOC
1082
- # #{b}
1083
- # HEREDOC
1084
- #
1085
- # So we need to make sure the result node's heredoc range is correct, without updating the children
1086
- result = if start_offset < end_offset
1087
- # We need to add a padding string to ensure that the heredoc has correct range for its body
1088
- padding_string_node = builder.string_internal(["", srange_offsets(start_offset, end_offset)])
1089
- node_with_correct_location = builder.string_compose(opening, [padding_string_node, *children], closing)
1090
- # But the padding string should not be included in the final AST, so we need to update the result's children
1091
- node_with_correct_location.updated(:dstr, children)
1092
- else
1093
- builder.string_compose(opening, children, closing)
1094
- end
1095
-
1096
- return result
1081
+ return visit_heredoc(node) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) }
1097
1082
  end
1098
1083
 
1099
1084
  parts = if node.parts.one? { |part| part.type == :string_node }
@@ -1137,8 +1122,7 @@ module Prism
1137
1122
  # ^^^^^^^^^^^^
1138
1123
  def visit_interpolated_x_string_node(node)
1139
1124
  if node.heredoc?
1140
- children, closing = visit_heredoc(node)
1141
- builder.xstring_compose(token(node.opening_loc), children, closing)
1125
+ visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
1142
1126
  else
1143
1127
  builder.xstring_compose(
1144
1128
  token(node.opening_loc),
@@ -1148,6 +1132,12 @@ module Prism
1148
1132
  end
1149
1133
  end
1150
1134
 
1135
+ # -> { it }
1136
+ # ^^
1137
+ def visit_it_local_variable_read_node(node)
1138
+ builder.ident([:it, srange(node.location)]).updated(:lvar)
1139
+ end
1140
+
1151
1141
  # -> { it }
1152
1142
  # ^^^^^^^^^
1153
1143
  def visit_it_parameters_node(node)
@@ -1201,14 +1191,7 @@ module Prism
1201
1191
  # foo
1202
1192
  # ^^^
1203
1193
  def visit_local_variable_read_node(node)
1204
- name = node.name
1205
-
1206
- # This is just a guess. parser doesn't have support for the implicit
1207
- # `it` variable yet, so we'll probably have to visit this once it
1208
- # does.
1209
- name = :it if name == :"0it"
1210
-
1211
- builder.ident([name, srange(node.location)]).updated(:lvar)
1194
+ builder.ident([node.name, srange(node.location)]).updated(:lvar)
1212
1195
  end
1213
1196
 
1214
1197
  # foo = 1
@@ -1312,13 +1295,9 @@ module Prism
1312
1295
  # foo, bar = baz
1313
1296
  # ^^^^^^^^
1314
1297
  def visit_multi_target_node(node)
1315
- elements = [*node.lefts]
1316
- elements << node.rest if !node.rest.nil? && !node.rest.is_a?(ImplicitRestNode)
1317
- elements.concat(node.rights)
1318
-
1319
1298
  builder.multi_lhs(
1320
1299
  token(node.lparen_loc),
1321
- visit_all(elements),
1300
+ visit_all(multi_target_elements(node)),
1322
1301
  token(node.rparen_loc)
1323
1302
  )
1324
1303
  end
@@ -1326,9 +1305,11 @@ module Prism
1326
1305
  # foo, bar = baz
1327
1306
  # ^^^^^^^^^^^^^^
1328
1307
  def visit_multi_write_node(node)
1329
- elements = [*node.lefts]
1330
- elements << node.rest if !node.rest.nil? && !node.rest.is_a?(ImplicitRestNode)
1331
- elements.concat(node.rights)
1308
+ elements = multi_target_elements(node)
1309
+
1310
+ if elements.length == 1 && elements.first.is_a?(MultiTargetNode)
1311
+ elements = multi_target_elements(elements.first)
1312
+ end
1332
1313
 
1333
1314
  builder.multi_assign(
1334
1315
  builder.multi_lhs(
@@ -1409,12 +1390,12 @@ module Prism
1409
1390
 
1410
1391
  if node.requireds.any?
1411
1392
  node.requireds.each do |required|
1412
- if required.is_a?(RequiredParameterNode)
1413
- params << visit(required)
1414
- else
1415
- compiler = copy_compiler(in_destructure: true)
1416
- params << required.accept(compiler)
1417
- end
1393
+ params <<
1394
+ if required.is_a?(RequiredParameterNode)
1395
+ visit(required)
1396
+ else
1397
+ required.accept(copy_compiler(in_destructure: true))
1398
+ end
1418
1399
  end
1419
1400
  end
1420
1401
 
@@ -1423,12 +1404,12 @@ module Prism
1423
1404
 
1424
1405
  if node.posts.any?
1425
1406
  node.posts.each do |post|
1426
- if post.is_a?(RequiredParameterNode)
1427
- params << visit(post)
1428
- else
1429
- compiler = copy_compiler(in_destructure: true)
1430
- params << post.accept(compiler)
1431
- end
1407
+ params <<
1408
+ if post.is_a?(RequiredParameterNode)
1409
+ visit(post)
1410
+ else
1411
+ post.accept(copy_compiler(in_destructure: true))
1412
+ end
1432
1413
  end
1433
1414
  end
1434
1415
 
@@ -1514,7 +1495,7 @@ module Prism
1514
1495
  # 1r
1515
1496
  # ^^
1516
1497
  def visit_rational_node(node)
1517
- visit_numeric(node, builder.rational([rational_value(node), srange(node.location)]))
1498
+ visit_numeric(node, builder.rational([node.value, srange(node.location)]))
1518
1499
  end
1519
1500
 
1520
1501
  # redo
@@ -1526,9 +1507,20 @@ module Prism
1526
1507
  # /foo/
1527
1508
  # ^^^^^
1528
1509
  def visit_regular_expression_node(node)
1510
+ content = node.content
1511
+ parts =
1512
+ if content.include?("\n")
1513
+ offset = node.content_loc.start_offset
1514
+ content.lines.map do |line|
1515
+ builder.string_internal([line, srange_offsets(offset, offset += line.bytesize)])
1516
+ end
1517
+ else
1518
+ [builder.string_internal(token(node.content_loc))]
1519
+ end
1520
+
1529
1521
  builder.regexp_compose(
1530
1522
  token(node.opening_loc),
1531
- [builder.string_internal(token(node.content_loc))],
1523
+ parts,
1532
1524
  [node.closing[0], srange_offsets(node.closing_loc.start_offset, node.closing_loc.start_offset + 1)],
1533
1525
  builder.regexp_options([node.closing[1..], srange_offsets(node.closing_loc.start_offset + 1, node.closing_loc.end_offset)])
1534
1526
  )
@@ -1674,10 +1666,11 @@ module Prism
1674
1666
  # ^^^^^
1675
1667
  def visit_string_node(node)
1676
1668
  if node.heredoc?
1677
- children, closing = visit_heredoc(node.to_interpolated)
1678
- builder.string_compose(token(node.opening_loc), children, closing)
1669
+ visit_heredoc(node.to_interpolated) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) }
1679
1670
  elsif node.opening == "?"
1680
1671
  builder.character([node.unescaped, srange(node.location)])
1672
+ elsif node.opening&.start_with?("%") && node.unescaped.empty?
1673
+ builder.string_compose(token(node.opening_loc), [], token(node.closing_loc))
1681
1674
  else
1682
1675
  content_lines = node.content.lines
1683
1676
  unescaped_lines = node.unescaped.lines
@@ -1791,16 +1784,16 @@ module Prism
1791
1784
  if node.then_keyword_loc
1792
1785
  token(node.then_keyword_loc)
1793
1786
  else
1794
- srange_find(node.predicate.location.end_offset, (node.statements&.location || node.consequent&.location || node.end_keyword_loc).start_offset, [";"])
1787
+ srange_find(node.predicate.location.end_offset, (node.statements&.location || node.else_clause&.location || node.end_keyword_loc).start_offset, [";"])
1795
1788
  end,
1796
- visit(node.consequent),
1797
- token(node.consequent&.else_keyword_loc),
1789
+ visit(node.else_clause),
1790
+ token(node.else_clause&.else_keyword_loc),
1798
1791
  visit(node.statements),
1799
1792
  token(node.end_keyword_loc)
1800
1793
  )
1801
1794
  else
1802
1795
  builder.condition_mod(
1803
- visit(node.consequent),
1796
+ visit(node.else_clause),
1804
1797
  visit(node.statements),
1805
1798
  token(node.keyword_loc),
1806
1799
  visit(node.predicate)
@@ -1877,8 +1870,7 @@ module Prism
1877
1870
  # ^^^^^
1878
1871
  def visit_x_string_node(node)
1879
1872
  if node.heredoc?
1880
- children, closing = visit_heredoc(node.to_interpolated)
1881
- builder.xstring_compose(token(node.opening_loc), children, closing)
1873
+ visit_heredoc(node.to_interpolated) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
1882
1874
  else
1883
1875
  parts = if node.unescaped.lines.one?
1884
1876
  [builder.string_internal([node.unescaped, srange(node.content_loc)])]
@@ -1940,10 +1932,12 @@ module Prism
1940
1932
  forwarding
1941
1933
  end
1942
1934
 
1943
- # Because we have mutated the AST to allow for newlines in the middle of
1944
- # a rational, we need to manually handle the value here.
1945
- def imaginary_value(node)
1946
- Complex(0, node.numeric.is_a?(RationalNode) ? rational_value(node.numeric) : node.numeric.value)
1935
+ # Returns the set of targets for a MultiTargetNode or a MultiWriteNode.
1936
+ def multi_target_elements(node)
1937
+ elements = [*node.lefts]
1938
+ elements << node.rest if !node.rest.nil? && !node.rest.is_a?(ImplicitRestNode)
1939
+ elements.concat(node.rights)
1940
+ elements
1947
1941
  end
1948
1942
 
1949
1943
  # Negate the value of a numeric node. This is a special case where you
@@ -1955,7 +1949,9 @@ module Prism
1955
1949
  case receiver.type
1956
1950
  when :integer_node, :float_node
1957
1951
  receiver.copy(value: -receiver.value, location: message_loc.join(receiver.location))
1958
- when :rational_node, :imaginary_node
1952
+ when :rational_node
1953
+ receiver.copy(numerator: -receiver.numerator, location: message_loc.join(receiver.location))
1954
+ when :imaginary_node
1959
1955
  receiver.copy(numeric: numeric_negate(message_loc, receiver.numeric), location: message_loc.join(receiver.location))
1960
1956
  end
1961
1957
  end
@@ -1974,16 +1970,6 @@ module Prism
1974
1970
  parameters.block.nil?
1975
1971
  end
1976
1972
 
1977
- # Because we have mutated the AST to allow for newlines in the middle of
1978
- # a rational, we need to manually handle the value here.
1979
- def rational_value(node)
1980
- if node.numeric.is_a?(IntegerNode)
1981
- Rational(node.numeric.value)
1982
- else
1983
- Rational(node.slice.gsub(/\s/, "").chomp("r"))
1984
- end
1985
- end
1986
-
1987
1973
  # Locations in the parser gem AST are generated using this class. We
1988
1974
  # store a reference to its constant to make it slightly faster to look
1989
1975
  # up.
@@ -2006,7 +1992,7 @@ module Prism
2006
1992
  # Note that end_offset is allowed to be nil, in which case this will
2007
1993
  # search until the end of the string.
2008
1994
  def srange_find(start_offset, end_offset, tokens)
2009
- if (match = source_buffer.source.byteslice(start_offset...end_offset).match(/(\s*)(#{tokens.join("|")})/))
1995
+ if (match = source_buffer.source.byteslice(start_offset...end_offset).match(/\A(\s*)(#{tokens.join("|")})/))
2010
1996
  _, whitespace, token = *match
2011
1997
  token_offset = start_offset + whitespace.bytesize
2012
1998
 
@@ -2037,7 +2023,8 @@ module Prism
2037
2023
  token(parameters.opening_loc),
2038
2024
  if procarg0?(parameters.parameters)
2039
2025
  parameter = parameters.parameters.requireds.first
2040
- [builder.procarg0(visit(parameter))].concat(visit_all(parameters.locals))
2026
+ visited = parameter.is_a?(RequiredParameterNode) ? visit(parameter) : parameter.accept(copy_compiler(in_destructure: true))
2027
+ [builder.procarg0(visited)].concat(visit_all(parameters.locals))
2041
2028
  else
2042
2029
  visit(parameters)
2043
2030
  end,
@@ -2053,29 +2040,55 @@ module Prism
2053
2040
  end
2054
2041
  end
2055
2042
 
2043
+ # The parser gem automatically converts \r\n to \n, meaning our offsets
2044
+ # need to be adjusted to always subtract 1 from the length.
2045
+ def chomped_bytesize(line)
2046
+ chomped = line.chomp
2047
+ chomped.bytesize + (chomped == line ? 0 : 1)
2048
+ end
2049
+
2056
2050
  # Visit a heredoc that can be either a string or an xstring.
2057
2051
  def visit_heredoc(node)
2058
2052
  children = Array.new
2053
+ indented = false
2054
+
2055
+ # If this is a dedenting heredoc, then we need to insert the opening
2056
+ # content into the children as well.
2057
+ if node.opening.start_with?("<<~") && node.parts.length > 0 && !node.parts.first.is_a?(StringNode)
2058
+ location = node.parts.first.location
2059
+ location = location.copy(start_offset: location.start_offset - location.start_line_slice.bytesize)
2060
+ children << builder.string_internal(token(location))
2061
+ indented = true
2062
+ end
2063
+
2059
2064
  node.parts.each do |part|
2060
2065
  pushing =
2061
2066
  if part.is_a?(StringNode) && part.unescaped.include?("\n")
2062
- unescaped = part.unescaped.lines(chomp: true)
2063
- escaped = part.content.lines(chomp: true)
2067
+ unescaped = part.unescaped.lines
2068
+ escaped = part.content.lines
2064
2069
 
2065
- escaped_lengths =
2066
- if node.opening.end_with?("'")
2067
- escaped.map { |line| line.bytesize + 1 }
2068
- else
2069
- escaped.chunk_while { |before, after| before.match?(/(?<!\\)\\$/) }.map { |line| line.join.bytesize + line.length }
2070
+ escaped_lengths = []
2071
+ normalized_lengths = []
2072
+
2073
+ if node.opening.end_with?("'")
2074
+ escaped.each do |line|
2075
+ escaped_lengths << line.bytesize
2076
+ normalized_lengths << chomped_bytesize(line)
2070
2077
  end
2078
+ else
2079
+ escaped
2080
+ .chunk_while { |before, after| before.match?(/(?<!\\)\\\r?\n$/) }
2081
+ .each do |lines|
2082
+ escaped_lengths << lines.sum(&:bytesize)
2083
+ normalized_lengths << lines.sum { |line| chomped_bytesize(line) }
2084
+ end
2085
+ end
2071
2086
 
2072
2087
  start_offset = part.location.start_offset
2073
- end_offset = nil
2074
2088
 
2075
- unescaped.zip(escaped_lengths).map do |unescaped_line, escaped_length|
2076
- end_offset = start_offset + (escaped_length || 0)
2077
- inner_part = builder.string_internal(["#{unescaped_line}\n", srange_offsets(start_offset, end_offset)])
2078
- start_offset = end_offset
2089
+ unescaped.map.with_index do |unescaped_line, index|
2090
+ inner_part = builder.string_internal([unescaped_line, srange_offsets(start_offset, start_offset + normalized_lengths.fetch(index, 0))])
2091
+ start_offset += escaped_lengths.fetch(index, 0)
2079
2092
  inner_part
2080
2093
  end
2081
2094
  else
@@ -2086,7 +2099,12 @@ module Prism
2086
2099
  if child.type == :str && child.children.last == ""
2087
2100
  # nothing
2088
2101
  elsif child.type == :str && children.last && children.last.type == :str && !children.last.children.first.end_with?("\n")
2089
- children.last.children.first << child.children.first
2102
+ appendee = children[-1]
2103
+
2104
+ location = appendee.loc
2105
+ location = location.with_expression(location.expression.join(child.loc.expression))
2106
+
2107
+ children[-1] = appendee.updated(:str, [appendee.children.first << child.children.first], location: location)
2090
2108
  else
2091
2109
  children << child
2092
2110
  end
@@ -2095,8 +2113,10 @@ module Prism
2095
2113
 
2096
2114
  closing = node.closing
2097
2115
  closing_t = [closing.chomp, srange_offsets(node.closing_loc.start_offset, node.closing_loc.end_offset - (closing[/\s+$/]&.length || 0))]
2116
+ composed = yield children, closing_t
2098
2117
 
2099
- [children, closing_t]
2118
+ composed = composed.updated(nil, children[1..-1]) if indented
2119
+ composed
2100
2120
  end
2101
2121
 
2102
2122
  # Visit a numeric node and account for the optional sign.
@@ -134,7 +134,7 @@ module Prism
134
134
  MINUS_GREATER: :tLAMBDA,
135
135
  NEWLINE: :tNL,
136
136
  NUMBERED_REFERENCE: :tNTH_REF,
137
- PARENTHESIS_LEFT: :tLPAREN,
137
+ PARENTHESIS_LEFT: :tLPAREN2,
138
138
  PARENTHESIS_LEFT_PARENTHESES: :tLPAREN_ARG,
139
139
  PARENTHESIS_RIGHT: :tRPAREN,
140
140
  PERCENT: :tPERCENT,
@@ -173,7 +173,7 @@ module Prism
173
173
  UMINUS_NUM: :tUNARY_NUM,
174
174
  UPLUS: :tUPLUS,
175
175
  USTAR: :tSTAR,
176
- USTAR_STAR: :tPOW,
176
+ USTAR_STAR: :tDSTAR,
177
177
  WORDS_SEP: :tSPACE
178
178
  }
179
179
 
@@ -187,7 +187,20 @@ module Prism
187
187
  EXPR_BEG = 0x1 # :nodoc:
188
188
  EXPR_LABEL = 0x400 # :nodoc:
189
189
 
190
- private_constant :TYPES, :EXPR_BEG, :EXPR_LABEL
190
+ # It is used to determine whether `do` is of the token type `kDO` or `kDO_LAMBDA`.
191
+ #
192
+ # NOTE: In edge cases like `-> (foo = -> (bar) {}) do end`, please note that `kDO` is still returned
193
+ # instead of `kDO_LAMBDA`, which is expected: https://github.com/ruby/prism/pull/3046
194
+ LAMBDA_TOKEN_TYPES = [:kDO_LAMBDA, :tLAMBDA, :tLAMBEG]
195
+
196
+ # The `PARENTHESIS_LEFT` token in Prism is classified as either `tLPAREN` or `tLPAREN2` in the Parser gem.
197
+ # The following token types are listed as those classified as `tLPAREN`.
198
+ LPAREN_CONVERSION_TOKEN_TYPES = [
199
+ :kBREAK, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3,
200
+ :tEQL, :tLPAREN, :tLPAREN2, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS
201
+ ]
202
+
203
+ private_constant :TYPES, :EXPR_BEG, :EXPR_LABEL, :LAMBDA_TOKEN_TYPES, :LPAREN_CONVERSION_TOKEN_TYPES
191
204
 
192
205
  # The Parser::Source::Buffer that the tokens were lexed from.
193
206
  attr_reader :source_buffer
@@ -229,6 +242,13 @@ module Prism
229
242
  location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset])
230
243
 
231
244
  case type
245
+ when :kDO
246
+ types = tokens.map(&:first)
247
+ nearest_lambda_token_type = types.reverse.find { |type| LAMBDA_TOKEN_TYPES.include?(type) }
248
+
249
+ if nearest_lambda_token_type == :tLAMBDA
250
+ type = :kDO_LAMBDA
251
+ end
232
252
  when :tCHARACTER
233
253
  value.delete_prefix!("?")
234
254
  when :tCOMMENT
@@ -268,6 +288,8 @@ module Prism
268
288
  value.chomp!(":")
269
289
  when :tLCURLY
270
290
  type = :tLBRACE if state == EXPR_BEG | EXPR_LABEL
291
+ when :tLPAREN2
292
+ type = :tLPAREN if tokens.empty? || LPAREN_CONVERSION_TOKEN_TYPES.include?(tokens.dig(-1, 0))
271
293
  when :tNTH_REF
272
294
  value = parse_integer(value.delete_prefix("$"))
273
295
  when :tOP_ASGN
@@ -339,7 +361,7 @@ module Prism
339
361
  location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])
340
362
  end
341
363
  when :tSYMBEG
342
- if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
364
+ if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END
343
365
  next_location = token.location.join(next_token.location)
344
366
  type = :tSYMBOL
345
367
  value = next_token.value
@@ -1,6 +1,11 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "parser"
3
+ begin
4
+ require "parser"
5
+ rescue LoadError
6
+ warn(%q{Error: Unable to load parser. Add `gem "parser"` to your Gemfile.})
7
+ exit(1)
8
+ end
4
9
 
5
10
  module Prism
6
11
  module Translation
@@ -46,7 +51,7 @@ module Prism
46
51
  source = source_buffer.source
47
52
 
48
53
  offset_cache = build_offset_cache(source)
49
- result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]]), offset_cache)
54
+ result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]], encoding: false), offset_cache)
50
55
 
51
56
  build_ast(result.value, offset_cache)
52
57
  ensure
@@ -59,7 +64,7 @@ module Prism
59
64
  source = source_buffer.source
60
65
 
61
66
  offset_cache = build_offset_cache(source)
62
- result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]]), offset_cache)
67
+ result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]], encoding: false), offset_cache)
63
68
 
64
69
  [
65
70
  build_ast(result.value, offset_cache),
@@ -78,7 +83,7 @@ module Prism
78
83
  offset_cache = build_offset_cache(source)
79
84
  result =
80
85
  begin
81
- unwrap(Prism.parse_lex(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]]), offset_cache)
86
+ unwrap(Prism.parse_lex(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]], encoding: false), offset_cache)
82
87
  rescue ::Parser::SyntaxError
83
88
  raise if !recover
84
89
  end