prism 0.20.0 → 0.21.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 742f60637c4fd77f62b7713f484e87d70249e6e9dfeefc2c2ba0bfe667eed034
4
- data.tar.gz: 67bd239271c1d848536389668722a0617419b19b6bb250ed39cf17b7a414da4c
3
+ metadata.gz: b4d054a1268bf7f8b5947f30ad244c4713c850911e79c1ba469eca0ac36bc47c
4
+ data.tar.gz: b77e29c93584b79759381d75cfb5ad0753fe8d5f92863cada81895bb67f17572
5
5
  SHA512:
6
- metadata.gz: d0a90337f2635d35b08b0932ad6d928610406bb3f908c1b7b601f5fcb08b404604745f93bffd9a4bb84fc13cde0b6b4a71015390546a077daa4e05d7d8cf965e
7
- data.tar.gz: 231693786022302c486d3c4ea2c8841636e3c94cb37f6b5e410f1ab6ac4ce7fc12e53cd4d67f43d6a38f3292867fda808e655391528619d626823a5163cbf722
6
+ metadata.gz: 00fa781d854c4f9b716b238c392e48f3bd946b52a5ea100c8fa98bd909bd7d2fcd116b80c7877cbfff59bb991d7c78158ded3ff4154d7d3362df3b8c00fd4d08
7
+ data.tar.gz: cfea37b3aa825f0bb91a0bd19dec1ec72187790aca39a2b8d560a483d83c1f4604346320d071c93cd605f39c1fd975b1b508395d9673d7bf95c16feaeeee52e6
data/CHANGELOG.md CHANGED
@@ -6,7 +6,25 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
- ## [0.20.0] - 2024-01-01
9
+ ## [0.21.0] - 2024-02-05
10
+
11
+ ### Added
12
+
13
+ - Add the `pm_constant_pool_find` API for finding a constant.
14
+
15
+ ### Changed
16
+
17
+ - Fixes for `Prism::Translation::Parser`.
18
+ - Ensure all errors flow through `parser.diagnostics.process`.
19
+ - Fix the find pattern node.
20
+ - Fix block forwarding with `NumberedParametersNode`.
21
+ - Ensure we can parse strings with invalid bytes for the encoding.
22
+ - Fix hash pairs in pattern matching.
23
+ - Properly reject operator writes on operator calls, e.g., `a.+ -= b`.
24
+ - Fix multi-byte escapes.
25
+ - Handle missing body in `begin` within the receiver of a method call.
26
+
27
+ ## [0.20.0] - 2024-02-01
10
28
 
11
29
  ### Added
12
30
 
@@ -323,7 +341,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
323
341
 
324
342
  - 🎉 Initial release! 🎉
325
343
 
326
- [unreleased]: https://github.com/ruby/prism/compare/v0.20.0...HEAD
344
+ [unreleased]: https://github.com/ruby/prism/compare/v0.21.0...HEAD
345
+ [0.21.0]: https://github.com/ruby/prism/compare/v0.20.0...v0.21.0
327
346
  [0.20.0]: https://github.com/ruby/prism/compare/v0.19.0...v0.20.0
328
347
  [0.19.0]: https://github.com/ruby/prism/compare/v0.18.0...v0.19.0
329
348
  [0.18.0]: https://github.com/ruby/prism/compare/v0.17.1...v0.18.0
@@ -9,7 +9,7 @@ The `parser` gem provides multiple parsers to support different versions of the
9
9
  You can use the `prism` parser like you would any other. After requiring the parser, you should be able to call any of the regular `Parser::Base` APIs that you would normally use.
10
10
 
11
11
  ```ruby
12
- require "prism/translation/parser"
12
+ require "prism"
13
13
 
14
14
  Prism::Translation::Parser.parse_file("path/to/file.rb")
15
15
  ```
@@ -1,7 +1,7 @@
1
1
  #ifndef PRISM_EXT_NODE_H
2
2
  #define PRISM_EXT_NODE_H
3
3
 
4
- #define EXPECTED_PRISM_VERSION "0.20.0"
4
+ #define EXPECTED_PRISM_VERSION "0.21.0"
5
5
 
6
6
  #include <ruby.h>
7
7
  #include <ruby/encoding.h>
data/include/prism/ast.h CHANGED
@@ -1042,7 +1042,7 @@ static const pm_node_flags_t PM_NODE_FLAG_COMMON_MASK = (1 << (PM_NODE_FLAG_BITS
1042
1042
  * Cast the type to an enum to allow the compiler to provide exhaustiveness
1043
1043
  * checking.
1044
1044
  */
1045
- #define PM_NODE_TYPE(node) ((enum pm_node_type) node->type)
1045
+ #define PM_NODE_TYPE(node) ((enum pm_node_type) (node)->type)
1046
1046
 
1047
1047
  /**
1048
1048
  * Return true if the type of the given node matches the given type.
@@ -626,7 +626,7 @@ struct pm_parser {
626
626
  * This is the path of the file being parsed. We use the filepath when
627
627
  * constructing SourceFileNodes.
628
628
  */
629
- pm_string_t filepath_string;
629
+ pm_string_t filepath;
630
630
 
631
631
  /**
632
632
  * This constant pool keeps all of the constants defined throughout the file
@@ -154,6 +154,17 @@ bool pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity);
154
154
  */
155
155
  pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id);
156
156
 
157
+ /**
158
+ * Find a constant in a constant pool. Returns the id of the constant, or 0 if
159
+ * the constant is not found.
160
+ *
161
+ * @param pool The pool to find the constant in.
162
+ * @param start A pointer to the start of the constant.
163
+ * @param length The length of the constant.
164
+ * @return The id of the constant.
165
+ */
166
+ pm_constant_id_t pm_constant_pool_find(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
167
+
157
168
  /**
158
169
  * Insert a constant into a constant pool that is a slice of a source string.
159
170
  * Returns the id of the constant, or 0 if any potential calls to resize fail.
@@ -14,7 +14,7 @@
14
14
  /**
15
15
  * The minor version of the Prism library as an int.
16
16
  */
17
- #define PRISM_VERSION_MINOR 20
17
+ #define PRISM_VERSION_MINOR 21
18
18
 
19
19
  /**
20
20
  * The patch version of the Prism library as an int.
@@ -24,6 +24,6 @@
24
24
  /**
25
25
  * The version of the Prism library as a constant string.
26
26
  */
27
- #define PRISM_VERSION "0.20.0"
27
+ #define PRISM_VERSION "0.21.0"
28
28
 
29
29
  #endif
@@ -27,7 +27,7 @@ module Prism
27
27
 
28
28
  # The minor version of prism that we are expecting to find in the serialized
29
29
  # strings.
30
- MINOR_VERSION = 20
30
+ MINOR_VERSION = 21
31
31
 
32
32
  # The patch version of prism that we are expecting to find in the serialized
33
33
  # strings.
@@ -105,14 +105,18 @@ module Prism
105
105
  # { a: 1 }
106
106
  # ^^^^
107
107
  def visit_assoc_node(node)
108
- if node.value.is_a?(ImplicitNode)
109
- builder.pair_label([node.key.slice.chomp(":"), srange(node.key.location)])
110
- elsif in_pattern && node.value.nil?
111
- if node.key.is_a?(SymbolNode)
112
- builder.match_hash_var([node.key.unescaped, srange(node.key.location)])
108
+ if in_pattern
109
+ if node.value.is_a?(ImplicitNode)
110
+ if node.key.is_a?(SymbolNode)
111
+ builder.match_hash_var([node.key.unescaped, srange(node.key.location)])
112
+ else
113
+ builder.match_hash_var_from_str(token(node.key.opening_loc), visit_all(node.key.parts), token(node.key.closing_loc))
114
+ end
113
115
  else
114
- builder.match_hash_var_from_str(token(node.key.opening_loc), visit_all(node.key.parts), token(node.key.closing_loc))
116
+ builder.pair_keyword([node.key.unescaped, srange(node.key.location)], visit(node.value))
115
117
  end
118
+ elsif node.value.is_a?(ImplicitNode)
119
+ builder.pair_label([node.key.unescaped, srange(node.key.location)])
116
120
  elsif node.operator_loc
117
121
  builder.pair(visit(node.key), token(node.operator_loc), visit(node.value))
118
122
  elsif node.key.is_a?(SymbolNode) && node.key.opening_loc.nil?
@@ -241,53 +245,51 @@ module Prism
241
245
  block = nil
242
246
  end
243
247
 
248
+ if node.call_operator_loc.nil?
249
+ case name
250
+ when :!
251
+ return visit_block(builder.not_op(token(node.message_loc), token(node.opening_loc), visit(node.receiver), token(node.closing_loc)), block)
252
+ when :[]
253
+ return visit_block(builder.index(visit(node.receiver), token(node.opening_loc), visit_all(arguments), token(node.closing_loc)), block)
254
+ when :[]=
255
+ if node.message != "[]=" && node.arguments && block.nil? && !node.safe_navigation?
256
+ return visit_block(
257
+ builder.assign(
258
+ builder.index_asgn(
259
+ visit(node.receiver),
260
+ token(node.opening_loc),
261
+ visit_all(node.arguments.arguments[...-1]),
262
+ token(node.closing_loc),
263
+ ),
264
+ srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, ["="]),
265
+ visit(node.arguments.arguments.last)
266
+ ),
267
+ block
268
+ )
269
+ end
270
+ end
271
+ end
272
+
273
+ message_loc = node.message_loc
274
+ call_operator_loc = node.call_operator_loc
275
+ call_operator = [{ "." => :dot, "&." => :anddot, "::" => "::" }.fetch(call_operator_loc.slice), srange(call_operator_loc)] if call_operator_loc
276
+
244
277
  visit_block(
245
- if name == :!
246
- builder.not_op(
247
- token(node.message_loc),
248
- token(node.opening_loc),
249
- visit(node.receiver),
250
- token(node.closing_loc)
278
+ if name.end_with?("=") && !message_loc.slice.end_with?("=") && node.arguments && block.nil?
279
+ builder.assign(
280
+ builder.attr_asgn(visit(node.receiver), call_operator, token(message_loc)),
281
+ srange_find(message_loc.end_offset, node.arguments.location.start_offset, ["="]),
282
+ visit(node.arguments.arguments.last)
251
283
  )
252
- elsif name == :[]
253
- builder.index(
284
+ else
285
+ builder.call_method(
254
286
  visit(node.receiver),
287
+ call_operator,
288
+ message_loc ? [node.name, srange(message_loc)] : nil,
255
289
  token(node.opening_loc),
256
290
  visit_all(arguments),
257
291
  token(node.closing_loc)
258
292
  )
259
- elsif name == :[]= && node.message != "[]=" && node.arguments && block.nil?
260
- builder.assign(
261
- builder.index_asgn(
262
- visit(node.receiver),
263
- token(node.opening_loc),
264
- visit_all(node.arguments.arguments[...-1]),
265
- token(node.closing_loc),
266
- ),
267
- srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, ["="]),
268
- visit(node.arguments.arguments.last)
269
- )
270
- else
271
- message_loc = node.message_loc
272
- call_operator_loc = node.call_operator_loc
273
- call_operator = [{ "." => :dot, "&." => :anddot, "::" => "::" }.fetch(call_operator_loc.slice), srange(call_operator_loc)] if call_operator_loc
274
-
275
- if name.end_with?("=") && !message_loc.slice.end_with?("=") && node.arguments && block.nil?
276
- builder.assign(
277
- builder.attr_asgn(visit(node.receiver), call_operator, token(message_loc)),
278
- srange_find(message_loc.end_offset, node.arguments.location.start_offset, ["="]),
279
- visit(node.arguments.arguments.last)
280
- )
281
- else
282
- builder.call_method(
283
- visit(node.receiver),
284
- call_operator,
285
- message_loc ? [node.name, srange(message_loc)] : nil,
286
- token(node.opening_loc),
287
- visit_all(arguments),
288
- token(node.closing_loc)
289
- )
290
- end
291
293
  end,
292
294
  block
293
295
  )
@@ -519,8 +521,6 @@ module Prism
519
521
  # def self.foo; end
520
522
  # ^^^^^^^^^^^^^^^^^
521
523
  def visit_def_node(node)
522
- forwarding = find_forwarding(node.parameters)
523
-
524
524
  if node.equal_loc
525
525
  if node.receiver
526
526
  builder.def_endless_singleton(
@@ -530,7 +530,7 @@ module Prism
530
530
  token(node.name_loc),
531
531
  builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false),
532
532
  token(node.equal_loc),
533
- node.body&.accept(copy_compiler(forwarding: forwarding))
533
+ node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters)))
534
534
  )
535
535
  else
536
536
  builder.def_endless_method(
@@ -538,7 +538,7 @@ module Prism
538
538
  token(node.name_loc),
539
539
  builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false),
540
540
  token(node.equal_loc),
541
- node.body&.accept(copy_compiler(forwarding: forwarding))
541
+ node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters)))
542
542
  )
543
543
  end
544
544
  elsif node.receiver
@@ -548,7 +548,7 @@ module Prism
548
548
  token(node.operator_loc),
549
549
  token(node.name_loc),
550
550
  builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false),
551
- node.body&.accept(copy_compiler(forwarding: forwarding)),
551
+ node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters))),
552
552
  token(node.end_keyword_loc)
553
553
  )
554
554
  else
@@ -556,7 +556,7 @@ module Prism
556
556
  token(node.def_keyword_loc),
557
557
  token(node.name_loc),
558
558
  builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false),
559
- node.body&.accept(copy_compiler(forwarding: forwarding)),
559
+ node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters))),
560
560
  token(node.end_keyword_loc)
561
561
  )
562
562
  end
@@ -614,9 +614,7 @@ module Prism
614
614
  # foo => [*, bar, *]
615
615
  # ^^^^^^^^^^^
616
616
  def visit_find_pattern_node(node)
617
- elements = [*node.requireds]
618
- elements << node.rest if !node.rest.nil? && !node.rest.is_a?(ImplicitRestNode)
619
- elements.concat(node.posts)
617
+ elements = [node.left, *node.requireds, node.right]
620
618
 
621
619
  if node.constant
622
620
  builder.const_pattern(visit(node.constant), token(node.opening_loc), builder.find_pattern(nil, visit_all(elements), nil), token(node.closing_loc))
@@ -993,24 +991,24 @@ module Prism
993
991
 
994
992
  # -> {}
995
993
  def visit_lambda_node(node)
994
+ parameters = node.parameters
995
+
996
996
  builder.block(
997
997
  builder.call_lambda(token(node.operator_loc)),
998
998
  [node.opening, srange(node.opening_loc)],
999
- if node.parameters
1000
- if node.parameters.is_a?(NumberedParametersNode)
1001
- visit(node.parameters)
1002
- else
1003
- builder.args(
1004
- token(node.parameters.opening_loc),
1005
- visit(node.parameters),
1006
- token(node.parameters.closing_loc),
1007
- false
1008
- )
1009
- end
1010
- else
999
+ if parameters.nil?
1011
1000
  builder.args(nil, [], nil, false)
1001
+ elsif node.parameters.is_a?(NumberedParametersNode)
1002
+ visit(node.parameters)
1003
+ else
1004
+ builder.args(
1005
+ token(node.parameters.opening_loc),
1006
+ visit(node.parameters),
1007
+ token(node.parameters.closing_loc),
1008
+ false
1009
+ )
1012
1010
  end,
1013
- node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters&.parameters))),
1011
+ node.body&.accept(copy_compiler(forwarding: parameters.is_a?(NumberedParametersNode) ? [] : find_forwarding(parameters&.parameters))),
1014
1012
  [node.closing, srange(node.closing_loc)]
1015
1013
  )
1016
1014
  end
@@ -1096,7 +1094,7 @@ module Prism
1096
1094
  # case of a syntax error. The parser gem doesn't have such a concept, so
1097
1095
  # we invent our own here.
1098
1096
  def visit_missing_node(node)
1099
- raise CompilationError, "Cannot compile missing nodes"
1097
+ ::AST::Node.new(:missing, [], location: ::Parser::Source::Map.new(srange(node.location)))
1100
1098
  end
1101
1099
 
1102
1100
  # module Foo; end
@@ -1727,29 +1725,29 @@ module Prism
1727
1725
  # Visit a block node on a call.
1728
1726
  def visit_block(call, block)
1729
1727
  if block
1728
+ parameters = block.parameters
1729
+
1730
1730
  builder.block(
1731
1731
  call,
1732
1732
  token(block.opening_loc),
1733
- if (parameters = block.parameters)
1734
- if parameters.is_a?(NumberedParametersNode)
1735
- visit(parameters)
1736
- else
1737
- builder.args(
1738
- token(parameters.opening_loc),
1739
- if procarg0?(parameters.parameters)
1740
- parameter = parameters.parameters.requireds.first
1741
- [builder.procarg0(visit(parameter))].concat(visit_all(parameters.locals))
1742
- else
1743
- visit(parameters)
1744
- end,
1745
- token(parameters.closing_loc),
1746
- false
1747
- )
1748
- end
1749
- else
1733
+ if parameters.nil?
1750
1734
  builder.args(nil, [], nil, false)
1735
+ elsif parameters.is_a?(NumberedParametersNode)
1736
+ visit(parameters)
1737
+ else
1738
+ builder.args(
1739
+ token(parameters.opening_loc),
1740
+ if procarg0?(parameters.parameters)
1741
+ parameter = parameters.parameters.requireds.first
1742
+ [builder.procarg0(visit(parameter))].concat(visit_all(parameters.locals))
1743
+ else
1744
+ visit(parameters)
1745
+ end,
1746
+ token(parameters.closing_loc),
1747
+ false
1748
+ )
1751
1749
  end,
1752
- block.body&.accept(copy_compiler(forwarding: find_forwarding(block.parameters&.parameters))),
1750
+ block.body&.accept(copy_compiler(forwarding: parameters.is_a?(NumberedParametersNode) ? [] : find_forwarding(parameters&.parameters))),
1753
1751
  token(block.closing_loc)
1754
1752
  )
1755
1753
  else
@@ -1762,9 +1760,9 @@ module Prism
1762
1760
  children = []
1763
1761
  node.parts.each do |part|
1764
1762
  pushing =
1765
- if part.is_a?(StringNode) && part.unescaped.count("\n") > 1
1766
- unescaped = part.unescaped.split("\n")
1767
- escaped = part.content.split("\n")
1763
+ if part.is_a?(StringNode) && part.unescaped.include?("\n")
1764
+ unescaped = part.unescaped.lines(chomp: true)
1765
+ escaped = part.content.lines(chomp: true)
1768
1766
 
1769
1767
  escaped_lengths =
1770
1768
  if node.opening.end_with?("'")
@@ -1779,7 +1777,6 @@ module Prism
1779
1777
  unescaped.zip(escaped_lengths).map do |unescaped_line, escaped_length|
1780
1778
  end_offset = start_offset + (escaped_length || 0)
1781
1779
  inner_part = builder.string_internal(["#{unescaped_line}\n", srange_offsets(start_offset, end_offset)])
1782
-
1783
1780
  start_offset = end_offset
1784
1781
  inner_part
1785
1782
  end
@@ -26,7 +26,7 @@ module Prism
26
26
  Racc_debug_parser = false # :nodoc:
27
27
 
28
28
  def version # :nodoc:
29
- 33
29
+ 34
30
30
  end
31
31
 
32
32
  # The default encoding for Ruby files is UTF-8.
@@ -42,9 +42,10 @@ module Prism
42
42
  @source_buffer = source_buffer
43
43
  source = source_buffer.source
44
44
 
45
- result = unwrap(Prism.parse(source, filepath: source_buffer.name))
45
+ offset_cache = build_offset_cache(source)
46
+ result = unwrap(Prism.parse(source, filepath: source_buffer.name), offset_cache)
46
47
 
47
- build_ast(result.value, build_offset_cache(source))
48
+ build_ast(result.value, offset_cache)
48
49
  ensure
49
50
  @source_buffer = nil
50
51
  end
@@ -55,7 +56,7 @@ module Prism
55
56
  source = source_buffer.source
56
57
 
57
58
  offset_cache = build_offset_cache(source)
58
- result = unwrap(Prism.parse(source, filepath: source_buffer.name))
59
+ result = unwrap(Prism.parse(source, filepath: source_buffer.name), offset_cache)
59
60
 
60
61
  [
61
62
  build_ast(result.value, offset_cache),
@@ -72,7 +73,7 @@ module Prism
72
73
  source = source_buffer.source
73
74
 
74
75
  offset_cache = build_offset_cache(source)
75
- result = unwrap(Prism.parse_lex(source, filepath: source_buffer.name))
76
+ result = unwrap(Prism.parse_lex(source, filepath: source_buffer.name), offset_cache)
76
77
 
77
78
  program, tokens = result.value
78
79
 
@@ -93,16 +94,23 @@ module Prism
93
94
 
94
95
  private
95
96
 
97
+ # This is a hook to allow consumers to disable some errors if they don't
98
+ # want them to block creating the syntax tree.
99
+ def valid_error?(error)
100
+ true
101
+ end
102
+
96
103
  # If there was an error generated during the parse, then raise an
97
104
  # appropriate syntax error. Otherwise return the result.
98
- def unwrap(result)
99
- return result if result.success?
105
+ def unwrap(result, offset_cache)
106
+ result.errors.each do |error|
107
+ next unless valid_error?(error)
100
108
 
101
- error = result.errors.first
102
- offset_cache = build_offset_cache(source_buffer.source)
109
+ location = build_range(error.location, offset_cache)
110
+ diagnostics.process(Diagnostic.new(error.message, location))
111
+ end
103
112
 
104
- diagnostic = Diagnostic.new(error.message, build_range(error.location, offset_cache))
105
- raise ::Parser::SyntaxError, diagnostic
113
+ result
106
114
  end
107
115
 
108
116
  # Prism deals with offsets in bytes, while the parser gem deals with
data/prism.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |spec|
4
4
  spec.name = "prism"
5
- spec.version = "0.20.0"
5
+ spec.version = "0.21.0"
6
6
  spec.authors = ["Shopify"]
7
7
  spec.email = ["ruby@shopify.com"]
8
8
 
data/src/encoding.c CHANGED
@@ -2252,7 +2252,7 @@ static const uint8_t pm_utf_8_dfa[] = {
2252
2252
  */
2253
2253
  static pm_unicode_codepoint_t
2254
2254
  pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
2255
- assert(n >= 1);
2255
+ assert(n >= 0);
2256
2256
  size_t maximum = (size_t) n;
2257
2257
 
2258
2258
  uint32_t codepoint;
data/src/prism.c CHANGED
@@ -870,6 +870,105 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
870
870
  pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
871
871
  }
872
872
 
873
+ /******************************************************************************/
874
+ /* Basic character checks */
875
+ /******************************************************************************/
876
+
877
+ /**
878
+ * This function is used extremely frequently to lex all of the identifiers in a
879
+ * source file, so it's important that it be as fast as possible. For this
880
+ * reason we have the encoding_changed boolean to check if we need to go through
881
+ * the function pointer or can just directly use the UTF-8 functions.
882
+ */
883
+ static inline size_t
884
+ char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b) {
885
+ if (parser->encoding_changed) {
886
+ size_t width;
887
+ if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
888
+ return width;
889
+ } else if (*b == '_') {
890
+ return 1;
891
+ } else if (*b >= 0x80) {
892
+ return parser->encoding->char_width(b, parser->end - b);
893
+ } else {
894
+ return 0;
895
+ }
896
+ } else if (*b < 0x80) {
897
+ return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
898
+ } else {
899
+ return pm_encoding_utf_8_char_width(b, parser->end - b);
900
+ }
901
+ }
902
+
903
+ /**
904
+ * Similar to char_is_identifier but this function assumes that the encoding
905
+ * has not been changed.
906
+ */
907
+ static inline size_t
908
+ char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
909
+ if (*b < 0x80) {
910
+ return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
911
+ } else {
912
+ return pm_encoding_utf_8_char_width(b, end - b);
913
+ }
914
+ }
915
+
916
+ /**
917
+ * Like the above, this function is also used extremely frequently to lex all of
918
+ * the identifiers in a source file once the first character has been found. So
919
+ * it's important that it be as fast as possible.
920
+ */
921
+ static inline size_t
922
+ char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
923
+ if (parser->encoding_changed) {
924
+ size_t width;
925
+ if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
926
+ return width;
927
+ } else if (*b == '_') {
928
+ return 1;
929
+ } else if (*b >= 0x80) {
930
+ return parser->encoding->char_width(b, parser->end - b);
931
+ } else {
932
+ return 0;
933
+ }
934
+ }
935
+ return char_is_identifier_utf8(b, parser->end);
936
+ }
937
+
938
+ // Here we're defining a perfect hash for the characters that are allowed in
939
+ // global names. This is used to quickly check the next character after a $ to
940
+ // see if it's a valid character for a global name.
941
+ #define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
942
+ #define PUNCT(idx) ( \
943
+ BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
944
+ BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
945
+ BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
946
+ BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
947
+ BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
948
+ BIT('0', idx))
949
+
950
+ const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
951
+
952
+ #undef BIT
953
+ #undef PUNCT
954
+
955
+ static inline bool
956
+ char_is_global_name_punctuation(const uint8_t b) {
957
+ const unsigned int i = (const unsigned int) b;
958
+ if (i <= 0x20 || 0x7e < i) return false;
959
+
960
+ return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
961
+ }
962
+
963
+ static inline bool
964
+ token_is_setter_name(pm_token_t *token) {
965
+ return (
966
+ (token->type == PM_TOKEN_IDENTIFIER) &&
967
+ (token->end - token->start >= 2) &&
968
+ (token->end[-1] == '=')
969
+ );
970
+ }
971
+
873
972
  /******************************************************************************/
874
973
  /* Node flag handling functions */
875
974
  /******************************************************************************/
@@ -1923,11 +2022,12 @@ pm_call_node_index_p(pm_call_node_t *node) {
1923
2022
  * operator assignment.
1924
2023
  */
1925
2024
  static inline bool
1926
- pm_call_node_writable_p(pm_call_node_t *node) {
2025
+ pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
1927
2026
  return (
1928
2027
  (node->message_loc.start != NULL) &&
1929
2028
  (node->message_loc.end[-1] != '!') &&
1930
2029
  (node->message_loc.end[-1] != '?') &&
2030
+ char_is_identifier_start(parser, node->message_loc.start) &&
1931
2031
  (node->opening_loc.start == NULL) &&
1932
2032
  (node->arguments == NULL) &&
1933
2033
  (node->block == NULL)
@@ -2744,19 +2844,21 @@ pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *targ
2744
2844
  * Check if the receiver of a `def` node is allowed.
2745
2845
  */
2746
2846
  static void
2747
- pm_check_def_receiver(pm_parser_t *parser, pm_node_t *receiver) {
2748
- switch (receiver->type) {
2847
+ pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
2848
+ switch (PM_NODE_TYPE(node)) {
2749
2849
  case PM_BEGIN_NODE: {
2750
- pm_begin_node_t *begin_node = (pm_begin_node_t *)receiver;
2751
- pm_check_def_receiver(parser, (pm_node_t *) begin_node->statements);
2850
+ const pm_begin_node_t *cast = (pm_begin_node_t *) node;
2851
+ if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
2752
2852
  break;
2753
2853
  }
2754
- case PM_PARENTHESES_NODE:
2755
- pm_check_def_receiver(parser, ((pm_parentheses_node_t *) receiver)->body);
2854
+ case PM_PARENTHESES_NODE: {
2855
+ const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
2856
+ if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
2756
2857
  break;
2858
+ }
2757
2859
  case PM_STATEMENTS_NODE: {
2758
- pm_statements_node_t *statements_node = (pm_statements_node_t *)receiver;
2759
- pm_check_def_receiver(parser, statements_node->body.nodes[statements_node->body.size - 1]);
2860
+ const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
2861
+ pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
2760
2862
  break;
2761
2863
  }
2762
2864
  case PM_ARRAY_NODE:
@@ -2775,7 +2877,10 @@ pm_check_def_receiver(pm_parser_t *parser, pm_node_t *receiver) {
2775
2877
  case PM_STRING_NODE:
2776
2878
  case PM_SYMBOL_NODE:
2777
2879
  case PM_X_STRING_NODE:
2778
- pm_parser_err_node(parser, receiver, PM_ERR_SINGLETON_FOR_LITERALS);
2880
+ pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
2881
+ break;
2882
+ default:
2883
+ break;
2779
2884
  }
2780
2885
  }
2781
2886
 
@@ -2807,7 +2912,7 @@ pm_def_node_create(
2807
2912
  }
2808
2913
 
2809
2914
  if ((receiver != NULL) && PM_NODE_TYPE_P(receiver, PM_PARENTHESES_NODE)) {
2810
- pm_check_def_receiver(parser, receiver);
2915
+ pm_def_node_receiver_check(parser, receiver);
2811
2916
  }
2812
2917
 
2813
2918
  *node = (pm_def_node_t) {
@@ -5330,7 +5435,7 @@ pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword)
5330
5435
  .flags = PM_NODE_FLAG_STATIC_LITERAL,
5331
5436
  .location = PM_LOCATION_TOKEN_VALUE(file_keyword),
5332
5437
  },
5333
- .filepath = parser->filepath_string,
5438
+ .filepath = parser->filepath
5334
5439
  };
5335
5440
 
5336
5441
  return node;
@@ -6220,6 +6325,16 @@ pm_parser_local_add_owned(pm_parser_t *parser, const uint8_t *start, size_t leng
6220
6325
  return constant_id;
6221
6326
  }
6222
6327
 
6328
+ /**
6329
+ * Add a local variable from a constant string to the current scope.
6330
+ */
6331
+ static pm_constant_id_t
6332
+ pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
6333
+ pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
6334
+ if (constant_id != 0) pm_parser_local_add(parser, constant_id);
6335
+ return constant_id;
6336
+ }
6337
+
6223
6338
  /**
6224
6339
  * Add a parameter name to the current scope and check whether the name of the
6225
6340
  * parameter is unique or not.
@@ -6259,105 +6374,6 @@ pm_parser_scope_pop(pm_parser_t *parser) {
6259
6374
  free(scope);
6260
6375
  }
6261
6376
 
6262
- /******************************************************************************/
6263
- /* Basic character checks */
6264
- /******************************************************************************/
6265
-
6266
- /**
6267
- * This function is used extremely frequently to lex all of the identifiers in a
6268
- * source file, so it's important that it be as fast as possible. For this
6269
- * reason we have the encoding_changed boolean to check if we need to go through
6270
- * the function pointer or can just directly use the UTF-8 functions.
6271
- */
6272
- static inline size_t
6273
- char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
6274
- if (parser->encoding_changed) {
6275
- size_t width;
6276
- if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
6277
- return width;
6278
- } else if (*b == '_') {
6279
- return 1;
6280
- } else if (*b >= 0x80) {
6281
- return parser->encoding->char_width(b, parser->end - b);
6282
- } else {
6283
- return 0;
6284
- }
6285
- } else if (*b < 0x80) {
6286
- return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
6287
- } else {
6288
- return pm_encoding_utf_8_char_width(b, parser->end - b);
6289
- }
6290
- }
6291
-
6292
- /**
6293
- * Similar to char_is_identifier but this function assumes that the encoding
6294
- * has not been changed.
6295
- */
6296
- static inline size_t
6297
- char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
6298
- if (*b < 0x80) {
6299
- return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
6300
- } else {
6301
- return pm_encoding_utf_8_char_width(b, end - b);
6302
- }
6303
- }
6304
-
6305
- /**
6306
- * Like the above, this function is also used extremely frequently to lex all of
6307
- * the identifiers in a source file once the first character has been found. So
6308
- * it's important that it be as fast as possible.
6309
- */
6310
- static inline size_t
6311
- char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
6312
- if (parser->encoding_changed) {
6313
- size_t width;
6314
- if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
6315
- return width;
6316
- } else if (*b == '_') {
6317
- return 1;
6318
- } else if (*b >= 0x80) {
6319
- return parser->encoding->char_width(b, parser->end - b);
6320
- } else {
6321
- return 0;
6322
- }
6323
- }
6324
- return char_is_identifier_utf8(b, parser->end);
6325
- }
6326
-
6327
- // Here we're defining a perfect hash for the characters that are allowed in
6328
- // global names. This is used to quickly check the next character after a $ to
6329
- // see if it's a valid character for a global name.
6330
- #define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
6331
- #define PUNCT(idx) ( \
6332
- BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
6333
- BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
6334
- BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
6335
- BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
6336
- BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
6337
- BIT('0', idx))
6338
-
6339
- const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
6340
-
6341
- #undef BIT
6342
- #undef PUNCT
6343
-
6344
- static inline bool
6345
- char_is_global_name_punctuation(const uint8_t b) {
6346
- const unsigned int i = (const unsigned int) b;
6347
- if (i <= 0x20 || 0x7e < i) return false;
6348
-
6349
- return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
6350
- }
6351
-
6352
- static inline bool
6353
- token_is_setter_name(pm_token_t *token) {
6354
- return (
6355
- (token->type == PM_TOKEN_IDENTIFIER) &&
6356
- (token->end - token->start >= 2) &&
6357
- (token->end[-1] == '=')
6358
- );
6359
- }
6360
-
6361
6377
  /******************************************************************************/
6362
6378
  /* Stack helpers */
6363
6379
  /******************************************************************************/
@@ -7673,6 +7689,28 @@ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte
7673
7689
  pm_buffer_append_byte(buffer, byte);
7674
7690
  }
7675
7691
 
7692
+ /**
7693
+ * Write each byte of the given escaped character into the buffer.
7694
+ */
7695
+ static inline void
7696
+ escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
7697
+ size_t width;
7698
+ if (parser->encoding_changed) {
7699
+ width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
7700
+ } else {
7701
+ width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
7702
+ }
7703
+
7704
+ // TODO: If the character is invalid in the given encoding, then we'll just
7705
+ // push one byte into the buffer. This should actually be an error.
7706
+ width = (width == 0) ? 1 : width;
7707
+
7708
+ for (size_t index = 0; index < width; index++) {
7709
+ escape_write_byte_encoded(parser, buffer, *parser->current.end);
7710
+ parser->current.end++;
7711
+ }
7712
+ }
7713
+
7676
7714
  /**
7677
7715
  * The regular expression engine doesn't support the same escape sequences as
7678
7716
  * Ruby does. So first we have to read the escape sequence, and then we have to
@@ -8011,7 +8049,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
8011
8049
  /* fallthrough */
8012
8050
  default: {
8013
8051
  if (parser->current.end < parser->end) {
8014
- escape_write_byte_encoded(parser, buffer, *parser->current.end++);
8052
+ escape_write_escape_encoded(parser, buffer);
8015
8053
  }
8016
8054
  return;
8017
8055
  }
@@ -8288,10 +8326,40 @@ typedef struct {
8288
8326
  * Push the given byte into the token buffer.
8289
8327
  */
8290
8328
  static inline void
8291
- pm_token_buffer_push(pm_token_buffer_t *token_buffer, uint8_t byte) {
8329
+ pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
8292
8330
  pm_buffer_append_byte(&token_buffer->buffer, byte);
8293
8331
  }
8294
8332
 
8333
+ /**
8334
+ * Append the given bytes into the token buffer.
8335
+ */
8336
+ static inline void
8337
+ pm_token_buffer_push_bytes(pm_token_buffer_t *token_buffer, const uint8_t *bytes, size_t length) {
8338
+ pm_buffer_append_bytes(&token_buffer->buffer, bytes, length);
8339
+ }
8340
+
8341
+ /**
8342
+ * Push an escaped character into the token buffer.
8343
+ */
8344
+ static inline void
8345
+ pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
8346
+ // First, determine the width of the character to be escaped.
8347
+ size_t width;
8348
+ if (parser->encoding_changed) {
8349
+ width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8350
+ } else {
8351
+ width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
8352
+ }
8353
+
8354
+ // TODO: If the character is invalid in the given encoding, then we'll just
8355
+ // push one byte into the buffer. This should actually be an error.
8356
+ width = (width == 0 ? 1 : width);
8357
+
8358
+ // Now, push the bytes into the buffer.
8359
+ pm_token_buffer_push_bytes(token_buffer, parser->current.end, width);
8360
+ parser->current.end += width;
8361
+ }
8362
+
8295
8363
  /**
8296
8364
  * When we're about to return from lexing the current token and we know for sure
8297
8365
  * that we have found an escape sequence, this function is called to copy the
@@ -9704,18 +9772,18 @@ parser_lex(pm_parser_t *parser) {
9704
9772
  case '\t':
9705
9773
  case '\v':
9706
9774
  case '\\':
9707
- pm_token_buffer_push(&token_buffer, peeked);
9775
+ pm_token_buffer_push_byte(&token_buffer, peeked);
9708
9776
  parser->current.end++;
9709
9777
  break;
9710
9778
  case '\r':
9711
9779
  parser->current.end++;
9712
9780
  if (peek(parser) != '\n') {
9713
- pm_token_buffer_push(&token_buffer, '\r');
9781
+ pm_token_buffer_push_byte(&token_buffer, '\r');
9714
9782
  break;
9715
9783
  }
9716
9784
  /* fallthrough */
9717
9785
  case '\n':
9718
- pm_token_buffer_push(&token_buffer, '\n');
9786
+ pm_token_buffer_push_byte(&token_buffer, '\n');
9719
9787
 
9720
9788
  if (parser->heredoc_end) {
9721
9789
  // ... if we are on the same line as a heredoc,
@@ -9733,14 +9801,13 @@ parser_lex(pm_parser_t *parser) {
9733
9801
  break;
9734
9802
  default:
9735
9803
  if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
9736
- pm_token_buffer_push(&token_buffer, peeked);
9804
+ pm_token_buffer_push_byte(&token_buffer, peeked);
9737
9805
  parser->current.end++;
9738
9806
  } else if (lex_mode->as.list.interpolation) {
9739
9807
  escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
9740
9808
  } else {
9741
- pm_token_buffer_push(&token_buffer, '\\');
9742
- pm_token_buffer_push(&token_buffer, peeked);
9743
- parser->current.end++;
9809
+ pm_token_buffer_push_byte(&token_buffer, '\\');
9810
+ pm_token_buffer_push_escaped(&token_buffer, parser);
9744
9811
  }
9745
9812
 
9746
9813
  break;
@@ -9898,9 +9965,9 @@ parser_lex(pm_parser_t *parser) {
9898
9965
  parser->current.end++;
9899
9966
  if (peek(parser) != '\n') {
9900
9967
  if (lex_mode->as.regexp.terminator != '\r') {
9901
- pm_token_buffer_push(&token_buffer, '\\');
9968
+ pm_token_buffer_push_byte(&token_buffer, '\\');
9902
9969
  }
9903
- pm_token_buffer_push(&token_buffer, '\r');
9970
+ pm_token_buffer_push_byte(&token_buffer, '\r');
9904
9971
  break;
9905
9972
  }
9906
9973
  /* fallthrough */
@@ -9935,20 +10002,19 @@ parser_lex(pm_parser_t *parser) {
9935
10002
  case '$': case ')': case '*': case '+':
9936
10003
  case '.': case '>': case '?': case ']':
9937
10004
  case '^': case '|': case '}':
9938
- pm_token_buffer_push(&token_buffer, '\\');
10005
+ pm_token_buffer_push_byte(&token_buffer, '\\');
9939
10006
  break;
9940
10007
  default:
9941
10008
  break;
9942
10009
  }
9943
10010
 
9944
- pm_token_buffer_push(&token_buffer, peeked);
10011
+ pm_token_buffer_push_byte(&token_buffer, peeked);
9945
10012
  parser->current.end++;
9946
10013
  break;
9947
10014
  }
9948
10015
 
9949
- if (peeked < 0x80) pm_token_buffer_push(&token_buffer, '\\');
9950
- pm_token_buffer_push(&token_buffer, peeked);
9951
- parser->current.end++;
10016
+ if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer, '\\');
10017
+ pm_token_buffer_push_escaped(&token_buffer, parser);
9952
10018
  break;
9953
10019
  }
9954
10020
 
@@ -10115,23 +10181,23 @@ parser_lex(pm_parser_t *parser) {
10115
10181
 
10116
10182
  switch (peeked) {
10117
10183
  case '\\':
10118
- pm_token_buffer_push(&token_buffer, '\\');
10184
+ pm_token_buffer_push_byte(&token_buffer, '\\');
10119
10185
  parser->current.end++;
10120
10186
  break;
10121
10187
  case '\r':
10122
10188
  parser->current.end++;
10123
10189
  if (peek(parser) != '\n') {
10124
10190
  if (!lex_mode->as.string.interpolation) {
10125
- pm_token_buffer_push(&token_buffer, '\\');
10191
+ pm_token_buffer_push_byte(&token_buffer, '\\');
10126
10192
  }
10127
- pm_token_buffer_push(&token_buffer, '\r');
10193
+ pm_token_buffer_push_byte(&token_buffer, '\r');
10128
10194
  break;
10129
10195
  }
10130
10196
  /* fallthrough */
10131
10197
  case '\n':
10132
10198
  if (!lex_mode->as.string.interpolation) {
10133
- pm_token_buffer_push(&token_buffer, '\\');
10134
- pm_token_buffer_push(&token_buffer, '\n');
10199
+ pm_token_buffer_push_byte(&token_buffer, '\\');
10200
+ pm_token_buffer_push_byte(&token_buffer, '\n');
10135
10201
  }
10136
10202
 
10137
10203
  if (parser->heredoc_end) {
@@ -10150,17 +10216,16 @@ parser_lex(pm_parser_t *parser) {
10150
10216
  break;
10151
10217
  default:
10152
10218
  if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
10153
- pm_token_buffer_push(&token_buffer, peeked);
10219
+ pm_token_buffer_push_byte(&token_buffer, peeked);
10154
10220
  parser->current.end++;
10155
10221
  } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
10156
- pm_token_buffer_push(&token_buffer, peeked);
10222
+ pm_token_buffer_push_byte(&token_buffer, peeked);
10157
10223
  parser->current.end++;
10158
10224
  } else if (lex_mode->as.string.interpolation) {
10159
10225
  escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
10160
10226
  } else {
10161
- pm_token_buffer_push(&token_buffer, '\\');
10162
- pm_token_buffer_push(&token_buffer, peeked);
10163
- parser->current.end++;
10227
+ pm_token_buffer_push_byte(&token_buffer, '\\');
10228
+ pm_token_buffer_push_escaped(&token_buffer, parser);
10164
10229
  }
10165
10230
 
10166
10231
  break;
@@ -10417,21 +10482,20 @@ parser_lex(pm_parser_t *parser) {
10417
10482
  case '\r':
10418
10483
  parser->current.end++;
10419
10484
  if (peek(parser) != '\n') {
10420
- pm_token_buffer_push(&token_buffer, '\\');
10421
- pm_token_buffer_push(&token_buffer, '\r');
10485
+ pm_token_buffer_push_byte(&token_buffer, '\\');
10486
+ pm_token_buffer_push_byte(&token_buffer, '\r');
10422
10487
  break;
10423
10488
  }
10424
10489
  /* fallthrough */
10425
10490
  case '\n':
10426
- pm_token_buffer_push(&token_buffer, '\\');
10427
- pm_token_buffer_push(&token_buffer, '\n');
10491
+ pm_token_buffer_push_byte(&token_buffer, '\\');
10492
+ pm_token_buffer_push_byte(&token_buffer, '\n');
10428
10493
  token_buffer.cursor = parser->current.end + 1;
10429
10494
  breakpoint = parser->current.end;
10430
10495
  continue;
10431
10496
  default:
10432
- parser->current.end++;
10433
- pm_token_buffer_push(&token_buffer, '\\');
10434
- pm_token_buffer_push(&token_buffer, peeked);
10497
+ pm_token_buffer_push_byte(&token_buffer, '\\');
10498
+ pm_token_buffer_push_escaped(&token_buffer, parser);
10435
10499
  break;
10436
10500
  }
10437
10501
  } else {
@@ -10439,7 +10503,7 @@ parser_lex(pm_parser_t *parser) {
10439
10503
  case '\r':
10440
10504
  parser->current.end++;
10441
10505
  if (peek(parser) != '\n') {
10442
- pm_token_buffer_push(&token_buffer, '\r');
10506
+ pm_token_buffer_push_byte(&token_buffer, '\r');
10443
10507
  break;
10444
10508
  }
10445
10509
  /* fallthrough */
@@ -10715,14 +10779,6 @@ match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
10715
10779
  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
10716
10780
  }
10717
10781
 
10718
- /**
10719
- * Returns true if the current token is any of the five given types.
10720
- */
10721
- static inline bool
10722
- match5(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5) {
10723
- return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5);
10724
- }
10725
-
10726
10782
  /**
10727
10783
  * Returns true if the current token is any of the six given types.
10728
10784
  */
@@ -11359,7 +11415,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
11359
11415
  break;
11360
11416
  }
11361
11417
 
11362
- // If we have a terminator, then we will parse all consequtive terminators
11418
+ // If we have a terminator, then we will parse all consecutive terminators
11363
11419
  // and then continue parsing the statements list.
11364
11420
  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
11365
11421
  // If we have a terminator, then we will continue parsing the statements
@@ -13149,6 +13205,15 @@ outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
13149
13205
  return false;
13150
13206
  }
13151
13207
 
13208
+ /**
13209
+ * These are the names of the various numbered parameters. We have them here so
13210
+ * that when we insert them into the constant pool we can use a constant string
13211
+ * and not have to allocate.
13212
+ */
13213
+ static const char * const pm_numbered_parameter_names[] = {
13214
+ "_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
13215
+ };
13216
+
13152
13217
  /**
13153
13218
  * Parse an identifier into either a local variable read. If the local variable
13154
13219
  * is not found, it returns NULL instead.
@@ -13171,12 +13236,10 @@ parse_variable(pm_parser_t *parser) {
13171
13236
  pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
13172
13237
  } else {
13173
13238
  // Indicate that this scope is using numbered params so that child
13174
- // scopes cannot.
13175
- uint8_t number = parser->previous.start[1];
13176
-
13177
- // We subtract the value for the character '0' to get the actual
13178
- // integer value of the number (only _1 through _9 are valid)
13179
- uint8_t numbered_parameters = (uint8_t) (number - '0');
13239
+ // scopes cannot. We subtract the value for the character '0' to get
13240
+ // the actual integer value of the number (only _1 through _9 are
13241
+ // valid).
13242
+ uint8_t numbered_parameters = (uint8_t) (parser->previous.start[1] - '0');
13180
13243
  if (numbered_parameters > parser->current_scope->numbered_parameters) {
13181
13244
  parser->current_scope->numbered_parameters = numbered_parameters;
13182
13245
  pm_parser_numbered_parameters_set(parser, numbered_parameters);
@@ -13187,21 +13250,13 @@ parse_variable(pm_parser_t *parser) {
13187
13250
  // referencing _2 means that _1 must exist. Therefore here we
13188
13251
  // loop through all of the possibilities and add them into the
13189
13252
  // constant pool.
13190
- uint8_t current = '1';
13191
- uint8_t *value;
13192
-
13193
- while (current < number) {
13194
- value = malloc(2);
13195
- value[0] = '_';
13196
- value[1] = current++;
13197
- pm_parser_local_add_owned(parser, value, 2);
13253
+ for (uint8_t numbered_parameter = 1; numbered_parameter <= numbered_parameters - 1; numbered_parameter++) {
13254
+ pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameter - 1], 2);
13198
13255
  }
13199
13256
 
13200
- // Now we can add the actual token that is being used. For
13201
- // this one we can add a shared version since it is directly
13202
- // referenced in the source.
13203
- pm_parser_local_add_token(parser, &parser->previous);
13204
- return pm_local_variable_read_node_create(parser, &parser->previous, 0);
13257
+ // Finally we can create the local variable read node.
13258
+ pm_constant_id_t name_id = pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameters - 1], 2);
13259
+ return pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
13205
13260
  }
13206
13261
  }
13207
13262
 
@@ -14010,7 +14065,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
14010
14065
  // Gather up all of the patterns into the list.
14011
14066
  while (accept1(parser, PM_TOKEN_COMMA)) {
14012
14067
  // Break early here in case we have a trailing comma.
14013
- if (match5(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
14068
+ if (match6(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
14014
14069
  node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14015
14070
  pm_node_list_append(&nodes, node);
14016
14071
  break;
@@ -16927,7 +16982,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16927
16982
  }
16928
16983
 
16929
16984
  // If this node cannot be writable, then we have an error.
16930
- if (pm_call_node_writable_p(cast)) {
16985
+ if (pm_call_node_writable_p(parser, cast)) {
16931
16986
  parse_write_name(parser, &cast->name);
16932
16987
  } else {
16933
16988
  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
@@ -17038,7 +17093,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
17038
17093
  }
17039
17094
 
17040
17095
  // If this node cannot be writable, then we have an error.
17041
- if (pm_call_node_writable_p(cast)) {
17096
+ if (pm_call_node_writable_p(parser, cast)) {
17042
17097
  parse_write_name(parser, &cast->name);
17043
17098
  } else {
17044
17099
  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
@@ -17159,7 +17214,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
17159
17214
  }
17160
17215
 
17161
17216
  // If this node cannot be writable, then we have an error.
17162
- if (pm_call_node_writable_p(cast)) {
17217
+ if (pm_call_node_writable_p(parser, cast)) {
17163
17218
  parse_write_name(parser, &cast->name);
17164
17219
  } else {
17165
17220
  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
@@ -17751,7 +17806,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
17751
17806
  .encoding_changed_callback = NULL,
17752
17807
  .encoding_comment_start = source,
17753
17808
  .lex_callback = NULL,
17754
- .filepath_string = { 0 },
17809
+ .filepath = { 0 },
17755
17810
  .constant_pool = { 0 },
17756
17811
  .newline_list = { 0 },
17757
17812
  .integer_base = 0,
@@ -17794,7 +17849,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
17794
17849
  // If options were provided to this parse, establish them here.
17795
17850
  if (options != NULL) {
17796
17851
  // filepath option
17797
- parser->filepath_string = options->filepath;
17852
+ parser->filepath = options->filepath;
17798
17853
 
17799
17854
  // line option
17800
17855
  parser->start_line = options->line;
@@ -17896,7 +17951,7 @@ pm_magic_comment_list_free(pm_list_t *list) {
17896
17951
  */
17897
17952
  PRISM_EXPORTED_FUNCTION void
17898
17953
  pm_parser_free(pm_parser_t *parser) {
17899
- pm_string_free(&parser->filepath_string);
17954
+ pm_string_free(&parser->filepath);
17900
17955
  pm_diagnostic_list_free(&parser->error_list);
17901
17956
  pm_diagnostic_list_free(&parser->warning_list);
17902
17957
  pm_comment_list_free(&parser->comment_list);
@@ -18060,7 +18115,9 @@ pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_
18060
18115
 
18061
18116
  // Now we're going to shift all of the errors after this one down one
18062
18117
  // index to make room for the new error.
18063
- memcpy(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
18118
+ if (index + 1 < error_list->size) {
18119
+ memmove(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
18120
+ }
18064
18121
 
18065
18122
  // Finally, we'll insert the error into the array.
18066
18123
  uint32_t column_end;
@@ -181,6 +181,31 @@ pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t
181
181
  return &pool->constants[constant_id - 1];
182
182
  }
183
183
 
184
+ /**
185
+ * Find a constant in a constant pool. Returns the id of the constant, or 0 if
186
+ * the constant is not found.
187
+ */
188
+ pm_constant_id_t
189
+ pm_constant_pool_find(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
190
+ assert(is_power_of_two(pool->capacity));
191
+ const uint32_t mask = pool->capacity - 1;
192
+
193
+ uint32_t hash = pm_constant_pool_hash(start, length);
194
+ uint32_t index = hash & mask;
195
+ pm_constant_pool_bucket_t *bucket;
196
+
197
+ while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
198
+ pm_constant_t *constant = &pool->constants[bucket->id - 1];
199
+ if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
200
+ return bucket->id;
201
+ }
202
+
203
+ index = (index + 1) & mask;
204
+ }
205
+
206
+ return PM_CONSTANT_ID_UNSET;
207
+ }
208
+
184
209
  /**
185
210
  * Insert a constant into a constant pool and return its index in the pool.
186
211
  */
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: prism
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.20.0
4
+ version: 0.21.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shopify
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-02-01 00:00:00.000000000 Z
11
+ date: 2024-02-05 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: