prism 0.20.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 742f60637c4fd77f62b7713f484e87d70249e6e9dfeefc2c2ba0bfe667eed034
4
- data.tar.gz: 67bd239271c1d848536389668722a0617419b19b6bb250ed39cf17b7a414da4c
3
+ metadata.gz: b4d054a1268bf7f8b5947f30ad244c4713c850911e79c1ba469eca0ac36bc47c
4
+ data.tar.gz: b77e29c93584b79759381d75cfb5ad0753fe8d5f92863cada81895bb67f17572
5
5
  SHA512:
6
- metadata.gz: d0a90337f2635d35b08b0932ad6d928610406bb3f908c1b7b601f5fcb08b404604745f93bffd9a4bb84fc13cde0b6b4a71015390546a077daa4e05d7d8cf965e
7
- data.tar.gz: 231693786022302c486d3c4ea2c8841636e3c94cb37f6b5e410f1ab6ac4ce7fc12e53cd4d67f43d6a38f3292867fda808e655391528619d626823a5163cbf722
6
+ metadata.gz: 00fa781d854c4f9b716b238c392e48f3bd946b52a5ea100c8fa98bd909bd7d2fcd116b80c7877cbfff59bb991d7c78158ded3ff4154d7d3362df3b8c00fd4d08
7
+ data.tar.gz: cfea37b3aa825f0bb91a0bd19dec1ec72187790aca39a2b8d560a483d83c1f4604346320d071c93cd605f39c1fd975b1b508395d9673d7bf95c16feaeeee52e6
data/CHANGELOG.md CHANGED
@@ -6,7 +6,25 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
- ## [0.20.0] - 2024-01-01
9
+ ## [0.21.0] - 2024-02-05
10
+
11
+ ### Added
12
+
13
+ - Add the `pm_constant_pool_find` API for finding a constant.
14
+
15
+ ### Changed
16
+
17
+ - Fixes for `Prism::Translation::Parser`.
18
+ - Ensure all errors flow through `parser.diagnostics.process`.
19
+ - Fix the find pattern node.
20
+ - Fix block forwarding with `NumberedParametersNode`.
21
+ - Ensure we can parse strings with invalid bytes for the encoding.
22
+ - Fix hash pairs in pattern matching.
23
+ - Properly reject operator writes on operator calls, e.g., `a.+ -= b`.
24
+ - Fix multi-byte escapes.
25
+ - Handle missing body in `begin` within the receiver of a method call.
26
+
27
+ ## [0.20.0] - 2024-02-01
10
28
 
11
29
  ### Added
12
30
 
@@ -323,7 +341,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
323
341
 
324
342
  - 🎉 Initial release! 🎉
325
343
 
326
- [unreleased]: https://github.com/ruby/prism/compare/v0.20.0...HEAD
344
+ [unreleased]: https://github.com/ruby/prism/compare/v0.21.0...HEAD
345
+ [0.21.0]: https://github.com/ruby/prism/compare/v0.20.0...v0.21.0
327
346
  [0.20.0]: https://github.com/ruby/prism/compare/v0.19.0...v0.20.0
328
347
  [0.19.0]: https://github.com/ruby/prism/compare/v0.18.0...v0.19.0
329
348
  [0.18.0]: https://github.com/ruby/prism/compare/v0.17.1...v0.18.0
@@ -9,7 +9,7 @@ The `parser` gem provides multiple parsers to support different versions of the
9
9
  You can use the `prism` parser like you would any other. After requiring the parser, you should be able to call any of the regular `Parser::Base` APIs that you would normally use.
10
10
 
11
11
  ```ruby
12
- require "prism/translation/parser"
12
+ require "prism"
13
13
 
14
14
  Prism::Translation::Parser.parse_file("path/to/file.rb")
15
15
  ```
@@ -1,7 +1,7 @@
1
1
  #ifndef PRISM_EXT_NODE_H
2
2
  #define PRISM_EXT_NODE_H
3
3
 
4
- #define EXPECTED_PRISM_VERSION "0.20.0"
4
+ #define EXPECTED_PRISM_VERSION "0.21.0"
5
5
 
6
6
  #include <ruby.h>
7
7
  #include <ruby/encoding.h>
data/include/prism/ast.h CHANGED
@@ -1042,7 +1042,7 @@ static const pm_node_flags_t PM_NODE_FLAG_COMMON_MASK = (1 << (PM_NODE_FLAG_BITS
1042
1042
  * Cast the type to an enum to allow the compiler to provide exhaustiveness
1043
1043
  * checking.
1044
1044
  */
1045
- #define PM_NODE_TYPE(node) ((enum pm_node_type) node->type)
1045
+ #define PM_NODE_TYPE(node) ((enum pm_node_type) (node)->type)
1046
1046
 
1047
1047
  /**
1048
1048
  * Return true if the type of the given node matches the given type.
@@ -626,7 +626,7 @@ struct pm_parser {
626
626
  * This is the path of the file being parsed. We use the filepath when
627
627
  * constructing SourceFileNodes.
628
628
  */
629
- pm_string_t filepath_string;
629
+ pm_string_t filepath;
630
630
 
631
631
  /**
632
632
  * This constant pool keeps all of the constants defined throughout the file
@@ -154,6 +154,17 @@ bool pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity);
154
154
  */
155
155
  pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id);
156
156
 
157
+ /**
158
+ * Find a constant in a constant pool. Returns the id of the constant, or 0 if
159
+ * the constant is not found.
160
+ *
161
+ * @param pool The pool to find the constant in.
162
+ * @param start A pointer to the start of the constant.
163
+ * @param length The length of the constant.
164
+ * @return The id of the constant.
165
+ */
166
+ pm_constant_id_t pm_constant_pool_find(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
167
+
157
168
  /**
158
169
  * Insert a constant into a constant pool that is a slice of a source string.
159
170
  * Returns the id of the constant, or 0 if any potential calls to resize fail.
@@ -14,7 +14,7 @@
14
14
  /**
15
15
  * The minor version of the Prism library as an int.
16
16
  */
17
- #define PRISM_VERSION_MINOR 20
17
+ #define PRISM_VERSION_MINOR 21
18
18
 
19
19
  /**
20
20
  * The patch version of the Prism library as an int.
@@ -24,6 +24,6 @@
24
24
  /**
25
25
  * The version of the Prism library as a constant string.
26
26
  */
27
- #define PRISM_VERSION "0.20.0"
27
+ #define PRISM_VERSION "0.21.0"
28
28
 
29
29
  #endif
@@ -27,7 +27,7 @@ module Prism
27
27
 
28
28
  # The minor version of prism that we are expecting to find in the serialized
29
29
  # strings.
30
- MINOR_VERSION = 20
30
+ MINOR_VERSION = 21
31
31
 
32
32
  # The patch version of prism that we are expecting to find in the serialized
33
33
  # strings.
@@ -105,14 +105,18 @@ module Prism
105
105
  # { a: 1 }
106
106
  # ^^^^
107
107
  def visit_assoc_node(node)
108
- if node.value.is_a?(ImplicitNode)
109
- builder.pair_label([node.key.slice.chomp(":"), srange(node.key.location)])
110
- elsif in_pattern && node.value.nil?
111
- if node.key.is_a?(SymbolNode)
112
- builder.match_hash_var([node.key.unescaped, srange(node.key.location)])
108
+ if in_pattern
109
+ if node.value.is_a?(ImplicitNode)
110
+ if node.key.is_a?(SymbolNode)
111
+ builder.match_hash_var([node.key.unescaped, srange(node.key.location)])
112
+ else
113
+ builder.match_hash_var_from_str(token(node.key.opening_loc), visit_all(node.key.parts), token(node.key.closing_loc))
114
+ end
113
115
  else
114
- builder.match_hash_var_from_str(token(node.key.opening_loc), visit_all(node.key.parts), token(node.key.closing_loc))
116
+ builder.pair_keyword([node.key.unescaped, srange(node.key.location)], visit(node.value))
115
117
  end
118
+ elsif node.value.is_a?(ImplicitNode)
119
+ builder.pair_label([node.key.unescaped, srange(node.key.location)])
116
120
  elsif node.operator_loc
117
121
  builder.pair(visit(node.key), token(node.operator_loc), visit(node.value))
118
122
  elsif node.key.is_a?(SymbolNode) && node.key.opening_loc.nil?
@@ -241,53 +245,51 @@ module Prism
241
245
  block = nil
242
246
  end
243
247
 
248
+ if node.call_operator_loc.nil?
249
+ case name
250
+ when :!
251
+ return visit_block(builder.not_op(token(node.message_loc), token(node.opening_loc), visit(node.receiver), token(node.closing_loc)), block)
252
+ when :[]
253
+ return visit_block(builder.index(visit(node.receiver), token(node.opening_loc), visit_all(arguments), token(node.closing_loc)), block)
254
+ when :[]=
255
+ if node.message != "[]=" && node.arguments && block.nil? && !node.safe_navigation?
256
+ return visit_block(
257
+ builder.assign(
258
+ builder.index_asgn(
259
+ visit(node.receiver),
260
+ token(node.opening_loc),
261
+ visit_all(node.arguments.arguments[...-1]),
262
+ token(node.closing_loc),
263
+ ),
264
+ srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, ["="]),
265
+ visit(node.arguments.arguments.last)
266
+ ),
267
+ block
268
+ )
269
+ end
270
+ end
271
+ end
272
+
273
+ message_loc = node.message_loc
274
+ call_operator_loc = node.call_operator_loc
275
+ call_operator = [{ "." => :dot, "&." => :anddot, "::" => "::" }.fetch(call_operator_loc.slice), srange(call_operator_loc)] if call_operator_loc
276
+
244
277
  visit_block(
245
- if name == :!
246
- builder.not_op(
247
- token(node.message_loc),
248
- token(node.opening_loc),
249
- visit(node.receiver),
250
- token(node.closing_loc)
278
+ if name.end_with?("=") && !message_loc.slice.end_with?("=") && node.arguments && block.nil?
279
+ builder.assign(
280
+ builder.attr_asgn(visit(node.receiver), call_operator, token(message_loc)),
281
+ srange_find(message_loc.end_offset, node.arguments.location.start_offset, ["="]),
282
+ visit(node.arguments.arguments.last)
251
283
  )
252
- elsif name == :[]
253
- builder.index(
284
+ else
285
+ builder.call_method(
254
286
  visit(node.receiver),
287
+ call_operator,
288
+ message_loc ? [node.name, srange(message_loc)] : nil,
255
289
  token(node.opening_loc),
256
290
  visit_all(arguments),
257
291
  token(node.closing_loc)
258
292
  )
259
- elsif name == :[]= && node.message != "[]=" && node.arguments && block.nil?
260
- builder.assign(
261
- builder.index_asgn(
262
- visit(node.receiver),
263
- token(node.opening_loc),
264
- visit_all(node.arguments.arguments[...-1]),
265
- token(node.closing_loc),
266
- ),
267
- srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, ["="]),
268
- visit(node.arguments.arguments.last)
269
- )
270
- else
271
- message_loc = node.message_loc
272
- call_operator_loc = node.call_operator_loc
273
- call_operator = [{ "." => :dot, "&." => :anddot, "::" => "::" }.fetch(call_operator_loc.slice), srange(call_operator_loc)] if call_operator_loc
274
-
275
- if name.end_with?("=") && !message_loc.slice.end_with?("=") && node.arguments && block.nil?
276
- builder.assign(
277
- builder.attr_asgn(visit(node.receiver), call_operator, token(message_loc)),
278
- srange_find(message_loc.end_offset, node.arguments.location.start_offset, ["="]),
279
- visit(node.arguments.arguments.last)
280
- )
281
- else
282
- builder.call_method(
283
- visit(node.receiver),
284
- call_operator,
285
- message_loc ? [node.name, srange(message_loc)] : nil,
286
- token(node.opening_loc),
287
- visit_all(arguments),
288
- token(node.closing_loc)
289
- )
290
- end
291
293
  end,
292
294
  block
293
295
  )
@@ -519,8 +521,6 @@ module Prism
519
521
  # def self.foo; end
520
522
  # ^^^^^^^^^^^^^^^^^
521
523
  def visit_def_node(node)
522
- forwarding = find_forwarding(node.parameters)
523
-
524
524
  if node.equal_loc
525
525
  if node.receiver
526
526
  builder.def_endless_singleton(
@@ -530,7 +530,7 @@ module Prism
530
530
  token(node.name_loc),
531
531
  builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false),
532
532
  token(node.equal_loc),
533
- node.body&.accept(copy_compiler(forwarding: forwarding))
533
+ node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters)))
534
534
  )
535
535
  else
536
536
  builder.def_endless_method(
@@ -538,7 +538,7 @@ module Prism
538
538
  token(node.name_loc),
539
539
  builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false),
540
540
  token(node.equal_loc),
541
- node.body&.accept(copy_compiler(forwarding: forwarding))
541
+ node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters)))
542
542
  )
543
543
  end
544
544
  elsif node.receiver
@@ -548,7 +548,7 @@ module Prism
548
548
  token(node.operator_loc),
549
549
  token(node.name_loc),
550
550
  builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false),
551
- node.body&.accept(copy_compiler(forwarding: forwarding)),
551
+ node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters))),
552
552
  token(node.end_keyword_loc)
553
553
  )
554
554
  else
@@ -556,7 +556,7 @@ module Prism
556
556
  token(node.def_keyword_loc),
557
557
  token(node.name_loc),
558
558
  builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false),
559
- node.body&.accept(copy_compiler(forwarding: forwarding)),
559
+ node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters))),
560
560
  token(node.end_keyword_loc)
561
561
  )
562
562
  end
@@ -614,9 +614,7 @@ module Prism
614
614
  # foo => [*, bar, *]
615
615
  # ^^^^^^^^^^^
616
616
  def visit_find_pattern_node(node)
617
- elements = [*node.requireds]
618
- elements << node.rest if !node.rest.nil? && !node.rest.is_a?(ImplicitRestNode)
619
- elements.concat(node.posts)
617
+ elements = [node.left, *node.requireds, node.right]
620
618
 
621
619
  if node.constant
622
620
  builder.const_pattern(visit(node.constant), token(node.opening_loc), builder.find_pattern(nil, visit_all(elements), nil), token(node.closing_loc))
@@ -993,24 +991,24 @@ module Prism
993
991
 
994
992
  # -> {}
995
993
  def visit_lambda_node(node)
994
+ parameters = node.parameters
995
+
996
996
  builder.block(
997
997
  builder.call_lambda(token(node.operator_loc)),
998
998
  [node.opening, srange(node.opening_loc)],
999
- if node.parameters
1000
- if node.parameters.is_a?(NumberedParametersNode)
1001
- visit(node.parameters)
1002
- else
1003
- builder.args(
1004
- token(node.parameters.opening_loc),
1005
- visit(node.parameters),
1006
- token(node.parameters.closing_loc),
1007
- false
1008
- )
1009
- end
1010
- else
999
+ if parameters.nil?
1011
1000
  builder.args(nil, [], nil, false)
1001
+ elsif node.parameters.is_a?(NumberedParametersNode)
1002
+ visit(node.parameters)
1003
+ else
1004
+ builder.args(
1005
+ token(node.parameters.opening_loc),
1006
+ visit(node.parameters),
1007
+ token(node.parameters.closing_loc),
1008
+ false
1009
+ )
1012
1010
  end,
1013
- node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters&.parameters))),
1011
+ node.body&.accept(copy_compiler(forwarding: parameters.is_a?(NumberedParametersNode) ? [] : find_forwarding(parameters&.parameters))),
1014
1012
  [node.closing, srange(node.closing_loc)]
1015
1013
  )
1016
1014
  end
@@ -1096,7 +1094,7 @@ module Prism
1096
1094
  # case of a syntax error. The parser gem doesn't have such a concept, so
1097
1095
  # we invent our own here.
1098
1096
  def visit_missing_node(node)
1099
- raise CompilationError, "Cannot compile missing nodes"
1097
+ ::AST::Node.new(:missing, [], location: ::Parser::Source::Map.new(srange(node.location)))
1100
1098
  end
1101
1099
 
1102
1100
  # module Foo; end
@@ -1727,29 +1725,29 @@ module Prism
1727
1725
  # Visit a block node on a call.
1728
1726
  def visit_block(call, block)
1729
1727
  if block
1728
+ parameters = block.parameters
1729
+
1730
1730
  builder.block(
1731
1731
  call,
1732
1732
  token(block.opening_loc),
1733
- if (parameters = block.parameters)
1734
- if parameters.is_a?(NumberedParametersNode)
1735
- visit(parameters)
1736
- else
1737
- builder.args(
1738
- token(parameters.opening_loc),
1739
- if procarg0?(parameters.parameters)
1740
- parameter = parameters.parameters.requireds.first
1741
- [builder.procarg0(visit(parameter))].concat(visit_all(parameters.locals))
1742
- else
1743
- visit(parameters)
1744
- end,
1745
- token(parameters.closing_loc),
1746
- false
1747
- )
1748
- end
1749
- else
1733
+ if parameters.nil?
1750
1734
  builder.args(nil, [], nil, false)
1735
+ elsif parameters.is_a?(NumberedParametersNode)
1736
+ visit(parameters)
1737
+ else
1738
+ builder.args(
1739
+ token(parameters.opening_loc),
1740
+ if procarg0?(parameters.parameters)
1741
+ parameter = parameters.parameters.requireds.first
1742
+ [builder.procarg0(visit(parameter))].concat(visit_all(parameters.locals))
1743
+ else
1744
+ visit(parameters)
1745
+ end,
1746
+ token(parameters.closing_loc),
1747
+ false
1748
+ )
1751
1749
  end,
1752
- block.body&.accept(copy_compiler(forwarding: find_forwarding(block.parameters&.parameters))),
1750
+ block.body&.accept(copy_compiler(forwarding: parameters.is_a?(NumberedParametersNode) ? [] : find_forwarding(parameters&.parameters))),
1753
1751
  token(block.closing_loc)
1754
1752
  )
1755
1753
  else
@@ -1762,9 +1760,9 @@ module Prism
1762
1760
  children = []
1763
1761
  node.parts.each do |part|
1764
1762
  pushing =
1765
- if part.is_a?(StringNode) && part.unescaped.count("\n") > 1
1766
- unescaped = part.unescaped.split("\n")
1767
- escaped = part.content.split("\n")
1763
+ if part.is_a?(StringNode) && part.unescaped.include?("\n")
1764
+ unescaped = part.unescaped.lines(chomp: true)
1765
+ escaped = part.content.lines(chomp: true)
1768
1766
 
1769
1767
  escaped_lengths =
1770
1768
  if node.opening.end_with?("'")
@@ -1779,7 +1777,6 @@ module Prism
1779
1777
  unescaped.zip(escaped_lengths).map do |unescaped_line, escaped_length|
1780
1778
  end_offset = start_offset + (escaped_length || 0)
1781
1779
  inner_part = builder.string_internal(["#{unescaped_line}\n", srange_offsets(start_offset, end_offset)])
1782
-
1783
1780
  start_offset = end_offset
1784
1781
  inner_part
1785
1782
  end
@@ -26,7 +26,7 @@ module Prism
26
26
  Racc_debug_parser = false # :nodoc:
27
27
 
28
28
  def version # :nodoc:
29
- 33
29
+ 34
30
30
  end
31
31
 
32
32
  # The default encoding for Ruby files is UTF-8.
@@ -42,9 +42,10 @@ module Prism
42
42
  @source_buffer = source_buffer
43
43
  source = source_buffer.source
44
44
 
45
- result = unwrap(Prism.parse(source, filepath: source_buffer.name))
45
+ offset_cache = build_offset_cache(source)
46
+ result = unwrap(Prism.parse(source, filepath: source_buffer.name), offset_cache)
46
47
 
47
- build_ast(result.value, build_offset_cache(source))
48
+ build_ast(result.value, offset_cache)
48
49
  ensure
49
50
  @source_buffer = nil
50
51
  end
@@ -55,7 +56,7 @@ module Prism
55
56
  source = source_buffer.source
56
57
 
57
58
  offset_cache = build_offset_cache(source)
58
- result = unwrap(Prism.parse(source, filepath: source_buffer.name))
59
+ result = unwrap(Prism.parse(source, filepath: source_buffer.name), offset_cache)
59
60
 
60
61
  [
61
62
  build_ast(result.value, offset_cache),
@@ -72,7 +73,7 @@ module Prism
72
73
  source = source_buffer.source
73
74
 
74
75
  offset_cache = build_offset_cache(source)
75
- result = unwrap(Prism.parse_lex(source, filepath: source_buffer.name))
76
+ result = unwrap(Prism.parse_lex(source, filepath: source_buffer.name), offset_cache)
76
77
 
77
78
  program, tokens = result.value
78
79
 
@@ -93,16 +94,23 @@ module Prism
93
94
 
94
95
  private
95
96
 
97
+ # This is a hook to allow consumers to disable some errors if they don't
98
+ # want them to block creating the syntax tree.
99
+ def valid_error?(error)
100
+ true
101
+ end
102
+
96
103
  # If there was an error generated during the parse, then raise an
97
104
  # appropriate syntax error. Otherwise return the result.
98
- def unwrap(result)
99
- return result if result.success?
105
+ def unwrap(result, offset_cache)
106
+ result.errors.each do |error|
107
+ next unless valid_error?(error)
100
108
 
101
- error = result.errors.first
102
- offset_cache = build_offset_cache(source_buffer.source)
109
+ location = build_range(error.location, offset_cache)
110
+ diagnostics.process(Diagnostic.new(error.message, location))
111
+ end
103
112
 
104
- diagnostic = Diagnostic.new(error.message, build_range(error.location, offset_cache))
105
- raise ::Parser::SyntaxError, diagnostic
113
+ result
106
114
  end
107
115
 
108
116
  # Prism deals with offsets in bytes, while the parser gem deals with
data/prism.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |spec|
4
4
  spec.name = "prism"
5
- spec.version = "0.20.0"
5
+ spec.version = "0.21.0"
6
6
  spec.authors = ["Shopify"]
7
7
  spec.email = ["ruby@shopify.com"]
8
8
 
data/src/encoding.c CHANGED
@@ -2252,7 +2252,7 @@ static const uint8_t pm_utf_8_dfa[] = {
2252
2252
  */
2253
2253
  static pm_unicode_codepoint_t
2254
2254
  pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
2255
- assert(n >= 1);
2255
+ assert(n >= 0);
2256
2256
  size_t maximum = (size_t) n;
2257
2257
 
2258
2258
  uint32_t codepoint;
data/src/prism.c CHANGED
@@ -870,6 +870,105 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
870
870
  pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
871
871
  }
872
872
 
873
+ /******************************************************************************/
874
+ /* Basic character checks */
875
+ /******************************************************************************/
876
+
877
+ /**
878
+ * This function is used extremely frequently to lex all of the identifiers in a
879
+ * source file, so it's important that it be as fast as possible. For this
880
+ * reason we have the encoding_changed boolean to check if we need to go through
881
+ * the function pointer or can just directly use the UTF-8 functions.
882
+ */
883
+ static inline size_t
884
+ char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b) {
885
+ if (parser->encoding_changed) {
886
+ size_t width;
887
+ if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
888
+ return width;
889
+ } else if (*b == '_') {
890
+ return 1;
891
+ } else if (*b >= 0x80) {
892
+ return parser->encoding->char_width(b, parser->end - b);
893
+ } else {
894
+ return 0;
895
+ }
896
+ } else if (*b < 0x80) {
897
+ return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
898
+ } else {
899
+ return pm_encoding_utf_8_char_width(b, parser->end - b);
900
+ }
901
+ }
902
+
903
+ /**
904
+ * Similar to char_is_identifier but this function assumes that the encoding
905
+ * has not been changed.
906
+ */
907
+ static inline size_t
908
+ char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
909
+ if (*b < 0x80) {
910
+ return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
911
+ } else {
912
+ return pm_encoding_utf_8_char_width(b, end - b);
913
+ }
914
+ }
915
+
916
+ /**
917
+ * Like the above, this function is also used extremely frequently to lex all of
918
+ * the identifiers in a source file once the first character has been found. So
919
+ * it's important that it be as fast as possible.
920
+ */
921
+ static inline size_t
922
+ char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
923
+ if (parser->encoding_changed) {
924
+ size_t width;
925
+ if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
926
+ return width;
927
+ } else if (*b == '_') {
928
+ return 1;
929
+ } else if (*b >= 0x80) {
930
+ return parser->encoding->char_width(b, parser->end - b);
931
+ } else {
932
+ return 0;
933
+ }
934
+ }
935
+ return char_is_identifier_utf8(b, parser->end);
936
+ }
937
+
938
+ // Here we're defining a perfect hash for the characters that are allowed in
939
+ // global names. This is used to quickly check the next character after a $ to
940
+ // see if it's a valid character for a global name.
941
+ #define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
942
+ #define PUNCT(idx) ( \
943
+ BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
944
+ BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
945
+ BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
946
+ BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
947
+ BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
948
+ BIT('0', idx))
949
+
950
+ const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
951
+
952
+ #undef BIT
953
+ #undef PUNCT
954
+
955
+ static inline bool
956
+ char_is_global_name_punctuation(const uint8_t b) {
957
+ const unsigned int i = (const unsigned int) b;
958
+ if (i <= 0x20 || 0x7e < i) return false;
959
+
960
+ return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
961
+ }
962
+
963
+ static inline bool
964
+ token_is_setter_name(pm_token_t *token) {
965
+ return (
966
+ (token->type == PM_TOKEN_IDENTIFIER) &&
967
+ (token->end - token->start >= 2) &&
968
+ (token->end[-1] == '=')
969
+ );
970
+ }
971
+
873
972
  /******************************************************************************/
874
973
  /* Node flag handling functions */
875
974
  /******************************************************************************/
@@ -1923,11 +2022,12 @@ pm_call_node_index_p(pm_call_node_t *node) {
1923
2022
  * operator assignment.
1924
2023
  */
1925
2024
  static inline bool
1926
- pm_call_node_writable_p(pm_call_node_t *node) {
2025
+ pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
1927
2026
  return (
1928
2027
  (node->message_loc.start != NULL) &&
1929
2028
  (node->message_loc.end[-1] != '!') &&
1930
2029
  (node->message_loc.end[-1] != '?') &&
2030
+ char_is_identifier_start(parser, node->message_loc.start) &&
1931
2031
  (node->opening_loc.start == NULL) &&
1932
2032
  (node->arguments == NULL) &&
1933
2033
  (node->block == NULL)
@@ -2744,19 +2844,21 @@ pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *targ
2744
2844
  * Check if the receiver of a `def` node is allowed.
2745
2845
  */
2746
2846
  static void
2747
- pm_check_def_receiver(pm_parser_t *parser, pm_node_t *receiver) {
2748
- switch (receiver->type) {
2847
+ pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
2848
+ switch (PM_NODE_TYPE(node)) {
2749
2849
  case PM_BEGIN_NODE: {
2750
- pm_begin_node_t *begin_node = (pm_begin_node_t *)receiver;
2751
- pm_check_def_receiver(parser, (pm_node_t *) begin_node->statements);
2850
+ const pm_begin_node_t *cast = (pm_begin_node_t *) node;
2851
+ if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
2752
2852
  break;
2753
2853
  }
2754
- case PM_PARENTHESES_NODE:
2755
- pm_check_def_receiver(parser, ((pm_parentheses_node_t *) receiver)->body);
2854
+ case PM_PARENTHESES_NODE: {
2855
+ const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
2856
+ if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
2756
2857
  break;
2858
+ }
2757
2859
  case PM_STATEMENTS_NODE: {
2758
- pm_statements_node_t *statements_node = (pm_statements_node_t *)receiver;
2759
- pm_check_def_receiver(parser, statements_node->body.nodes[statements_node->body.size - 1]);
2860
+ const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
2861
+ pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
2760
2862
  break;
2761
2863
  }
2762
2864
  case PM_ARRAY_NODE:
@@ -2775,7 +2877,10 @@ pm_check_def_receiver(pm_parser_t *parser, pm_node_t *receiver) {
2775
2877
  case PM_STRING_NODE:
2776
2878
  case PM_SYMBOL_NODE:
2777
2879
  case PM_X_STRING_NODE:
2778
- pm_parser_err_node(parser, receiver, PM_ERR_SINGLETON_FOR_LITERALS);
2880
+ pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
2881
+ break;
2882
+ default:
2883
+ break;
2779
2884
  }
2780
2885
  }
2781
2886
 
@@ -2807,7 +2912,7 @@ pm_def_node_create(
2807
2912
  }
2808
2913
 
2809
2914
  if ((receiver != NULL) && PM_NODE_TYPE_P(receiver, PM_PARENTHESES_NODE)) {
2810
- pm_check_def_receiver(parser, receiver);
2915
+ pm_def_node_receiver_check(parser, receiver);
2811
2916
  }
2812
2917
 
2813
2918
  *node = (pm_def_node_t) {
@@ -5330,7 +5435,7 @@ pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword)
5330
5435
  .flags = PM_NODE_FLAG_STATIC_LITERAL,
5331
5436
  .location = PM_LOCATION_TOKEN_VALUE(file_keyword),
5332
5437
  },
5333
- .filepath = parser->filepath_string,
5438
+ .filepath = parser->filepath
5334
5439
  };
5335
5440
 
5336
5441
  return node;
@@ -6220,6 +6325,16 @@ pm_parser_local_add_owned(pm_parser_t *parser, const uint8_t *start, size_t leng
6220
6325
  return constant_id;
6221
6326
  }
6222
6327
 
6328
+ /**
6329
+ * Add a local variable from a constant string to the current scope.
6330
+ */
6331
+ static pm_constant_id_t
6332
+ pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
6333
+ pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
6334
+ if (constant_id != 0) pm_parser_local_add(parser, constant_id);
6335
+ return constant_id;
6336
+ }
6337
+
6223
6338
  /**
6224
6339
  * Add a parameter name to the current scope and check whether the name of the
6225
6340
  * parameter is unique or not.
@@ -6259,105 +6374,6 @@ pm_parser_scope_pop(pm_parser_t *parser) {
6259
6374
  free(scope);
6260
6375
  }
6261
6376
 
6262
- /******************************************************************************/
6263
- /* Basic character checks */
6264
- /******************************************************************************/
6265
-
6266
- /**
6267
- * This function is used extremely frequently to lex all of the identifiers in a
6268
- * source file, so it's important that it be as fast as possible. For this
6269
- * reason we have the encoding_changed boolean to check if we need to go through
6270
- * the function pointer or can just directly use the UTF-8 functions.
6271
- */
6272
- static inline size_t
6273
- char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
6274
- if (parser->encoding_changed) {
6275
- size_t width;
6276
- if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
6277
- return width;
6278
- } else if (*b == '_') {
6279
- return 1;
6280
- } else if (*b >= 0x80) {
6281
- return parser->encoding->char_width(b, parser->end - b);
6282
- } else {
6283
- return 0;
6284
- }
6285
- } else if (*b < 0x80) {
6286
- return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
6287
- } else {
6288
- return pm_encoding_utf_8_char_width(b, parser->end - b);
6289
- }
6290
- }
6291
-
6292
- /**
6293
- * Similar to char_is_identifier but this function assumes that the encoding
6294
- * has not been changed.
6295
- */
6296
- static inline size_t
6297
- char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
6298
- if (*b < 0x80) {
6299
- return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
6300
- } else {
6301
- return pm_encoding_utf_8_char_width(b, end - b);
6302
- }
6303
- }
6304
-
6305
- /**
6306
- * Like the above, this function is also used extremely frequently to lex all of
6307
- * the identifiers in a source file once the first character has been found. So
6308
- * it's important that it be as fast as possible.
6309
- */
6310
- static inline size_t
6311
- char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
6312
- if (parser->encoding_changed) {
6313
- size_t width;
6314
- if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
6315
- return width;
6316
- } else if (*b == '_') {
6317
- return 1;
6318
- } else if (*b >= 0x80) {
6319
- return parser->encoding->char_width(b, parser->end - b);
6320
- } else {
6321
- return 0;
6322
- }
6323
- }
6324
- return char_is_identifier_utf8(b, parser->end);
6325
- }
6326
-
6327
- // Here we're defining a perfect hash for the characters that are allowed in
6328
- // global names. This is used to quickly check the next character after a $ to
6329
- // see if it's a valid character for a global name.
6330
- #define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
6331
- #define PUNCT(idx) ( \
6332
- BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
6333
- BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
6334
- BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
6335
- BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
6336
- BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
6337
- BIT('0', idx))
6338
-
6339
- const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
6340
-
6341
- #undef BIT
6342
- #undef PUNCT
6343
-
6344
- static inline bool
6345
- char_is_global_name_punctuation(const uint8_t b) {
6346
- const unsigned int i = (const unsigned int) b;
6347
- if (i <= 0x20 || 0x7e < i) return false;
6348
-
6349
- return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
6350
- }
6351
-
6352
- static inline bool
6353
- token_is_setter_name(pm_token_t *token) {
6354
- return (
6355
- (token->type == PM_TOKEN_IDENTIFIER) &&
6356
- (token->end - token->start >= 2) &&
6357
- (token->end[-1] == '=')
6358
- );
6359
- }
6360
-
6361
6377
  /******************************************************************************/
6362
6378
  /* Stack helpers */
6363
6379
  /******************************************************************************/
@@ -7673,6 +7689,28 @@ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte
7673
7689
  pm_buffer_append_byte(buffer, byte);
7674
7690
  }
7675
7691
 
7692
+ /**
7693
+ * Write each byte of the given escaped character into the buffer.
7694
+ */
7695
+ static inline void
7696
+ escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
7697
+ size_t width;
7698
+ if (parser->encoding_changed) {
7699
+ width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
7700
+ } else {
7701
+ width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
7702
+ }
7703
+
7704
+ // TODO: If the character is invalid in the given encoding, then we'll just
7705
+ // push one byte into the buffer. This should actually be an error.
7706
+ width = (width == 0) ? 1 : width;
7707
+
7708
+ for (size_t index = 0; index < width; index++) {
7709
+ escape_write_byte_encoded(parser, buffer, *parser->current.end);
7710
+ parser->current.end++;
7711
+ }
7712
+ }
7713
+
7676
7714
  /**
7677
7715
  * The regular expression engine doesn't support the same escape sequences as
7678
7716
  * Ruby does. So first we have to read the escape sequence, and then we have to
@@ -8011,7 +8049,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
8011
8049
  /* fallthrough */
8012
8050
  default: {
8013
8051
  if (parser->current.end < parser->end) {
8014
- escape_write_byte_encoded(parser, buffer, *parser->current.end++);
8052
+ escape_write_escape_encoded(parser, buffer);
8015
8053
  }
8016
8054
  return;
8017
8055
  }
@@ -8288,10 +8326,40 @@ typedef struct {
8288
8326
  * Push the given byte into the token buffer.
8289
8327
  */
8290
8328
  static inline void
8291
- pm_token_buffer_push(pm_token_buffer_t *token_buffer, uint8_t byte) {
8329
+ pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
8292
8330
  pm_buffer_append_byte(&token_buffer->buffer, byte);
8293
8331
  }
8294
8332
 
8333
+ /**
8334
+ * Append the given bytes into the token buffer.
8335
+ */
8336
+ static inline void
8337
+ pm_token_buffer_push_bytes(pm_token_buffer_t *token_buffer, const uint8_t *bytes, size_t length) {
8338
+ pm_buffer_append_bytes(&token_buffer->buffer, bytes, length);
8339
+ }
8340
+
8341
+ /**
8342
+ * Push an escaped character into the token buffer.
8343
+ */
8344
+ static inline void
8345
+ pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
8346
+ // First, determine the width of the character to be escaped.
8347
+ size_t width;
8348
+ if (parser->encoding_changed) {
8349
+ width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8350
+ } else {
8351
+ width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
8352
+ }
8353
+
8354
+ // TODO: If the character is invalid in the given encoding, then we'll just
8355
+ // push one byte into the buffer. This should actually be an error.
8356
+ width = (width == 0 ? 1 : width);
8357
+
8358
+ // Now, push the bytes into the buffer.
8359
+ pm_token_buffer_push_bytes(token_buffer, parser->current.end, width);
8360
+ parser->current.end += width;
8361
+ }
8362
+
8295
8363
  /**
8296
8364
  * When we're about to return from lexing the current token and we know for sure
8297
8365
  * that we have found an escape sequence, this function is called to copy the
@@ -9704,18 +9772,18 @@ parser_lex(pm_parser_t *parser) {
9704
9772
  case '\t':
9705
9773
  case '\v':
9706
9774
  case '\\':
9707
- pm_token_buffer_push(&token_buffer, peeked);
9775
+ pm_token_buffer_push_byte(&token_buffer, peeked);
9708
9776
  parser->current.end++;
9709
9777
  break;
9710
9778
  case '\r':
9711
9779
  parser->current.end++;
9712
9780
  if (peek(parser) != '\n') {
9713
- pm_token_buffer_push(&token_buffer, '\r');
9781
+ pm_token_buffer_push_byte(&token_buffer, '\r');
9714
9782
  break;
9715
9783
  }
9716
9784
  /* fallthrough */
9717
9785
  case '\n':
9718
- pm_token_buffer_push(&token_buffer, '\n');
9786
+ pm_token_buffer_push_byte(&token_buffer, '\n');
9719
9787
 
9720
9788
  if (parser->heredoc_end) {
9721
9789
  // ... if we are on the same line as a heredoc,
@@ -9733,14 +9801,13 @@ parser_lex(pm_parser_t *parser) {
9733
9801
  break;
9734
9802
  default:
9735
9803
  if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
9736
- pm_token_buffer_push(&token_buffer, peeked);
9804
+ pm_token_buffer_push_byte(&token_buffer, peeked);
9737
9805
  parser->current.end++;
9738
9806
  } else if (lex_mode->as.list.interpolation) {
9739
9807
  escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
9740
9808
  } else {
9741
- pm_token_buffer_push(&token_buffer, '\\');
9742
- pm_token_buffer_push(&token_buffer, peeked);
9743
- parser->current.end++;
9809
+ pm_token_buffer_push_byte(&token_buffer, '\\');
9810
+ pm_token_buffer_push_escaped(&token_buffer, parser);
9744
9811
  }
9745
9812
 
9746
9813
  break;
@@ -9898,9 +9965,9 @@ parser_lex(pm_parser_t *parser) {
9898
9965
  parser->current.end++;
9899
9966
  if (peek(parser) != '\n') {
9900
9967
  if (lex_mode->as.regexp.terminator != '\r') {
9901
- pm_token_buffer_push(&token_buffer, '\\');
9968
+ pm_token_buffer_push_byte(&token_buffer, '\\');
9902
9969
  }
9903
- pm_token_buffer_push(&token_buffer, '\r');
9970
+ pm_token_buffer_push_byte(&token_buffer, '\r');
9904
9971
  break;
9905
9972
  }
9906
9973
  /* fallthrough */
@@ -9935,20 +10002,19 @@ parser_lex(pm_parser_t *parser) {
9935
10002
  case '$': case ')': case '*': case '+':
9936
10003
  case '.': case '>': case '?': case ']':
9937
10004
  case '^': case '|': case '}':
9938
- pm_token_buffer_push(&token_buffer, '\\');
10005
+ pm_token_buffer_push_byte(&token_buffer, '\\');
9939
10006
  break;
9940
10007
  default:
9941
10008
  break;
9942
10009
  }
9943
10010
 
9944
- pm_token_buffer_push(&token_buffer, peeked);
10011
+ pm_token_buffer_push_byte(&token_buffer, peeked);
9945
10012
  parser->current.end++;
9946
10013
  break;
9947
10014
  }
9948
10015
 
9949
- if (peeked < 0x80) pm_token_buffer_push(&token_buffer, '\\');
9950
- pm_token_buffer_push(&token_buffer, peeked);
9951
- parser->current.end++;
10016
+ if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer, '\\');
10017
+ pm_token_buffer_push_escaped(&token_buffer, parser);
9952
10018
  break;
9953
10019
  }
9954
10020
 
@@ -10115,23 +10181,23 @@ parser_lex(pm_parser_t *parser) {
10115
10181
 
10116
10182
  switch (peeked) {
10117
10183
  case '\\':
10118
- pm_token_buffer_push(&token_buffer, '\\');
10184
+ pm_token_buffer_push_byte(&token_buffer, '\\');
10119
10185
  parser->current.end++;
10120
10186
  break;
10121
10187
  case '\r':
10122
10188
  parser->current.end++;
10123
10189
  if (peek(parser) != '\n') {
10124
10190
  if (!lex_mode->as.string.interpolation) {
10125
- pm_token_buffer_push(&token_buffer, '\\');
10191
+ pm_token_buffer_push_byte(&token_buffer, '\\');
10126
10192
  }
10127
- pm_token_buffer_push(&token_buffer, '\r');
10193
+ pm_token_buffer_push_byte(&token_buffer, '\r');
10128
10194
  break;
10129
10195
  }
10130
10196
  /* fallthrough */
10131
10197
  case '\n':
10132
10198
  if (!lex_mode->as.string.interpolation) {
10133
- pm_token_buffer_push(&token_buffer, '\\');
10134
- pm_token_buffer_push(&token_buffer, '\n');
10199
+ pm_token_buffer_push_byte(&token_buffer, '\\');
10200
+ pm_token_buffer_push_byte(&token_buffer, '\n');
10135
10201
  }
10136
10202
 
10137
10203
  if (parser->heredoc_end) {
@@ -10150,17 +10216,16 @@ parser_lex(pm_parser_t *parser) {
10150
10216
  break;
10151
10217
  default:
10152
10218
  if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
10153
- pm_token_buffer_push(&token_buffer, peeked);
10219
+ pm_token_buffer_push_byte(&token_buffer, peeked);
10154
10220
  parser->current.end++;
10155
10221
  } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
10156
- pm_token_buffer_push(&token_buffer, peeked);
10222
+ pm_token_buffer_push_byte(&token_buffer, peeked);
10157
10223
  parser->current.end++;
10158
10224
  } else if (lex_mode->as.string.interpolation) {
10159
10225
  escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
10160
10226
  } else {
10161
- pm_token_buffer_push(&token_buffer, '\\');
10162
- pm_token_buffer_push(&token_buffer, peeked);
10163
- parser->current.end++;
10227
+ pm_token_buffer_push_byte(&token_buffer, '\\');
10228
+ pm_token_buffer_push_escaped(&token_buffer, parser);
10164
10229
  }
10165
10230
 
10166
10231
  break;
@@ -10417,21 +10482,20 @@ parser_lex(pm_parser_t *parser) {
10417
10482
  case '\r':
10418
10483
  parser->current.end++;
10419
10484
  if (peek(parser) != '\n') {
10420
- pm_token_buffer_push(&token_buffer, '\\');
10421
- pm_token_buffer_push(&token_buffer, '\r');
10485
+ pm_token_buffer_push_byte(&token_buffer, '\\');
10486
+ pm_token_buffer_push_byte(&token_buffer, '\r');
10422
10487
  break;
10423
10488
  }
10424
10489
  /* fallthrough */
10425
10490
  case '\n':
10426
- pm_token_buffer_push(&token_buffer, '\\');
10427
- pm_token_buffer_push(&token_buffer, '\n');
10491
+ pm_token_buffer_push_byte(&token_buffer, '\\');
10492
+ pm_token_buffer_push_byte(&token_buffer, '\n');
10428
10493
  token_buffer.cursor = parser->current.end + 1;
10429
10494
  breakpoint = parser->current.end;
10430
10495
  continue;
10431
10496
  default:
10432
- parser->current.end++;
10433
- pm_token_buffer_push(&token_buffer, '\\');
10434
- pm_token_buffer_push(&token_buffer, peeked);
10497
+ pm_token_buffer_push_byte(&token_buffer, '\\');
10498
+ pm_token_buffer_push_escaped(&token_buffer, parser);
10435
10499
  break;
10436
10500
  }
10437
10501
  } else {
@@ -10439,7 +10503,7 @@ parser_lex(pm_parser_t *parser) {
10439
10503
  case '\r':
10440
10504
  parser->current.end++;
10441
10505
  if (peek(parser) != '\n') {
10442
- pm_token_buffer_push(&token_buffer, '\r');
10506
+ pm_token_buffer_push_byte(&token_buffer, '\r');
10443
10507
  break;
10444
10508
  }
10445
10509
  /* fallthrough */
@@ -10715,14 +10779,6 @@ match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
10715
10779
  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
10716
10780
  }
10717
10781
 
10718
- /**
10719
- * Returns true if the current token is any of the five given types.
10720
- */
10721
- static inline bool
10722
- match5(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5) {
10723
- return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5);
10724
- }
10725
-
10726
10782
  /**
10727
10783
  * Returns true if the current token is any of the six given types.
10728
10784
  */
@@ -11359,7 +11415,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
11359
11415
  break;
11360
11416
  }
11361
11417
 
11362
- // If we have a terminator, then we will parse all consequtive terminators
11418
+ // If we have a terminator, then we will parse all consecutive terminators
11363
11419
  // and then continue parsing the statements list.
11364
11420
  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
11365
11421
  // If we have a terminator, then we will continue parsing the statements
@@ -13149,6 +13205,15 @@ outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
13149
13205
  return false;
13150
13206
  }
13151
13207
 
13208
+ /**
13209
+ * These are the names of the various numbered parameters. We have them here so
13210
+ * that when we insert them into the constant pool we can use a constant string
13211
+ * and not have to allocate.
13212
+ */
13213
+ static const char * const pm_numbered_parameter_names[] = {
13214
+ "_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
13215
+ };
13216
+
13152
13217
  /**
13153
13218
  * Parse an identifier into either a local variable read. If the local variable
13154
13219
  * is not found, it returns NULL instead.
@@ -13171,12 +13236,10 @@ parse_variable(pm_parser_t *parser) {
13171
13236
  pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
13172
13237
  } else {
13173
13238
  // Indicate that this scope is using numbered params so that child
13174
- // scopes cannot.
13175
- uint8_t number = parser->previous.start[1];
13176
-
13177
- // We subtract the value for the character '0' to get the actual
13178
- // integer value of the number (only _1 through _9 are valid)
13179
- uint8_t numbered_parameters = (uint8_t) (number - '0');
13239
+ // scopes cannot. We subtract the value for the character '0' to get
13240
+ // the actual integer value of the number (only _1 through _9 are
13241
+ // valid).
13242
+ uint8_t numbered_parameters = (uint8_t) (parser->previous.start[1] - '0');
13180
13243
  if (numbered_parameters > parser->current_scope->numbered_parameters) {
13181
13244
  parser->current_scope->numbered_parameters = numbered_parameters;
13182
13245
  pm_parser_numbered_parameters_set(parser, numbered_parameters);
@@ -13187,21 +13250,13 @@ parse_variable(pm_parser_t *parser) {
13187
13250
  // referencing _2 means that _1 must exist. Therefore here we
13188
13251
  // loop through all of the possibilities and add them into the
13189
13252
  // constant pool.
13190
- uint8_t current = '1';
13191
- uint8_t *value;
13192
-
13193
- while (current < number) {
13194
- value = malloc(2);
13195
- value[0] = '_';
13196
- value[1] = current++;
13197
- pm_parser_local_add_owned(parser, value, 2);
13253
+ for (uint8_t numbered_parameter = 1; numbered_parameter <= numbered_parameters - 1; numbered_parameter++) {
13254
+ pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameter - 1], 2);
13198
13255
  }
13199
13256
 
13200
- // Now we can add the actual token that is being used. For
13201
- // this one we can add a shared version since it is directly
13202
- // referenced in the source.
13203
- pm_parser_local_add_token(parser, &parser->previous);
13204
- return pm_local_variable_read_node_create(parser, &parser->previous, 0);
13257
+ // Finally we can create the local variable read node.
13258
+ pm_constant_id_t name_id = pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameters - 1], 2);
13259
+ return pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
13205
13260
  }
13206
13261
  }
13207
13262
 
@@ -14010,7 +14065,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
14010
14065
  // Gather up all of the patterns into the list.
14011
14066
  while (accept1(parser, PM_TOKEN_COMMA)) {
14012
14067
  // Break early here in case we have a trailing comma.
14013
- if (match5(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
14068
+ if (match6(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
14014
14069
  node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14015
14070
  pm_node_list_append(&nodes, node);
14016
14071
  break;
@@ -16927,7 +16982,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16927
16982
  }
16928
16983
 
16929
16984
  // If this node cannot be writable, then we have an error.
16930
- if (pm_call_node_writable_p(cast)) {
16985
+ if (pm_call_node_writable_p(parser, cast)) {
16931
16986
  parse_write_name(parser, &cast->name);
16932
16987
  } else {
16933
16988
  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
@@ -17038,7 +17093,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
17038
17093
  }
17039
17094
 
17040
17095
  // If this node cannot be writable, then we have an error.
17041
- if (pm_call_node_writable_p(cast)) {
17096
+ if (pm_call_node_writable_p(parser, cast)) {
17042
17097
  parse_write_name(parser, &cast->name);
17043
17098
  } else {
17044
17099
  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
@@ -17159,7 +17214,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
17159
17214
  }
17160
17215
 
17161
17216
  // If this node cannot be writable, then we have an error.
17162
- if (pm_call_node_writable_p(cast)) {
17217
+ if (pm_call_node_writable_p(parser, cast)) {
17163
17218
  parse_write_name(parser, &cast->name);
17164
17219
  } else {
17165
17220
  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
@@ -17751,7 +17806,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
17751
17806
  .encoding_changed_callback = NULL,
17752
17807
  .encoding_comment_start = source,
17753
17808
  .lex_callback = NULL,
17754
- .filepath_string = { 0 },
17809
+ .filepath = { 0 },
17755
17810
  .constant_pool = { 0 },
17756
17811
  .newline_list = { 0 },
17757
17812
  .integer_base = 0,
@@ -17794,7 +17849,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
17794
17849
  // If options were provided to this parse, establish them here.
17795
17850
  if (options != NULL) {
17796
17851
  // filepath option
17797
- parser->filepath_string = options->filepath;
17852
+ parser->filepath = options->filepath;
17798
17853
 
17799
17854
  // line option
17800
17855
  parser->start_line = options->line;
@@ -17896,7 +17951,7 @@ pm_magic_comment_list_free(pm_list_t *list) {
17896
17951
  */
17897
17952
  PRISM_EXPORTED_FUNCTION void
17898
17953
  pm_parser_free(pm_parser_t *parser) {
17899
- pm_string_free(&parser->filepath_string);
17954
+ pm_string_free(&parser->filepath);
17900
17955
  pm_diagnostic_list_free(&parser->error_list);
17901
17956
  pm_diagnostic_list_free(&parser->warning_list);
17902
17957
  pm_comment_list_free(&parser->comment_list);
@@ -18060,7 +18115,9 @@ pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_
18060
18115
 
18061
18116
  // Now we're going to shift all of the errors after this one down one
18062
18117
  // index to make room for the new error.
18063
- memcpy(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
18118
+ if (index + 1 < error_list->size) {
18119
+ memmove(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
18120
+ }
18064
18121
 
18065
18122
  // Finally, we'll insert the error into the array.
18066
18123
  uint32_t column_end;
@@ -181,6 +181,31 @@ pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t
181
181
  return &pool->constants[constant_id - 1];
182
182
  }
183
183
 
184
+ /**
185
+ * Find a constant in a constant pool. Returns the id of the constant, or 0 if
186
+ * the constant is not found.
187
+ */
188
+ pm_constant_id_t
189
+ pm_constant_pool_find(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
190
+ assert(is_power_of_two(pool->capacity));
191
+ const uint32_t mask = pool->capacity - 1;
192
+
193
+ uint32_t hash = pm_constant_pool_hash(start, length);
194
+ uint32_t index = hash & mask;
195
+ pm_constant_pool_bucket_t *bucket;
196
+
197
+ while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
198
+ pm_constant_t *constant = &pool->constants[bucket->id - 1];
199
+ if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
200
+ return bucket->id;
201
+ }
202
+
203
+ index = (index + 1) & mask;
204
+ }
205
+
206
+ return PM_CONSTANT_ID_UNSET;
207
+ }
208
+
184
209
  /**
185
210
  * Insert a constant into a constant pool and return its index in the pool.
186
211
  */
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: prism
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.20.0
4
+ version: 0.21.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shopify
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-02-01 00:00:00.000000000 Z
11
+ date: 2024-02-05 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: