prism 0.20.0 → 0.21.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -2
- data/docs/parser_translation.md +1 -1
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +1 -1
- data/include/prism/parser.h +1 -1
- data/include/prism/util/pm_constant_pool.h +11 -0
- data/include/prism/version.h +2 -2
- data/lib/prism/serialize.rb +1 -1
- data/lib/prism/translation/parser/compiler.rb +88 -91
- data/lib/prism/translation/parser.rb +19 -11
- data/prism.gemspec +1 -1
- data/src/encoding.c +1 -1
- data/src/prism.c +238 -181
- data/src/util/pm_constant_pool.c +25 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b4d054a1268bf7f8b5947f30ad244c4713c850911e79c1ba469eca0ac36bc47c
|
4
|
+
data.tar.gz: b77e29c93584b79759381d75cfb5ad0753fe8d5f92863cada81895bb67f17572
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 00fa781d854c4f9b716b238c392e48f3bd946b52a5ea100c8fa98bd909bd7d2fcd116b80c7877cbfff59bb991d7c78158ded3ff4154d7d3362df3b8c00fd4d08
|
7
|
+
data.tar.gz: cfea37b3aa825f0bb91a0bd19dec1ec72187790aca39a2b8d560a483d83c1f4604346320d071c93cd605f39c1fd975b1b508395d9673d7bf95c16feaeeee52e6
|
data/CHANGELOG.md
CHANGED
@@ -6,7 +6,25 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
|
|
6
6
|
|
7
7
|
## [Unreleased]
|
8
8
|
|
9
|
-
## [0.20.0] - 2024-02-01
|
9
|
+
## [0.21.0] - 2024-02-05
|
10
|
+
|
11
|
+
### Added
|
12
|
+
|
13
|
+
- Add the `pm_constant_pool_find` API for finding a constant.
|
14
|
+
|
15
|
+
### Changed
|
16
|
+
|
17
|
+
- Fixes for `Prism::Translation::Parser`.
|
18
|
+
- Ensure all errors flow through `parser.diagnostics.process`.
|
19
|
+
- Fix the find pattern node.
|
20
|
+
- Fix block forwarding with `NumberedParametersNode`.
|
21
|
+
- Ensure we can parse strings with invalid bytes for the encoding.
|
22
|
+
- Fix hash pairs in pattern matching.
|
23
|
+
- Properly reject operator writes on operator calls, e.g., `a.+ -= b`.
|
24
|
+
- Fix multi-byte escapes.
|
25
|
+
- Handle missing body in `begin` within the receiver of a method call.
|
26
|
+
|
27
|
+
## [0.20.0] - 2024-02-01
|
10
28
|
|
11
29
|
### Added
|
12
30
|
|
@@ -323,7 +341,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
|
|
323
341
|
|
324
342
|
- 🎉 Initial release! 🎉
|
325
343
|
|
326
|
-
[unreleased]: https://github.com/ruby/prism/compare/v0.20.0...HEAD
|
344
|
+
[unreleased]: https://github.com/ruby/prism/compare/v0.21.0...HEAD
|
345
|
+
[0.21.0]: https://github.com/ruby/prism/compare/v0.20.0...v0.21.0
|
327
346
|
[0.20.0]: https://github.com/ruby/prism/compare/v0.19.0...v0.20.0
|
328
347
|
[0.19.0]: https://github.com/ruby/prism/compare/v0.18.0...v0.19.0
|
329
348
|
[0.18.0]: https://github.com/ruby/prism/compare/v0.17.1...v0.18.0
|
data/docs/parser_translation.md
CHANGED
@@ -9,7 +9,7 @@ The `parser` gem provides multiple parsers to support different versions of the
|
|
9
9
|
You can use the `prism` parser like you would any other. After requiring the parser, you should be able to call any of the regular `Parser::Base` APIs that you would normally use.
|
10
10
|
|
11
11
|
```ruby
|
12
|
-
require "prism
|
12
|
+
require "prism"
|
13
13
|
|
14
14
|
Prism::Translation::Parser.parse_file("path/to/file.rb")
|
15
15
|
```
|
data/ext/prism/extension.h
CHANGED
data/include/prism/ast.h
CHANGED
@@ -1042,7 +1042,7 @@ static const pm_node_flags_t PM_NODE_FLAG_COMMON_MASK = (1 << (PM_NODE_FLAG_BITS
|
|
1042
1042
|
* Cast the type to an enum to allow the compiler to provide exhaustiveness
|
1043
1043
|
* checking.
|
1044
1044
|
*/
|
1045
|
-
#define PM_NODE_TYPE(node) ((enum pm_node_type) node->type)
|
1045
|
+
#define PM_NODE_TYPE(node) ((enum pm_node_type) (node)->type)
|
1046
1046
|
|
1047
1047
|
/**
|
1048
1048
|
* Return true if the type of the given node matches the given type.
|
data/include/prism/parser.h
CHANGED
@@ -626,7 +626,7 @@ struct pm_parser {
|
|
626
626
|
* This is the path of the file being parsed. We use the filepath when
|
627
627
|
* constructing SourceFileNodes.
|
628
628
|
*/
|
629
|
-
pm_string_t
|
629
|
+
pm_string_t filepath;
|
630
630
|
|
631
631
|
/**
|
632
632
|
* This constant pool keeps all of the constants defined throughout the file
|
@@ -154,6 +154,17 @@ bool pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity);
|
|
154
154
|
*/
|
155
155
|
pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id);
|
156
156
|
|
157
|
+
/**
|
158
|
+
* Find a constant in a constant pool. Returns the id of the constant, or 0 if
|
159
|
+
* the constant is not found.
|
160
|
+
*
|
161
|
+
* @param pool The pool to find the constant in.
|
162
|
+
* @param start A pointer to the start of the constant.
|
163
|
+
* @param length The length of the constant.
|
164
|
+
* @return The id of the constant.
|
165
|
+
*/
|
166
|
+
pm_constant_id_t pm_constant_pool_find(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
|
167
|
+
|
157
168
|
/**
|
158
169
|
* Insert a constant into a constant pool that is a slice of a source string.
|
159
170
|
* Returns the id of the constant, or 0 if any potential calls to resize fail.
|
data/include/prism/version.h
CHANGED
@@ -14,7 +14,7 @@
|
|
14
14
|
/**
|
15
15
|
* The minor version of the Prism library as an int.
|
16
16
|
*/
|
17
|
-
#define PRISM_VERSION_MINOR 20
|
17
|
+
#define PRISM_VERSION_MINOR 21
|
18
18
|
|
19
19
|
/**
|
20
20
|
* The patch version of the Prism library as an int.
|
@@ -24,6 +24,6 @@
|
|
24
24
|
/**
|
25
25
|
* The version of the Prism library as a constant string.
|
26
26
|
*/
|
27
|
-
#define PRISM_VERSION "0.20.0"
|
27
|
+
#define PRISM_VERSION "0.21.0"
|
28
28
|
|
29
29
|
#endif
|
data/lib/prism/serialize.rb
CHANGED
@@ -105,14 +105,18 @@ module Prism
|
|
105
105
|
# { a: 1 }
|
106
106
|
# ^^^^
|
107
107
|
def visit_assoc_node(node)
|
108
|
-
if
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
108
|
+
if in_pattern
|
109
|
+
if node.value.is_a?(ImplicitNode)
|
110
|
+
if node.key.is_a?(SymbolNode)
|
111
|
+
builder.match_hash_var([node.key.unescaped, srange(node.key.location)])
|
112
|
+
else
|
113
|
+
builder.match_hash_var_from_str(token(node.key.opening_loc), visit_all(node.key.parts), token(node.key.closing_loc))
|
114
|
+
end
|
113
115
|
else
|
114
|
-
builder.
|
116
|
+
builder.pair_keyword([node.key.unescaped, srange(node.key.location)], visit(node.value))
|
115
117
|
end
|
118
|
+
elsif node.value.is_a?(ImplicitNode)
|
119
|
+
builder.pair_label([node.key.unescaped, srange(node.key.location)])
|
116
120
|
elsif node.operator_loc
|
117
121
|
builder.pair(visit(node.key), token(node.operator_loc), visit(node.value))
|
118
122
|
elsif node.key.is_a?(SymbolNode) && node.key.opening_loc.nil?
|
@@ -241,53 +245,51 @@ module Prism
|
|
241
245
|
block = nil
|
242
246
|
end
|
243
247
|
|
248
|
+
if node.call_operator_loc.nil?
|
249
|
+
case name
|
250
|
+
when :!
|
251
|
+
return visit_block(builder.not_op(token(node.message_loc), token(node.opening_loc), visit(node.receiver), token(node.closing_loc)), block)
|
252
|
+
when :[]
|
253
|
+
return visit_block(builder.index(visit(node.receiver), token(node.opening_loc), visit_all(arguments), token(node.closing_loc)), block)
|
254
|
+
when :[]=
|
255
|
+
if node.message != "[]=" && node.arguments && block.nil? && !node.safe_navigation?
|
256
|
+
return visit_block(
|
257
|
+
builder.assign(
|
258
|
+
builder.index_asgn(
|
259
|
+
visit(node.receiver),
|
260
|
+
token(node.opening_loc),
|
261
|
+
visit_all(node.arguments.arguments[...-1]),
|
262
|
+
token(node.closing_loc),
|
263
|
+
),
|
264
|
+
srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, ["="]),
|
265
|
+
visit(node.arguments.arguments.last)
|
266
|
+
),
|
267
|
+
block
|
268
|
+
)
|
269
|
+
end
|
270
|
+
end
|
271
|
+
end
|
272
|
+
|
273
|
+
message_loc = node.message_loc
|
274
|
+
call_operator_loc = node.call_operator_loc
|
275
|
+
call_operator = [{ "." => :dot, "&." => :anddot, "::" => "::" }.fetch(call_operator_loc.slice), srange(call_operator_loc)] if call_operator_loc
|
276
|
+
|
244
277
|
visit_block(
|
245
|
-
if name
|
246
|
-
builder.
|
247
|
-
|
248
|
-
|
249
|
-
visit(node.
|
250
|
-
token(node.closing_loc)
|
278
|
+
if name.end_with?("=") && !message_loc.slice.end_with?("=") && node.arguments && block.nil?
|
279
|
+
builder.assign(
|
280
|
+
builder.attr_asgn(visit(node.receiver), call_operator, token(message_loc)),
|
281
|
+
srange_find(message_loc.end_offset, node.arguments.location.start_offset, ["="]),
|
282
|
+
visit(node.arguments.arguments.last)
|
251
283
|
)
|
252
|
-
|
253
|
-
builder.
|
284
|
+
else
|
285
|
+
builder.call_method(
|
254
286
|
visit(node.receiver),
|
287
|
+
call_operator,
|
288
|
+
message_loc ? [node.name, srange(message_loc)] : nil,
|
255
289
|
token(node.opening_loc),
|
256
290
|
visit_all(arguments),
|
257
291
|
token(node.closing_loc)
|
258
292
|
)
|
259
|
-
elsif name == :[]= && node.message != "[]=" && node.arguments && block.nil?
|
260
|
-
builder.assign(
|
261
|
-
builder.index_asgn(
|
262
|
-
visit(node.receiver),
|
263
|
-
token(node.opening_loc),
|
264
|
-
visit_all(node.arguments.arguments[...-1]),
|
265
|
-
token(node.closing_loc),
|
266
|
-
),
|
267
|
-
srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, ["="]),
|
268
|
-
visit(node.arguments.arguments.last)
|
269
|
-
)
|
270
|
-
else
|
271
|
-
message_loc = node.message_loc
|
272
|
-
call_operator_loc = node.call_operator_loc
|
273
|
-
call_operator = [{ "." => :dot, "&." => :anddot, "::" => "::" }.fetch(call_operator_loc.slice), srange(call_operator_loc)] if call_operator_loc
|
274
|
-
|
275
|
-
if name.end_with?("=") && !message_loc.slice.end_with?("=") && node.arguments && block.nil?
|
276
|
-
builder.assign(
|
277
|
-
builder.attr_asgn(visit(node.receiver), call_operator, token(message_loc)),
|
278
|
-
srange_find(message_loc.end_offset, node.arguments.location.start_offset, ["="]),
|
279
|
-
visit(node.arguments.arguments.last)
|
280
|
-
)
|
281
|
-
else
|
282
|
-
builder.call_method(
|
283
|
-
visit(node.receiver),
|
284
|
-
call_operator,
|
285
|
-
message_loc ? [node.name, srange(message_loc)] : nil,
|
286
|
-
token(node.opening_loc),
|
287
|
-
visit_all(arguments),
|
288
|
-
token(node.closing_loc)
|
289
|
-
)
|
290
|
-
end
|
291
293
|
end,
|
292
294
|
block
|
293
295
|
)
|
@@ -519,8 +521,6 @@ module Prism
|
|
519
521
|
# def self.foo; end
|
520
522
|
# ^^^^^^^^^^^^^^^^^
|
521
523
|
def visit_def_node(node)
|
522
|
-
forwarding = find_forwarding(node.parameters)
|
523
|
-
|
524
524
|
if node.equal_loc
|
525
525
|
if node.receiver
|
526
526
|
builder.def_endless_singleton(
|
@@ -530,7 +530,7 @@ module Prism
|
|
530
530
|
token(node.name_loc),
|
531
531
|
builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false),
|
532
532
|
token(node.equal_loc),
|
533
|
-
node.body&.accept(copy_compiler(forwarding:
|
533
|
+
node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters)))
|
534
534
|
)
|
535
535
|
else
|
536
536
|
builder.def_endless_method(
|
@@ -538,7 +538,7 @@ module Prism
|
|
538
538
|
token(node.name_loc),
|
539
539
|
builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false),
|
540
540
|
token(node.equal_loc),
|
541
|
-
node.body&.accept(copy_compiler(forwarding:
|
541
|
+
node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters)))
|
542
542
|
)
|
543
543
|
end
|
544
544
|
elsif node.receiver
|
@@ -548,7 +548,7 @@ module Prism
|
|
548
548
|
token(node.operator_loc),
|
549
549
|
token(node.name_loc),
|
550
550
|
builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false),
|
551
|
-
node.body&.accept(copy_compiler(forwarding:
|
551
|
+
node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters))),
|
552
552
|
token(node.end_keyword_loc)
|
553
553
|
)
|
554
554
|
else
|
@@ -556,7 +556,7 @@ module Prism
|
|
556
556
|
token(node.def_keyword_loc),
|
557
557
|
token(node.name_loc),
|
558
558
|
builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false),
|
559
|
-
node.body&.accept(copy_compiler(forwarding:
|
559
|
+
node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters))),
|
560
560
|
token(node.end_keyword_loc)
|
561
561
|
)
|
562
562
|
end
|
@@ -614,9 +614,7 @@ module Prism
|
|
614
614
|
# foo => [*, bar, *]
|
615
615
|
# ^^^^^^^^^^^
|
616
616
|
def visit_find_pattern_node(node)
|
617
|
-
elements = [*node.requireds]
|
618
|
-
elements << node.rest if !node.rest.nil? && !node.rest.is_a?(ImplicitRestNode)
|
619
|
-
elements.concat(node.posts)
|
617
|
+
elements = [node.left, *node.requireds, node.right]
|
620
618
|
|
621
619
|
if node.constant
|
622
620
|
builder.const_pattern(visit(node.constant), token(node.opening_loc), builder.find_pattern(nil, visit_all(elements), nil), token(node.closing_loc))
|
@@ -993,24 +991,24 @@ module Prism
|
|
993
991
|
|
994
992
|
# -> {}
|
995
993
|
def visit_lambda_node(node)
|
994
|
+
parameters = node.parameters
|
995
|
+
|
996
996
|
builder.block(
|
997
997
|
builder.call_lambda(token(node.operator_loc)),
|
998
998
|
[node.opening, srange(node.opening_loc)],
|
999
|
-
if
|
1000
|
-
if node.parameters.is_a?(NumberedParametersNode)
|
1001
|
-
visit(node.parameters)
|
1002
|
-
else
|
1003
|
-
builder.args(
|
1004
|
-
token(node.parameters.opening_loc),
|
1005
|
-
visit(node.parameters),
|
1006
|
-
token(node.parameters.closing_loc),
|
1007
|
-
false
|
1008
|
-
)
|
1009
|
-
end
|
1010
|
-
else
|
999
|
+
if parameters.nil?
|
1011
1000
|
builder.args(nil, [], nil, false)
|
1001
|
+
elsif node.parameters.is_a?(NumberedParametersNode)
|
1002
|
+
visit(node.parameters)
|
1003
|
+
else
|
1004
|
+
builder.args(
|
1005
|
+
token(node.parameters.opening_loc),
|
1006
|
+
visit(node.parameters),
|
1007
|
+
token(node.parameters.closing_loc),
|
1008
|
+
false
|
1009
|
+
)
|
1012
1010
|
end,
|
1013
|
-
node.body&.accept(copy_compiler(forwarding: find_forwarding(
|
1011
|
+
node.body&.accept(copy_compiler(forwarding: parameters.is_a?(NumberedParametersNode) ? [] : find_forwarding(parameters&.parameters))),
|
1014
1012
|
[node.closing, srange(node.closing_loc)]
|
1015
1013
|
)
|
1016
1014
|
end
|
@@ -1096,7 +1094,7 @@ module Prism
|
|
1096
1094
|
# case of a syntax error. The parser gem doesn't have such a concept, so
|
1097
1095
|
# we invent our own here.
|
1098
1096
|
def visit_missing_node(node)
|
1099
|
-
|
1097
|
+
::AST::Node.new(:missing, [], location: ::Parser::Source::Map.new(srange(node.location)))
|
1100
1098
|
end
|
1101
1099
|
|
1102
1100
|
# module Foo; end
|
@@ -1727,29 +1725,29 @@ module Prism
|
|
1727
1725
|
# Visit a block node on a call.
|
1728
1726
|
def visit_block(call, block)
|
1729
1727
|
if block
|
1728
|
+
parameters = block.parameters
|
1729
|
+
|
1730
1730
|
builder.block(
|
1731
1731
|
call,
|
1732
1732
|
token(block.opening_loc),
|
1733
|
-
if
|
1734
|
-
if parameters.is_a?(NumberedParametersNode)
|
1735
|
-
visit(parameters)
|
1736
|
-
else
|
1737
|
-
builder.args(
|
1738
|
-
token(parameters.opening_loc),
|
1739
|
-
if procarg0?(parameters.parameters)
|
1740
|
-
parameter = parameters.parameters.requireds.first
|
1741
|
-
[builder.procarg0(visit(parameter))].concat(visit_all(parameters.locals))
|
1742
|
-
else
|
1743
|
-
visit(parameters)
|
1744
|
-
end,
|
1745
|
-
token(parameters.closing_loc),
|
1746
|
-
false
|
1747
|
-
)
|
1748
|
-
end
|
1749
|
-
else
|
1733
|
+
if parameters.nil?
|
1750
1734
|
builder.args(nil, [], nil, false)
|
1735
|
+
elsif parameters.is_a?(NumberedParametersNode)
|
1736
|
+
visit(parameters)
|
1737
|
+
else
|
1738
|
+
builder.args(
|
1739
|
+
token(parameters.opening_loc),
|
1740
|
+
if procarg0?(parameters.parameters)
|
1741
|
+
parameter = parameters.parameters.requireds.first
|
1742
|
+
[builder.procarg0(visit(parameter))].concat(visit_all(parameters.locals))
|
1743
|
+
else
|
1744
|
+
visit(parameters)
|
1745
|
+
end,
|
1746
|
+
token(parameters.closing_loc),
|
1747
|
+
false
|
1748
|
+
)
|
1751
1749
|
end,
|
1752
|
-
block.body&.accept(copy_compiler(forwarding: find_forwarding(
|
1750
|
+
block.body&.accept(copy_compiler(forwarding: parameters.is_a?(NumberedParametersNode) ? [] : find_forwarding(parameters&.parameters))),
|
1753
1751
|
token(block.closing_loc)
|
1754
1752
|
)
|
1755
1753
|
else
|
@@ -1762,9 +1760,9 @@ module Prism
|
|
1762
1760
|
children = []
|
1763
1761
|
node.parts.each do |part|
|
1764
1762
|
pushing =
|
1765
|
-
if part.is_a?(StringNode) && part.unescaped.
|
1766
|
-
unescaped = part.unescaped.
|
1767
|
-
escaped = part.content.
|
1763
|
+
if part.is_a?(StringNode) && part.unescaped.include?("\n")
|
1764
|
+
unescaped = part.unescaped.lines(chomp: true)
|
1765
|
+
escaped = part.content.lines(chomp: true)
|
1768
1766
|
|
1769
1767
|
escaped_lengths =
|
1770
1768
|
if node.opening.end_with?("'")
|
@@ -1779,7 +1777,6 @@ module Prism
|
|
1779
1777
|
unescaped.zip(escaped_lengths).map do |unescaped_line, escaped_length|
|
1780
1778
|
end_offset = start_offset + (escaped_length || 0)
|
1781
1779
|
inner_part = builder.string_internal(["#{unescaped_line}\n", srange_offsets(start_offset, end_offset)])
|
1782
|
-
|
1783
1780
|
start_offset = end_offset
|
1784
1781
|
inner_part
|
1785
1782
|
end
|
@@ -26,7 +26,7 @@ module Prism
|
|
26
26
|
Racc_debug_parser = false # :nodoc:
|
27
27
|
|
28
28
|
def version # :nodoc:
|
29
|
-
|
29
|
+
34
|
30
30
|
end
|
31
31
|
|
32
32
|
# The default encoding for Ruby files is UTF-8.
|
@@ -42,9 +42,10 @@ module Prism
|
|
42
42
|
@source_buffer = source_buffer
|
43
43
|
source = source_buffer.source
|
44
44
|
|
45
|
-
|
45
|
+
offset_cache = build_offset_cache(source)
|
46
|
+
result = unwrap(Prism.parse(source, filepath: source_buffer.name), offset_cache)
|
46
47
|
|
47
|
-
build_ast(result.value,
|
48
|
+
build_ast(result.value, offset_cache)
|
48
49
|
ensure
|
49
50
|
@source_buffer = nil
|
50
51
|
end
|
@@ -55,7 +56,7 @@ module Prism
|
|
55
56
|
source = source_buffer.source
|
56
57
|
|
57
58
|
offset_cache = build_offset_cache(source)
|
58
|
-
result = unwrap(Prism.parse(source, filepath: source_buffer.name))
|
59
|
+
result = unwrap(Prism.parse(source, filepath: source_buffer.name), offset_cache)
|
59
60
|
|
60
61
|
[
|
61
62
|
build_ast(result.value, offset_cache),
|
@@ -72,7 +73,7 @@ module Prism
|
|
72
73
|
source = source_buffer.source
|
73
74
|
|
74
75
|
offset_cache = build_offset_cache(source)
|
75
|
-
result = unwrap(Prism.parse_lex(source, filepath: source_buffer.name))
|
76
|
+
result = unwrap(Prism.parse_lex(source, filepath: source_buffer.name), offset_cache)
|
76
77
|
|
77
78
|
program, tokens = result.value
|
78
79
|
|
@@ -93,16 +94,23 @@ module Prism
|
|
93
94
|
|
94
95
|
private
|
95
96
|
|
97
|
+
# This is a hook to allow consumers to disable some errors if they don't
|
98
|
+
# want them to block creating the syntax tree.
|
99
|
+
def valid_error?(error)
|
100
|
+
true
|
101
|
+
end
|
102
|
+
|
96
103
|
# If there was an error generated during the parse, then raise an
|
97
104
|
# appropriate syntax error. Otherwise return the result.
|
98
|
-
def unwrap(result)
|
99
|
-
|
105
|
+
def unwrap(result, offset_cache)
|
106
|
+
result.errors.each do |error|
|
107
|
+
next unless valid_error?(error)
|
100
108
|
|
101
|
-
|
102
|
-
|
109
|
+
location = build_range(error.location, offset_cache)
|
110
|
+
diagnostics.process(Diagnostic.new(error.message, location))
|
111
|
+
end
|
103
112
|
|
104
|
-
|
105
|
-
raise ::Parser::SyntaxError, diagnostic
|
113
|
+
result
|
106
114
|
end
|
107
115
|
|
108
116
|
# Prism deals with offsets in bytes, while the parser gem deals with
|
data/prism.gemspec
CHANGED
data/src/encoding.c
CHANGED
@@ -2252,7 +2252,7 @@ static const uint8_t pm_utf_8_dfa[] = {
|
|
2252
2252
|
*/
|
2253
2253
|
static pm_unicode_codepoint_t
|
2254
2254
|
pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
|
2255
|
-
assert(n >=
|
2255
|
+
assert(n >= 0);
|
2256
2256
|
size_t maximum = (size_t) n;
|
2257
2257
|
|
2258
2258
|
uint32_t codepoint;
|
data/src/prism.c
CHANGED
@@ -870,6 +870,105 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
|
|
870
870
|
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
|
871
871
|
}
|
872
872
|
|
873
|
+
/******************************************************************************/
|
874
|
+
/* Basic character checks */
|
875
|
+
/******************************************************************************/
|
876
|
+
|
877
|
+
/**
|
878
|
+
* This function is used extremely frequently to lex all of the identifiers in a
|
879
|
+
* source file, so it's important that it be as fast as possible. For this
|
880
|
+
* reason we have the encoding_changed boolean to check if we need to go through
|
881
|
+
* the function pointer or can just directly use the UTF-8 functions.
|
882
|
+
*/
|
883
|
+
static inline size_t
|
884
|
+
char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b) {
|
885
|
+
if (parser->encoding_changed) {
|
886
|
+
size_t width;
|
887
|
+
if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
|
888
|
+
return width;
|
889
|
+
} else if (*b == '_') {
|
890
|
+
return 1;
|
891
|
+
} else if (*b >= 0x80) {
|
892
|
+
return parser->encoding->char_width(b, parser->end - b);
|
893
|
+
} else {
|
894
|
+
return 0;
|
895
|
+
}
|
896
|
+
} else if (*b < 0x80) {
|
897
|
+
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
|
898
|
+
} else {
|
899
|
+
return pm_encoding_utf_8_char_width(b, parser->end - b);
|
900
|
+
}
|
901
|
+
}
|
902
|
+
|
903
|
+
/**
|
904
|
+
* Similar to char_is_identifier but this function assumes that the encoding
|
905
|
+
* has not been changed.
|
906
|
+
*/
|
907
|
+
static inline size_t
|
908
|
+
char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
|
909
|
+
if (*b < 0x80) {
|
910
|
+
return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
|
911
|
+
} else {
|
912
|
+
return pm_encoding_utf_8_char_width(b, end - b);
|
913
|
+
}
|
914
|
+
}
|
915
|
+
|
916
|
+
/**
|
917
|
+
* Like the above, this function is also used extremely frequently to lex all of
|
918
|
+
* the identifiers in a source file once the first character has been found. So
|
919
|
+
* it's important that it be as fast as possible.
|
920
|
+
*/
|
921
|
+
static inline size_t
|
922
|
+
char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
|
923
|
+
if (parser->encoding_changed) {
|
924
|
+
size_t width;
|
925
|
+
if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
|
926
|
+
return width;
|
927
|
+
} else if (*b == '_') {
|
928
|
+
return 1;
|
929
|
+
} else if (*b >= 0x80) {
|
930
|
+
return parser->encoding->char_width(b, parser->end - b);
|
931
|
+
} else {
|
932
|
+
return 0;
|
933
|
+
}
|
934
|
+
}
|
935
|
+
return char_is_identifier_utf8(b, parser->end);
|
936
|
+
}
|
937
|
+
|
938
|
+
// Here we're defining a perfect hash for the characters that are allowed in
|
939
|
+
// global names. This is used to quickly check the next character after a $ to
|
940
|
+
// see if it's a valid character for a global name.
|
941
|
+
#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
|
942
|
+
#define PUNCT(idx) ( \
|
943
|
+
BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
|
944
|
+
BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
|
945
|
+
BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
|
946
|
+
BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
|
947
|
+
BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
|
948
|
+
BIT('0', idx))
|
949
|
+
|
950
|
+
const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
|
951
|
+
|
952
|
+
#undef BIT
|
953
|
+
#undef PUNCT
|
954
|
+
|
955
|
+
static inline bool
|
956
|
+
char_is_global_name_punctuation(const uint8_t b) {
|
957
|
+
const unsigned int i = (const unsigned int) b;
|
958
|
+
if (i <= 0x20 || 0x7e < i) return false;
|
959
|
+
|
960
|
+
return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
|
961
|
+
}
|
962
|
+
|
963
|
+
static inline bool
|
964
|
+
token_is_setter_name(pm_token_t *token) {
|
965
|
+
return (
|
966
|
+
(token->type == PM_TOKEN_IDENTIFIER) &&
|
967
|
+
(token->end - token->start >= 2) &&
|
968
|
+
(token->end[-1] == '=')
|
969
|
+
);
|
970
|
+
}
|
971
|
+
|
873
972
|
/******************************************************************************/
|
874
973
|
/* Node flag handling functions */
|
875
974
|
/******************************************************************************/
|
@@ -1923,11 +2022,12 @@ pm_call_node_index_p(pm_call_node_t *node) {
|
|
1923
2022
|
* operator assignment.
|
1924
2023
|
*/
|
1925
2024
|
static inline bool
|
1926
|
-
pm_call_node_writable_p(pm_call_node_t *node) {
|
2025
|
+
pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
|
1927
2026
|
return (
|
1928
2027
|
(node->message_loc.start != NULL) &&
|
1929
2028
|
(node->message_loc.end[-1] != '!') &&
|
1930
2029
|
(node->message_loc.end[-1] != '?') &&
|
2030
|
+
char_is_identifier_start(parser, node->message_loc.start) &&
|
1931
2031
|
(node->opening_loc.start == NULL) &&
|
1932
2032
|
(node->arguments == NULL) &&
|
1933
2033
|
(node->block == NULL)
|
@@ -2744,19 +2844,21 @@ pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *targ
|
|
2744
2844
|
* Check if the receiver of a `def` node is allowed.
|
2745
2845
|
*/
|
2746
2846
|
static void
|
2747
|
-
|
2748
|
-
switch (
|
2847
|
+
pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
|
2848
|
+
switch (PM_NODE_TYPE(node)) {
|
2749
2849
|
case PM_BEGIN_NODE: {
|
2750
|
-
pm_begin_node_t *
|
2751
|
-
|
2850
|
+
const pm_begin_node_t *cast = (pm_begin_node_t *) node;
|
2851
|
+
if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
|
2752
2852
|
break;
|
2753
2853
|
}
|
2754
|
-
case PM_PARENTHESES_NODE:
|
2755
|
-
|
2854
|
+
case PM_PARENTHESES_NODE: {
|
2855
|
+
const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
|
2856
|
+
if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
|
2756
2857
|
break;
|
2858
|
+
}
|
2757
2859
|
case PM_STATEMENTS_NODE: {
|
2758
|
-
pm_statements_node_t *
|
2759
|
-
|
2860
|
+
const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
|
2861
|
+
pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
|
2760
2862
|
break;
|
2761
2863
|
}
|
2762
2864
|
case PM_ARRAY_NODE:
|
@@ -2775,7 +2877,10 @@ pm_check_def_receiver(pm_parser_t *parser, pm_node_t *receiver) {
|
|
2775
2877
|
case PM_STRING_NODE:
|
2776
2878
|
case PM_SYMBOL_NODE:
|
2777
2879
|
case PM_X_STRING_NODE:
|
2778
|
-
pm_parser_err_node(parser,
|
2880
|
+
pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
|
2881
|
+
break;
|
2882
|
+
default:
|
2883
|
+
break;
|
2779
2884
|
}
|
2780
2885
|
}
|
2781
2886
|
|
@@ -2807,7 +2912,7 @@ pm_def_node_create(
|
|
2807
2912
|
}
|
2808
2913
|
|
2809
2914
|
if ((receiver != NULL) && PM_NODE_TYPE_P(receiver, PM_PARENTHESES_NODE)) {
|
2810
|
-
|
2915
|
+
pm_def_node_receiver_check(parser, receiver);
|
2811
2916
|
}
|
2812
2917
|
|
2813
2918
|
*node = (pm_def_node_t) {
|
@@ -5330,7 +5435,7 @@ pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword)
|
|
5330
5435
|
.flags = PM_NODE_FLAG_STATIC_LITERAL,
|
5331
5436
|
.location = PM_LOCATION_TOKEN_VALUE(file_keyword),
|
5332
5437
|
},
|
5333
|
-
.filepath = parser->
|
5438
|
+
.filepath = parser->filepath
|
5334
5439
|
};
|
5335
5440
|
|
5336
5441
|
return node;
|
@@ -6220,6 +6325,16 @@ pm_parser_local_add_owned(pm_parser_t *parser, const uint8_t *start, size_t leng
|
|
6220
6325
|
return constant_id;
|
6221
6326
|
}
|
6222
6327
|
|
6328
|
+
/**
|
6329
|
+
* Add a local variable from a constant string to the current scope.
|
6330
|
+
*/
|
6331
|
+
static pm_constant_id_t
|
6332
|
+
pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
|
6333
|
+
pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
|
6334
|
+
if (constant_id != 0) pm_parser_local_add(parser, constant_id);
|
6335
|
+
return constant_id;
|
6336
|
+
}
|
6337
|
+
|
6223
6338
|
/**
|
6224
6339
|
* Add a parameter name to the current scope and check whether the name of the
|
6225
6340
|
* parameter is unique or not.
|
@@ -6259,105 +6374,6 @@ pm_parser_scope_pop(pm_parser_t *parser) {
|
|
6259
6374
|
free(scope);
|
6260
6375
|
}
|
6261
6376
|
|
6262
|
-
/******************************************************************************/
|
6263
|
-
/* Basic character checks */
|
6264
|
-
/******************************************************************************/
|
6265
|
-
|
6266
|
-
/**
|
6267
|
-
* This function is used extremely frequently to lex all of the identifiers in a
|
6268
|
-
* source file, so it's important that it be as fast as possible. For this
|
6269
|
-
* reason we have the encoding_changed boolean to check if we need to go through
|
6270
|
-
* the function pointer or can just directly use the UTF-8 functions.
|
6271
|
-
*/
|
6272
|
-
static inline size_t
|
6273
|
-
char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
|
6274
|
-
if (parser->encoding_changed) {
|
6275
|
-
size_t width;
|
6276
|
-
if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
|
6277
|
-
return width;
|
6278
|
-
} else if (*b == '_') {
|
6279
|
-
return 1;
|
6280
|
-
} else if (*b >= 0x80) {
|
6281
|
-
return parser->encoding->char_width(b, parser->end - b);
|
6282
|
-
} else {
|
6283
|
-
return 0;
|
6284
|
-
}
|
6285
|
-
} else if (*b < 0x80) {
|
6286
|
-
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
|
6287
|
-
} else {
|
6288
|
-
return pm_encoding_utf_8_char_width(b, parser->end - b);
|
6289
|
-
}
|
6290
|
-
}
|
6291
|
-
|
6292
|
-
/**
|
6293
|
-
* Similar to char_is_identifier but this function assumes that the encoding
|
6294
|
-
* has not been changed.
|
6295
|
-
*/
|
6296
|
-
static inline size_t
|
6297
|
-
char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
|
6298
|
-
if (*b < 0x80) {
|
6299
|
-
return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
|
6300
|
-
} else {
|
6301
|
-
return pm_encoding_utf_8_char_width(b, end - b);
|
6302
|
-
}
|
6303
|
-
}
|
6304
|
-
|
6305
|
-
/**
|
6306
|
-
* Like the above, this function is also used extremely frequently to lex all of
|
6307
|
-
* the identifiers in a source file once the first character has been found. So
|
6308
|
-
* it's important that it be as fast as possible.
|
6309
|
-
*/
|
6310
|
-
static inline size_t
|
6311
|
-
char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
|
6312
|
-
if (parser->encoding_changed) {
|
6313
|
-
size_t width;
|
6314
|
-
if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
|
6315
|
-
return width;
|
6316
|
-
} else if (*b == '_') {
|
6317
|
-
return 1;
|
6318
|
-
} else if (*b >= 0x80) {
|
6319
|
-
return parser->encoding->char_width(b, parser->end - b);
|
6320
|
-
} else {
|
6321
|
-
return 0;
|
6322
|
-
}
|
6323
|
-
}
|
6324
|
-
return char_is_identifier_utf8(b, parser->end);
|
6325
|
-
}
|
6326
|
-
|
6327
|
-
// Here we're defining a perfect hash for the characters that are allowed in
|
6328
|
-
// global names. This is used to quickly check the next character after a $ to
|
6329
|
-
// see if it's a valid character for a global name.
|
6330
|
-
#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
|
6331
|
-
#define PUNCT(idx) ( \
|
6332
|
-
BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
|
6333
|
-
BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
|
6334
|
-
BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
|
6335
|
-
BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
|
6336
|
-
BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
|
6337
|
-
BIT('0', idx))
|
6338
|
-
|
6339
|
-
const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
|
6340
|
-
|
6341
|
-
#undef BIT
|
6342
|
-
#undef PUNCT
|
6343
|
-
|
6344
|
-
static inline bool
|
6345
|
-
char_is_global_name_punctuation(const uint8_t b) {
|
6346
|
-
const unsigned int i = (const unsigned int) b;
|
6347
|
-
if (i <= 0x20 || 0x7e < i) return false;
|
6348
|
-
|
6349
|
-
return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
|
6350
|
-
}
|
6351
|
-
|
6352
|
-
static inline bool
|
6353
|
-
token_is_setter_name(pm_token_t *token) {
|
6354
|
-
return (
|
6355
|
-
(token->type == PM_TOKEN_IDENTIFIER) &&
|
6356
|
-
(token->end - token->start >= 2) &&
|
6357
|
-
(token->end[-1] == '=')
|
6358
|
-
);
|
6359
|
-
}
|
6360
|
-
|
6361
6377
|
/******************************************************************************/
|
6362
6378
|
/* Stack helpers */
|
6363
6379
|
/******************************************************************************/
|
@@ -7673,6 +7689,28 @@ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte
|
|
7673
7689
|
pm_buffer_append_byte(buffer, byte);
|
7674
7690
|
}
|
7675
7691
|
|
7692
|
+
/**
|
7693
|
+
* Write each byte of the given escaped character into the buffer.
|
7694
|
+
*/
|
7695
|
+
static inline void
|
7696
|
+
escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
|
7697
|
+
size_t width;
|
7698
|
+
if (parser->encoding_changed) {
|
7699
|
+
width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
7700
|
+
} else {
|
7701
|
+
width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
|
7702
|
+
}
|
7703
|
+
|
7704
|
+
// TODO: If the character is invalid in the given encoding, then we'll just
|
7705
|
+
// push one byte into the buffer. This should actually be an error.
|
7706
|
+
width = (width == 0) ? 1 : width;
|
7707
|
+
|
7708
|
+
for (size_t index = 0; index < width; index++) {
|
7709
|
+
escape_write_byte_encoded(parser, buffer, *parser->current.end);
|
7710
|
+
parser->current.end++;
|
7711
|
+
}
|
7712
|
+
}
|
7713
|
+
|
7676
7714
|
/**
|
7677
7715
|
* The regular expression engine doesn't support the same escape sequences as
|
7678
7716
|
* Ruby does. So first we have to read the escape sequence, and then we have to
|
@@ -8011,7 +8049,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
|
|
8011
8049
|
/* fallthrough */
|
8012
8050
|
default: {
|
8013
8051
|
if (parser->current.end < parser->end) {
|
8014
|
-
|
8052
|
+
escape_write_escape_encoded(parser, buffer);
|
8015
8053
|
}
|
8016
8054
|
return;
|
8017
8055
|
}
|
@@ -8288,10 +8326,40 @@ typedef struct {
|
|
8288
8326
|
* Push the given byte into the token buffer.
|
8289
8327
|
*/
|
8290
8328
|
static inline void
|
8291
|
-
|
8329
|
+
pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
|
8292
8330
|
pm_buffer_append_byte(&token_buffer->buffer, byte);
|
8293
8331
|
}
|
8294
8332
|
|
8333
|
+
/**
|
8334
|
+
* Append the given bytes into the token buffer.
|
8335
|
+
*/
|
8336
|
+
static inline void
|
8337
|
+
pm_token_buffer_push_bytes(pm_token_buffer_t *token_buffer, const uint8_t *bytes, size_t length) {
|
8338
|
+
pm_buffer_append_bytes(&token_buffer->buffer, bytes, length);
|
8339
|
+
}
|
8340
|
+
|
8341
|
+
/**
|
8342
|
+
* Push an escaped character into the token buffer.
|
8343
|
+
*/
|
8344
|
+
static inline void
|
8345
|
+
pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
|
8346
|
+
// First, determine the width of the character to be escaped.
|
8347
|
+
size_t width;
|
8348
|
+
if (parser->encoding_changed) {
|
8349
|
+
width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
8350
|
+
} else {
|
8351
|
+
width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
|
8352
|
+
}
|
8353
|
+
|
8354
|
+
// TODO: If the character is invalid in the given encoding, then we'll just
|
8355
|
+
// push one byte into the buffer. This should actually be an error.
|
8356
|
+
width = (width == 0 ? 1 : width);
|
8357
|
+
|
8358
|
+
// Now, push the bytes into the buffer.
|
8359
|
+
pm_token_buffer_push_bytes(token_buffer, parser->current.end, width);
|
8360
|
+
parser->current.end += width;
|
8361
|
+
}
|
8362
|
+
|
8295
8363
|
/**
|
8296
8364
|
* When we're about to return from lexing the current token and we know for sure
|
8297
8365
|
* that we have found an escape sequence, this function is called to copy the
|
@@ -9704,18 +9772,18 @@ parser_lex(pm_parser_t *parser) {
|
|
9704
9772
|
case '\t':
|
9705
9773
|
case '\v':
|
9706
9774
|
case '\\':
|
9707
|
-
|
9775
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9708
9776
|
parser->current.end++;
|
9709
9777
|
break;
|
9710
9778
|
case '\r':
|
9711
9779
|
parser->current.end++;
|
9712
9780
|
if (peek(parser) != '\n') {
|
9713
|
-
|
9781
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
9714
9782
|
break;
|
9715
9783
|
}
|
9716
9784
|
/* fallthrough */
|
9717
9785
|
case '\n':
|
9718
|
-
|
9786
|
+
pm_token_buffer_push_byte(&token_buffer, '\n');
|
9719
9787
|
|
9720
9788
|
if (parser->heredoc_end) {
|
9721
9789
|
// ... if we are on the same line as a heredoc,
|
@@ -9733,14 +9801,13 @@ parser_lex(pm_parser_t *parser) {
|
|
9733
9801
|
break;
|
9734
9802
|
default:
|
9735
9803
|
if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
|
9736
|
-
|
9804
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9737
9805
|
parser->current.end++;
|
9738
9806
|
} else if (lex_mode->as.list.interpolation) {
|
9739
9807
|
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
|
9740
9808
|
} else {
|
9741
|
-
|
9742
|
-
|
9743
|
-
parser->current.end++;
|
9809
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9810
|
+
pm_token_buffer_push_escaped(&token_buffer, parser);
|
9744
9811
|
}
|
9745
9812
|
|
9746
9813
|
break;
|
@@ -9898,9 +9965,9 @@ parser_lex(pm_parser_t *parser) {
|
|
9898
9965
|
parser->current.end++;
|
9899
9966
|
if (peek(parser) != '\n') {
|
9900
9967
|
if (lex_mode->as.regexp.terminator != '\r') {
|
9901
|
-
|
9968
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9902
9969
|
}
|
9903
|
-
|
9970
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
9904
9971
|
break;
|
9905
9972
|
}
|
9906
9973
|
/* fallthrough */
|
@@ -9935,20 +10002,19 @@ parser_lex(pm_parser_t *parser) {
|
|
9935
10002
|
case '$': case ')': case '*': case '+':
|
9936
10003
|
case '.': case '>': case '?': case ']':
|
9937
10004
|
case '^': case '|': case '}':
|
9938
|
-
|
10005
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9939
10006
|
break;
|
9940
10007
|
default:
|
9941
10008
|
break;
|
9942
10009
|
}
|
9943
10010
|
|
9944
|
-
|
10011
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9945
10012
|
parser->current.end++;
|
9946
10013
|
break;
|
9947
10014
|
}
|
9948
10015
|
|
9949
|
-
if (peeked < 0x80)
|
9950
|
-
|
9951
|
-
parser->current.end++;
|
10016
|
+
if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer, '\\');
|
10017
|
+
pm_token_buffer_push_escaped(&token_buffer, parser);
|
9952
10018
|
break;
|
9953
10019
|
}
|
9954
10020
|
|
@@ -10115,23 +10181,23 @@ parser_lex(pm_parser_t *parser) {
|
|
10115
10181
|
|
10116
10182
|
switch (peeked) {
|
10117
10183
|
case '\\':
|
10118
|
-
|
10184
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10119
10185
|
parser->current.end++;
|
10120
10186
|
break;
|
10121
10187
|
case '\r':
|
10122
10188
|
parser->current.end++;
|
10123
10189
|
if (peek(parser) != '\n') {
|
10124
10190
|
if (!lex_mode->as.string.interpolation) {
|
10125
|
-
|
10191
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10126
10192
|
}
|
10127
|
-
|
10193
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
10128
10194
|
break;
|
10129
10195
|
}
|
10130
10196
|
/* fallthrough */
|
10131
10197
|
case '\n':
|
10132
10198
|
if (!lex_mode->as.string.interpolation) {
|
10133
|
-
|
10134
|
-
|
10199
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10200
|
+
pm_token_buffer_push_byte(&token_buffer, '\n');
|
10135
10201
|
}
|
10136
10202
|
|
10137
10203
|
if (parser->heredoc_end) {
|
@@ -10150,17 +10216,16 @@ parser_lex(pm_parser_t *parser) {
|
|
10150
10216
|
break;
|
10151
10217
|
default:
|
10152
10218
|
if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
|
10153
|
-
|
10219
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
10154
10220
|
parser->current.end++;
|
10155
10221
|
} else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
|
10156
|
-
|
10222
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
10157
10223
|
parser->current.end++;
|
10158
10224
|
} else if (lex_mode->as.string.interpolation) {
|
10159
10225
|
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
|
10160
10226
|
} else {
|
10161
|
-
|
10162
|
-
|
10163
|
-
parser->current.end++;
|
10227
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10228
|
+
pm_token_buffer_push_escaped(&token_buffer, parser);
|
10164
10229
|
}
|
10165
10230
|
|
10166
10231
|
break;
|
@@ -10417,21 +10482,20 @@ parser_lex(pm_parser_t *parser) {
|
|
10417
10482
|
case '\r':
|
10418
10483
|
parser->current.end++;
|
10419
10484
|
if (peek(parser) != '\n') {
|
10420
|
-
|
10421
|
-
|
10485
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10486
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
10422
10487
|
break;
|
10423
10488
|
}
|
10424
10489
|
/* fallthrough */
|
10425
10490
|
case '\n':
|
10426
|
-
|
10427
|
-
|
10491
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10492
|
+
pm_token_buffer_push_byte(&token_buffer, '\n');
|
10428
10493
|
token_buffer.cursor = parser->current.end + 1;
|
10429
10494
|
breakpoint = parser->current.end;
|
10430
10495
|
continue;
|
10431
10496
|
default:
|
10432
|
-
|
10433
|
-
|
10434
|
-
pm_token_buffer_push(&token_buffer, peeked);
|
10497
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10498
|
+
pm_token_buffer_push_escaped(&token_buffer, parser);
|
10435
10499
|
break;
|
10436
10500
|
}
|
10437
10501
|
} else {
|
@@ -10439,7 +10503,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10439
10503
|
case '\r':
|
10440
10504
|
parser->current.end++;
|
10441
10505
|
if (peek(parser) != '\n') {
|
10442
|
-
|
10506
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
10443
10507
|
break;
|
10444
10508
|
}
|
10445
10509
|
/* fallthrough */
|
@@ -10715,14 +10779,6 @@ match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
|
|
10715
10779
|
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
|
10716
10780
|
}
|
10717
10781
|
|
10718
|
-
/**
|
10719
|
-
* Returns true if the current token is any of the five given types.
|
10720
|
-
*/
|
10721
|
-
static inline bool
|
10722
|
-
match5(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5) {
|
10723
|
-
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5);
|
10724
|
-
}
|
10725
|
-
|
10726
10782
|
/**
|
10727
10783
|
* Returns true if the current token is any of the six given types.
|
10728
10784
|
*/
|
@@ -11359,7 +11415,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
11359
11415
|
break;
|
11360
11416
|
}
|
11361
11417
|
|
11362
|
-
// If we have a terminator, then we will parse all
|
11418
|
+
// If we have a terminator, then we will parse all consecutive terminators
|
11363
11419
|
// and then continue parsing the statements list.
|
11364
11420
|
if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
|
11365
11421
|
// If we have a terminator, then we will continue parsing the statements
|
@@ -13149,6 +13205,15 @@ outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
|
|
13149
13205
|
return false;
|
13150
13206
|
}
|
13151
13207
|
|
13208
|
+
/**
|
13209
|
+
* These are the names of the various numbered parameters. We have them here so
|
13210
|
+
* that when we insert them into the constant pool we can use a constant string
|
13211
|
+
* and not have to allocate.
|
13212
|
+
*/
|
13213
|
+
static const char * const pm_numbered_parameter_names[] = {
|
13214
|
+
"_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
|
13215
|
+
};
|
13216
|
+
|
13152
13217
|
/**
|
13153
13218
|
* Parse an identifier into either a local variable read. If the local variable
|
13154
13219
|
* is not found, it returns NULL instead.
|
@@ -13171,12 +13236,10 @@ parse_variable(pm_parser_t *parser) {
|
|
13171
13236
|
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
|
13172
13237
|
} else {
|
13173
13238
|
// Indicate that this scope is using numbered params so that child
|
13174
|
-
// scopes cannot.
|
13175
|
-
|
13176
|
-
|
13177
|
-
|
13178
|
-
// integer value of the number (only _1 through _9 are valid)
|
13179
|
-
uint8_t numbered_parameters = (uint8_t) (number - '0');
|
13239
|
+
// scopes cannot. We subtract the value for the character '0' to get
|
13240
|
+
// the actual integer value of the number (only _1 through _9 are
|
13241
|
+
// valid).
|
13242
|
+
uint8_t numbered_parameters = (uint8_t) (parser->previous.start[1] - '0');
|
13180
13243
|
if (numbered_parameters > parser->current_scope->numbered_parameters) {
|
13181
13244
|
parser->current_scope->numbered_parameters = numbered_parameters;
|
13182
13245
|
pm_parser_numbered_parameters_set(parser, numbered_parameters);
|
@@ -13187,21 +13250,13 @@ parse_variable(pm_parser_t *parser) {
|
|
13187
13250
|
// referencing _2 means that _1 must exist. Therefore here we
|
13188
13251
|
// loop through all of the possibilities and add them into the
|
13189
13252
|
// constant pool.
|
13190
|
-
uint8_t
|
13191
|
-
|
13192
|
-
|
13193
|
-
while (current < number) {
|
13194
|
-
value = malloc(2);
|
13195
|
-
value[0] = '_';
|
13196
|
-
value[1] = current++;
|
13197
|
-
pm_parser_local_add_owned(parser, value, 2);
|
13253
|
+
for (uint8_t numbered_parameter = 1; numbered_parameter <= numbered_parameters - 1; numbered_parameter++) {
|
13254
|
+
pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameter - 1], 2);
|
13198
13255
|
}
|
13199
13256
|
|
13200
|
-
//
|
13201
|
-
|
13202
|
-
|
13203
|
-
pm_parser_local_add_token(parser, &parser->previous);
|
13204
|
-
return pm_local_variable_read_node_create(parser, &parser->previous, 0);
|
13257
|
+
// Finally we can create the local variable read node.
|
13258
|
+
pm_constant_id_t name_id = pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameters - 1], 2);
|
13259
|
+
return pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
|
13205
13260
|
}
|
13206
13261
|
}
|
13207
13262
|
|
@@ -14010,7 +14065,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
|
|
14010
14065
|
// Gather up all of the patterns into the list.
|
14011
14066
|
while (accept1(parser, PM_TOKEN_COMMA)) {
|
14012
14067
|
// Break early here in case we have a trailing comma.
|
14013
|
-
if (
|
14068
|
+
if (match6(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
|
14014
14069
|
node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
|
14015
14070
|
pm_node_list_append(&nodes, node);
|
14016
14071
|
break;
|
@@ -16927,7 +16982,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
16927
16982
|
}
|
16928
16983
|
|
16929
16984
|
// If this node cannot be writable, then we have an error.
|
16930
|
-
if (pm_call_node_writable_p(cast)) {
|
16985
|
+
if (pm_call_node_writable_p(parser, cast)) {
|
16931
16986
|
parse_write_name(parser, &cast->name);
|
16932
16987
|
} else {
|
16933
16988
|
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
@@ -17038,7 +17093,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
17038
17093
|
}
|
17039
17094
|
|
17040
17095
|
// If this node cannot be writable, then we have an error.
|
17041
|
-
if (pm_call_node_writable_p(cast)) {
|
17096
|
+
if (pm_call_node_writable_p(parser, cast)) {
|
17042
17097
|
parse_write_name(parser, &cast->name);
|
17043
17098
|
} else {
|
17044
17099
|
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
@@ -17159,7 +17214,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
17159
17214
|
}
|
17160
17215
|
|
17161
17216
|
// If this node cannot be writable, then we have an error.
|
17162
|
-
if (pm_call_node_writable_p(cast)) {
|
17217
|
+
if (pm_call_node_writable_p(parser, cast)) {
|
17163
17218
|
parse_write_name(parser, &cast->name);
|
17164
17219
|
} else {
|
17165
17220
|
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
@@ -17751,7 +17806,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17751
17806
|
.encoding_changed_callback = NULL,
|
17752
17807
|
.encoding_comment_start = source,
|
17753
17808
|
.lex_callback = NULL,
|
17754
|
-
.
|
17809
|
+
.filepath = { 0 },
|
17755
17810
|
.constant_pool = { 0 },
|
17756
17811
|
.newline_list = { 0 },
|
17757
17812
|
.integer_base = 0,
|
@@ -17794,7 +17849,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17794
17849
|
// If options were provided to this parse, establish them here.
|
17795
17850
|
if (options != NULL) {
|
17796
17851
|
// filepath option
|
17797
|
-
parser->
|
17852
|
+
parser->filepath = options->filepath;
|
17798
17853
|
|
17799
17854
|
// line option
|
17800
17855
|
parser->start_line = options->line;
|
@@ -17896,7 +17951,7 @@ pm_magic_comment_list_free(pm_list_t *list) {
|
|
17896
17951
|
*/
|
17897
17952
|
PRISM_EXPORTED_FUNCTION void
|
17898
17953
|
pm_parser_free(pm_parser_t *parser) {
|
17899
|
-
pm_string_free(&parser->
|
17954
|
+
pm_string_free(&parser->filepath);
|
17900
17955
|
pm_diagnostic_list_free(&parser->error_list);
|
17901
17956
|
pm_diagnostic_list_free(&parser->warning_list);
|
17902
17957
|
pm_comment_list_free(&parser->comment_list);
|
@@ -18060,7 +18115,9 @@ pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_
|
|
18060
18115
|
|
18061
18116
|
// Now we're going to shift all of the errors after this one down one
|
18062
18117
|
// index to make room for the new error.
|
18063
|
-
|
18118
|
+
if (index + 1 < error_list->size) {
|
18119
|
+
memmove(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
|
18120
|
+
}
|
18064
18121
|
|
18065
18122
|
// Finally, we'll insert the error into the array.
|
18066
18123
|
uint32_t column_end;
|
data/src/util/pm_constant_pool.c
CHANGED
@@ -181,6 +181,31 @@ pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t
|
|
181
181
|
return &pool->constants[constant_id - 1];
|
182
182
|
}
|
183
183
|
|
184
|
+
/**
|
185
|
+
* Find a constant in a constant pool. Returns the id of the constant, or 0 if
|
186
|
+
* the constant is not found.
|
187
|
+
*/
|
188
|
+
pm_constant_id_t
|
189
|
+
pm_constant_pool_find(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
190
|
+
assert(is_power_of_two(pool->capacity));
|
191
|
+
const uint32_t mask = pool->capacity - 1;
|
192
|
+
|
193
|
+
uint32_t hash = pm_constant_pool_hash(start, length);
|
194
|
+
uint32_t index = hash & mask;
|
195
|
+
pm_constant_pool_bucket_t *bucket;
|
196
|
+
|
197
|
+
while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
|
198
|
+
pm_constant_t *constant = &pool->constants[bucket->id - 1];
|
199
|
+
if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
|
200
|
+
return bucket->id;
|
201
|
+
}
|
202
|
+
|
203
|
+
index = (index + 1) & mask;
|
204
|
+
}
|
205
|
+
|
206
|
+
return PM_CONSTANT_ID_UNSET;
|
207
|
+
}
|
208
|
+
|
184
209
|
/**
|
185
210
|
* Insert a constant into a constant pool and return its index in the pool.
|
186
211
|
*/
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: prism
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.21.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shopify
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-02-
|
11
|
+
date: 2024-02-05 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email:
|