prism 0.20.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -2
- data/docs/parser_translation.md +1 -1
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +1 -1
- data/include/prism/parser.h +1 -1
- data/include/prism/util/pm_constant_pool.h +11 -0
- data/include/prism/version.h +2 -2
- data/lib/prism/serialize.rb +1 -1
- data/lib/prism/translation/parser/compiler.rb +88 -91
- data/lib/prism/translation/parser.rb +19 -11
- data/prism.gemspec +1 -1
- data/src/encoding.c +1 -1
- data/src/prism.c +238 -181
- data/src/util/pm_constant_pool.c +25 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b4d054a1268bf7f8b5947f30ad244c4713c850911e79c1ba469eca0ac36bc47c
|
4
|
+
data.tar.gz: b77e29c93584b79759381d75cfb5ad0753fe8d5f92863cada81895bb67f17572
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 00fa781d854c4f9b716b238c392e48f3bd946b52a5ea100c8fa98bd909bd7d2fcd116b80c7877cbfff59bb991d7c78158ded3ff4154d7d3362df3b8c00fd4d08
|
7
|
+
data.tar.gz: cfea37b3aa825f0bb91a0bd19dec1ec72187790aca39a2b8d560a483d83c1f4604346320d071c93cd605f39c1fd975b1b508395d9673d7bf95c16feaeeee52e6
|
data/CHANGELOG.md
CHANGED
@@ -6,7 +6,25 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
|
|
6
6
|
|
7
7
|
## [Unreleased]
|
8
8
|
|
9
|
-
## [0.20.0] - 2024-02-01
|
9
|
+
## [0.21.0] - 2024-02-05
|
10
|
+
|
11
|
+
### Added
|
12
|
+
|
13
|
+
- Add the `pm_constant_pool_find` API for finding a constant.
|
14
|
+
|
15
|
+
### Changed
|
16
|
+
|
17
|
+
- Fixes for `Prism::Translation::Parser`.
|
18
|
+
- Ensure all errors flow through `parser.diagnostics.process`.
|
19
|
+
- Fix the find pattern node.
|
20
|
+
- Fix block forwarding with `NumberedParametersNode`.
|
21
|
+
- Ensure we can parse strings with invalid bytes for the encoding.
|
22
|
+
- Fix hash pairs in pattern matching.
|
23
|
+
- Properly reject operator writes on operator calls, e.g., `a.+ -= b`.
|
24
|
+
- Fix multi-byte escapes.
|
25
|
+
- Handle missing body in `begin` within the receiver of a method call.
|
26
|
+
|
27
|
+
## [0.20.0] - 2024-02-01
|
10
28
|
|
11
29
|
### Added
|
12
30
|
|
@@ -323,7 +341,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
|
|
323
341
|
|
324
342
|
- 🎉 Initial release! 🎉
|
325
343
|
|
326
|
-
[unreleased]: https://github.com/ruby/prism/compare/v0.20.0...HEAD
|
344
|
+
[unreleased]: https://github.com/ruby/prism/compare/v0.21.0...HEAD
|
345
|
+
[0.21.0]: https://github.com/ruby/prism/compare/v0.20.0...v0.21.0
|
327
346
|
[0.20.0]: https://github.com/ruby/prism/compare/v0.19.0...v0.20.0
|
328
347
|
[0.19.0]: https://github.com/ruby/prism/compare/v0.18.0...v0.19.0
|
329
348
|
[0.18.0]: https://github.com/ruby/prism/compare/v0.17.1...v0.18.0
|
data/docs/parser_translation.md
CHANGED
@@ -9,7 +9,7 @@ The `parser` gem provides multiple parsers to support different versions of the
|
|
9
9
|
You can use the `prism` parser like you would any other. After requiring the parser, you should be able to call any of the regular `Parser::Base` APIs that you would normally use.
|
10
10
|
|
11
11
|
```ruby
|
12
|
-
require "prism
|
12
|
+
require "prism"
|
13
13
|
|
14
14
|
Prism::Translation::Parser.parse_file("path/to/file.rb")
|
15
15
|
```
|
data/ext/prism/extension.h
CHANGED
data/include/prism/ast.h
CHANGED
@@ -1042,7 +1042,7 @@ static const pm_node_flags_t PM_NODE_FLAG_COMMON_MASK = (1 << (PM_NODE_FLAG_BITS
|
|
1042
1042
|
* Cast the type to an enum to allow the compiler to provide exhaustiveness
|
1043
1043
|
* checking.
|
1044
1044
|
*/
|
1045
|
-
#define PM_NODE_TYPE(node) ((enum pm_node_type) node->type)
|
1045
|
+
#define PM_NODE_TYPE(node) ((enum pm_node_type) (node)->type)
|
1046
1046
|
|
1047
1047
|
/**
|
1048
1048
|
* Return true if the type of the given node matches the given type.
|
data/include/prism/parser.h
CHANGED
@@ -626,7 +626,7 @@ struct pm_parser {
|
|
626
626
|
* This is the path of the file being parsed. We use the filepath when
|
627
627
|
* constructing SourceFileNodes.
|
628
628
|
*/
|
629
|
-
pm_string_t
|
629
|
+
pm_string_t filepath;
|
630
630
|
|
631
631
|
/**
|
632
632
|
* This constant pool keeps all of the constants defined throughout the file
|
data/include/prism/util/pm_constant_pool.h
CHANGED
@@ -154,6 +154,17 @@ bool pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity);
|
|
154
154
|
*/
|
155
155
|
pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id);
|
156
156
|
|
157
|
+
/**
|
158
|
+
* Find a constant in a constant pool. Returns the id of the constant, or 0 if
|
159
|
+
* the constant is not found.
|
160
|
+
*
|
161
|
+
* @param pool The pool to find the constant in.
|
162
|
+
* @param start A pointer to the start of the constant.
|
163
|
+
* @param length The length of the constant.
|
164
|
+
* @return The id of the constant.
|
165
|
+
*/
|
166
|
+
pm_constant_id_t pm_constant_pool_find(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
|
167
|
+
|
157
168
|
/**
|
158
169
|
* Insert a constant into a constant pool that is a slice of a source string.
|
159
170
|
* Returns the id of the constant, or 0 if any potential calls to resize fail.
|
data/include/prism/version.h
CHANGED
@@ -14,7 +14,7 @@
|
|
14
14
|
/**
|
15
15
|
* The minor version of the Prism library as an int.
|
16
16
|
*/
|
17
|
-
#define PRISM_VERSION_MINOR 20
|
17
|
+
#define PRISM_VERSION_MINOR 21
|
18
18
|
|
19
19
|
/**
|
20
20
|
* The patch version of the Prism library as an int.
|
@@ -24,6 +24,6 @@
|
|
24
24
|
/**
|
25
25
|
* The version of the Prism library as a constant string.
|
26
26
|
*/
|
27
|
-
#define PRISM_VERSION "0.20.0"
|
27
|
+
#define PRISM_VERSION "0.21.0"
|
28
28
|
|
29
29
|
#endif
|
data/lib/prism/serialize.rb
CHANGED
data/lib/prism/translation/parser/compiler.rb
CHANGED
@@ -105,14 +105,18 @@ module Prism
|
|
105
105
|
# { a: 1 }
|
106
106
|
# ^^^^
|
107
107
|
def visit_assoc_node(node)
|
108
|
-
if
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
108
|
+
if in_pattern
|
109
|
+
if node.value.is_a?(ImplicitNode)
|
110
|
+
if node.key.is_a?(SymbolNode)
|
111
|
+
builder.match_hash_var([node.key.unescaped, srange(node.key.location)])
|
112
|
+
else
|
113
|
+
builder.match_hash_var_from_str(token(node.key.opening_loc), visit_all(node.key.parts), token(node.key.closing_loc))
|
114
|
+
end
|
113
115
|
else
|
114
|
-
builder.
|
116
|
+
builder.pair_keyword([node.key.unescaped, srange(node.key.location)], visit(node.value))
|
115
117
|
end
|
118
|
+
elsif node.value.is_a?(ImplicitNode)
|
119
|
+
builder.pair_label([node.key.unescaped, srange(node.key.location)])
|
116
120
|
elsif node.operator_loc
|
117
121
|
builder.pair(visit(node.key), token(node.operator_loc), visit(node.value))
|
118
122
|
elsif node.key.is_a?(SymbolNode) && node.key.opening_loc.nil?
|
@@ -241,53 +245,51 @@ module Prism
|
|
241
245
|
block = nil
|
242
246
|
end
|
243
247
|
|
248
|
+
if node.call_operator_loc.nil?
|
249
|
+
case name
|
250
|
+
when :!
|
251
|
+
return visit_block(builder.not_op(token(node.message_loc), token(node.opening_loc), visit(node.receiver), token(node.closing_loc)), block)
|
252
|
+
when :[]
|
253
|
+
return visit_block(builder.index(visit(node.receiver), token(node.opening_loc), visit_all(arguments), token(node.closing_loc)), block)
|
254
|
+
when :[]=
|
255
|
+
if node.message != "[]=" && node.arguments && block.nil? && !node.safe_navigation?
|
256
|
+
return visit_block(
|
257
|
+
builder.assign(
|
258
|
+
builder.index_asgn(
|
259
|
+
visit(node.receiver),
|
260
|
+
token(node.opening_loc),
|
261
|
+
visit_all(node.arguments.arguments[...-1]),
|
262
|
+
token(node.closing_loc),
|
263
|
+
),
|
264
|
+
srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, ["="]),
|
265
|
+
visit(node.arguments.arguments.last)
|
266
|
+
),
|
267
|
+
block
|
268
|
+
)
|
269
|
+
end
|
270
|
+
end
|
271
|
+
end
|
272
|
+
|
273
|
+
message_loc = node.message_loc
|
274
|
+
call_operator_loc = node.call_operator_loc
|
275
|
+
call_operator = [{ "." => :dot, "&." => :anddot, "::" => "::" }.fetch(call_operator_loc.slice), srange(call_operator_loc)] if call_operator_loc
|
276
|
+
|
244
277
|
visit_block(
|
245
|
-
if name
|
246
|
-
builder.
|
247
|
-
|
248
|
-
|
249
|
-
visit(node.
|
250
|
-
token(node.closing_loc)
|
278
|
+
if name.end_with?("=") && !message_loc.slice.end_with?("=") && node.arguments && block.nil?
|
279
|
+
builder.assign(
|
280
|
+
builder.attr_asgn(visit(node.receiver), call_operator, token(message_loc)),
|
281
|
+
srange_find(message_loc.end_offset, node.arguments.location.start_offset, ["="]),
|
282
|
+
visit(node.arguments.arguments.last)
|
251
283
|
)
|
252
|
-
|
253
|
-
builder.
|
284
|
+
else
|
285
|
+
builder.call_method(
|
254
286
|
visit(node.receiver),
|
287
|
+
call_operator,
|
288
|
+
message_loc ? [node.name, srange(message_loc)] : nil,
|
255
289
|
token(node.opening_loc),
|
256
290
|
visit_all(arguments),
|
257
291
|
token(node.closing_loc)
|
258
292
|
)
|
259
|
-
elsif name == :[]= && node.message != "[]=" && node.arguments && block.nil?
|
260
|
-
builder.assign(
|
261
|
-
builder.index_asgn(
|
262
|
-
visit(node.receiver),
|
263
|
-
token(node.opening_loc),
|
264
|
-
visit_all(node.arguments.arguments[...-1]),
|
265
|
-
token(node.closing_loc),
|
266
|
-
),
|
267
|
-
srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, ["="]),
|
268
|
-
visit(node.arguments.arguments.last)
|
269
|
-
)
|
270
|
-
else
|
271
|
-
message_loc = node.message_loc
|
272
|
-
call_operator_loc = node.call_operator_loc
|
273
|
-
call_operator = [{ "." => :dot, "&." => :anddot, "::" => "::" }.fetch(call_operator_loc.slice), srange(call_operator_loc)] if call_operator_loc
|
274
|
-
|
275
|
-
if name.end_with?("=") && !message_loc.slice.end_with?("=") && node.arguments && block.nil?
|
276
|
-
builder.assign(
|
277
|
-
builder.attr_asgn(visit(node.receiver), call_operator, token(message_loc)),
|
278
|
-
srange_find(message_loc.end_offset, node.arguments.location.start_offset, ["="]),
|
279
|
-
visit(node.arguments.arguments.last)
|
280
|
-
)
|
281
|
-
else
|
282
|
-
builder.call_method(
|
283
|
-
visit(node.receiver),
|
284
|
-
call_operator,
|
285
|
-
message_loc ? [node.name, srange(message_loc)] : nil,
|
286
|
-
token(node.opening_loc),
|
287
|
-
visit_all(arguments),
|
288
|
-
token(node.closing_loc)
|
289
|
-
)
|
290
|
-
end
|
291
293
|
end,
|
292
294
|
block
|
293
295
|
)
|
@@ -519,8 +521,6 @@ module Prism
|
|
519
521
|
# def self.foo; end
|
520
522
|
# ^^^^^^^^^^^^^^^^^
|
521
523
|
def visit_def_node(node)
|
522
|
-
forwarding = find_forwarding(node.parameters)
|
523
|
-
|
524
524
|
if node.equal_loc
|
525
525
|
if node.receiver
|
526
526
|
builder.def_endless_singleton(
|
@@ -530,7 +530,7 @@ module Prism
|
|
530
530
|
token(node.name_loc),
|
531
531
|
builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false),
|
532
532
|
token(node.equal_loc),
|
533
|
-
node.body&.accept(copy_compiler(forwarding:
|
533
|
+
node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters)))
|
534
534
|
)
|
535
535
|
else
|
536
536
|
builder.def_endless_method(
|
@@ -538,7 +538,7 @@ module Prism
|
|
538
538
|
token(node.name_loc),
|
539
539
|
builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false),
|
540
540
|
token(node.equal_loc),
|
541
|
-
node.body&.accept(copy_compiler(forwarding:
|
541
|
+
node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters)))
|
542
542
|
)
|
543
543
|
end
|
544
544
|
elsif node.receiver
|
@@ -548,7 +548,7 @@ module Prism
|
|
548
548
|
token(node.operator_loc),
|
549
549
|
token(node.name_loc),
|
550
550
|
builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false),
|
551
|
-
node.body&.accept(copy_compiler(forwarding:
|
551
|
+
node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters))),
|
552
552
|
token(node.end_keyword_loc)
|
553
553
|
)
|
554
554
|
else
|
@@ -556,7 +556,7 @@ module Prism
|
|
556
556
|
token(node.def_keyword_loc),
|
557
557
|
token(node.name_loc),
|
558
558
|
builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false),
|
559
|
-
node.body&.accept(copy_compiler(forwarding:
|
559
|
+
node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters))),
|
560
560
|
token(node.end_keyword_loc)
|
561
561
|
)
|
562
562
|
end
|
@@ -614,9 +614,7 @@ module Prism
|
|
614
614
|
# foo => [*, bar, *]
|
615
615
|
# ^^^^^^^^^^^
|
616
616
|
def visit_find_pattern_node(node)
|
617
|
-
elements = [*node.requireds]
|
618
|
-
elements << node.rest if !node.rest.nil? && !node.rest.is_a?(ImplicitRestNode)
|
619
|
-
elements.concat(node.posts)
|
617
|
+
elements = [node.left, *node.requireds, node.right]
|
620
618
|
|
621
619
|
if node.constant
|
622
620
|
builder.const_pattern(visit(node.constant), token(node.opening_loc), builder.find_pattern(nil, visit_all(elements), nil), token(node.closing_loc))
|
@@ -993,24 +991,24 @@ module Prism
|
|
993
991
|
|
994
992
|
# -> {}
|
995
993
|
def visit_lambda_node(node)
|
994
|
+
parameters = node.parameters
|
995
|
+
|
996
996
|
builder.block(
|
997
997
|
builder.call_lambda(token(node.operator_loc)),
|
998
998
|
[node.opening, srange(node.opening_loc)],
|
999
|
-
if
|
1000
|
-
if node.parameters.is_a?(NumberedParametersNode)
|
1001
|
-
visit(node.parameters)
|
1002
|
-
else
|
1003
|
-
builder.args(
|
1004
|
-
token(node.parameters.opening_loc),
|
1005
|
-
visit(node.parameters),
|
1006
|
-
token(node.parameters.closing_loc),
|
1007
|
-
false
|
1008
|
-
)
|
1009
|
-
end
|
1010
|
-
else
|
999
|
+
if parameters.nil?
|
1011
1000
|
builder.args(nil, [], nil, false)
|
1001
|
+
elsif node.parameters.is_a?(NumberedParametersNode)
|
1002
|
+
visit(node.parameters)
|
1003
|
+
else
|
1004
|
+
builder.args(
|
1005
|
+
token(node.parameters.opening_loc),
|
1006
|
+
visit(node.parameters),
|
1007
|
+
token(node.parameters.closing_loc),
|
1008
|
+
false
|
1009
|
+
)
|
1012
1010
|
end,
|
1013
|
-
node.body&.accept(copy_compiler(forwarding: find_forwarding(
|
1011
|
+
node.body&.accept(copy_compiler(forwarding: parameters.is_a?(NumberedParametersNode) ? [] : find_forwarding(parameters&.parameters))),
|
1014
1012
|
[node.closing, srange(node.closing_loc)]
|
1015
1013
|
)
|
1016
1014
|
end
|
@@ -1096,7 +1094,7 @@ module Prism
|
|
1096
1094
|
# case of a syntax error. The parser gem doesn't have such a concept, so
|
1097
1095
|
# we invent our own here.
|
1098
1096
|
def visit_missing_node(node)
|
1099
|
-
|
1097
|
+
::AST::Node.new(:missing, [], location: ::Parser::Source::Map.new(srange(node.location)))
|
1100
1098
|
end
|
1101
1099
|
|
1102
1100
|
# module Foo; end
|
@@ -1727,29 +1725,29 @@ module Prism
|
|
1727
1725
|
# Visit a block node on a call.
|
1728
1726
|
def visit_block(call, block)
|
1729
1727
|
if block
|
1728
|
+
parameters = block.parameters
|
1729
|
+
|
1730
1730
|
builder.block(
|
1731
1731
|
call,
|
1732
1732
|
token(block.opening_loc),
|
1733
|
-
if
|
1734
|
-
if parameters.is_a?(NumberedParametersNode)
|
1735
|
-
visit(parameters)
|
1736
|
-
else
|
1737
|
-
builder.args(
|
1738
|
-
token(parameters.opening_loc),
|
1739
|
-
if procarg0?(parameters.parameters)
|
1740
|
-
parameter = parameters.parameters.requireds.first
|
1741
|
-
[builder.procarg0(visit(parameter))].concat(visit_all(parameters.locals))
|
1742
|
-
else
|
1743
|
-
visit(parameters)
|
1744
|
-
end,
|
1745
|
-
token(parameters.closing_loc),
|
1746
|
-
false
|
1747
|
-
)
|
1748
|
-
end
|
1749
|
-
else
|
1733
|
+
if parameters.nil?
|
1750
1734
|
builder.args(nil, [], nil, false)
|
1735
|
+
elsif parameters.is_a?(NumberedParametersNode)
|
1736
|
+
visit(parameters)
|
1737
|
+
else
|
1738
|
+
builder.args(
|
1739
|
+
token(parameters.opening_loc),
|
1740
|
+
if procarg0?(parameters.parameters)
|
1741
|
+
parameter = parameters.parameters.requireds.first
|
1742
|
+
[builder.procarg0(visit(parameter))].concat(visit_all(parameters.locals))
|
1743
|
+
else
|
1744
|
+
visit(parameters)
|
1745
|
+
end,
|
1746
|
+
token(parameters.closing_loc),
|
1747
|
+
false
|
1748
|
+
)
|
1751
1749
|
end,
|
1752
|
-
block.body&.accept(copy_compiler(forwarding: find_forwarding(
|
1750
|
+
block.body&.accept(copy_compiler(forwarding: parameters.is_a?(NumberedParametersNode) ? [] : find_forwarding(parameters&.parameters))),
|
1753
1751
|
token(block.closing_loc)
|
1754
1752
|
)
|
1755
1753
|
else
|
@@ -1762,9 +1760,9 @@ module Prism
|
|
1762
1760
|
children = []
|
1763
1761
|
node.parts.each do |part|
|
1764
1762
|
pushing =
|
1765
|
-
if part.is_a?(StringNode) && part.unescaped.
|
1766
|
-
unescaped = part.unescaped.
|
1767
|
-
escaped = part.content.
|
1763
|
+
if part.is_a?(StringNode) && part.unescaped.include?("\n")
|
1764
|
+
unescaped = part.unescaped.lines(chomp: true)
|
1765
|
+
escaped = part.content.lines(chomp: true)
|
1768
1766
|
|
1769
1767
|
escaped_lengths =
|
1770
1768
|
if node.opening.end_with?("'")
|
@@ -1779,7 +1777,6 @@ module Prism
|
|
1779
1777
|
unescaped.zip(escaped_lengths).map do |unescaped_line, escaped_length|
|
1780
1778
|
end_offset = start_offset + (escaped_length || 0)
|
1781
1779
|
inner_part = builder.string_internal(["#{unescaped_line}\n", srange_offsets(start_offset, end_offset)])
|
1782
|
-
|
1783
1780
|
start_offset = end_offset
|
1784
1781
|
inner_part
|
1785
1782
|
end
|
data/lib/prism/translation/parser.rb
CHANGED
@@ -26,7 +26,7 @@ module Prism
|
|
26
26
|
Racc_debug_parser = false # :nodoc:
|
27
27
|
|
28
28
|
def version # :nodoc:
|
29
|
-
|
29
|
+
34
|
30
30
|
end
|
31
31
|
|
32
32
|
# The default encoding for Ruby files is UTF-8.
|
@@ -42,9 +42,10 @@ module Prism
|
|
42
42
|
@source_buffer = source_buffer
|
43
43
|
source = source_buffer.source
|
44
44
|
|
45
|
-
|
45
|
+
offset_cache = build_offset_cache(source)
|
46
|
+
result = unwrap(Prism.parse(source, filepath: source_buffer.name), offset_cache)
|
46
47
|
|
47
|
-
build_ast(result.value,
|
48
|
+
build_ast(result.value, offset_cache)
|
48
49
|
ensure
|
49
50
|
@source_buffer = nil
|
50
51
|
end
|
@@ -55,7 +56,7 @@ module Prism
|
|
55
56
|
source = source_buffer.source
|
56
57
|
|
57
58
|
offset_cache = build_offset_cache(source)
|
58
|
-
result = unwrap(Prism.parse(source, filepath: source_buffer.name))
|
59
|
+
result = unwrap(Prism.parse(source, filepath: source_buffer.name), offset_cache)
|
59
60
|
|
60
61
|
[
|
61
62
|
build_ast(result.value, offset_cache),
|
@@ -72,7 +73,7 @@ module Prism
|
|
72
73
|
source = source_buffer.source
|
73
74
|
|
74
75
|
offset_cache = build_offset_cache(source)
|
75
|
-
result = unwrap(Prism.parse_lex(source, filepath: source_buffer.name))
|
76
|
+
result = unwrap(Prism.parse_lex(source, filepath: source_buffer.name), offset_cache)
|
76
77
|
|
77
78
|
program, tokens = result.value
|
78
79
|
|
@@ -93,16 +94,23 @@ module Prism
|
|
93
94
|
|
94
95
|
private
|
95
96
|
|
97
|
+
# This is a hook to allow consumers to disable some errors if they don't
|
98
|
+
# want them to block creating the syntax tree.
|
99
|
+
def valid_error?(error)
|
100
|
+
true
|
101
|
+
end
|
102
|
+
|
96
103
|
# If there was a error generated during the parse, then raise an
|
97
104
|
# appropriate syntax error. Otherwise return the result.
|
98
|
-
def unwrap(result)
|
99
|
-
|
105
|
+
def unwrap(result, offset_cache)
|
106
|
+
result.errors.each do |error|
|
107
|
+
next unless valid_error?(error)
|
100
108
|
|
101
|
-
|
102
|
-
|
109
|
+
location = build_range(error.location, offset_cache)
|
110
|
+
diagnostics.process(Diagnostic.new(error.message, location))
|
111
|
+
end
|
103
112
|
|
104
|
-
|
105
|
-
raise ::Parser::SyntaxError, diagnostic
|
113
|
+
result
|
106
114
|
end
|
107
115
|
|
108
116
|
# Prism deals with offsets in bytes, while the parser gem deals with
|
data/prism.gemspec
CHANGED
data/src/encoding.c
CHANGED
@@ -2252,7 +2252,7 @@ static const uint8_t pm_utf_8_dfa[] = {
|
|
2252
2252
|
*/
|
2253
2253
|
static pm_unicode_codepoint_t
|
2254
2254
|
pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
|
2255
|
-
assert(n >=
|
2255
|
+
assert(n >= 0);
|
2256
2256
|
size_t maximum = (size_t) n;
|
2257
2257
|
|
2258
2258
|
uint32_t codepoint;
|
data/src/prism.c
CHANGED
@@ -870,6 +870,105 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
|
|
870
870
|
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
|
871
871
|
}
|
872
872
|
|
873
|
+
/******************************************************************************/
|
874
|
+
/* Basic character checks */
|
875
|
+
/******************************************************************************/
|
876
|
+
|
877
|
+
/**
|
878
|
+
* This function is used extremely frequently to lex all of the identifiers in a
|
879
|
+
* source file, so it's important that it be as fast as possible. For this
|
880
|
+
* reason we have the encoding_changed boolean to check if we need to go through
|
881
|
+
* the function pointer or can just directly use the UTF-8 functions.
|
882
|
+
*/
|
883
|
+
static inline size_t
|
884
|
+
char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b) {
|
885
|
+
if (parser->encoding_changed) {
|
886
|
+
size_t width;
|
887
|
+
if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
|
888
|
+
return width;
|
889
|
+
} else if (*b == '_') {
|
890
|
+
return 1;
|
891
|
+
} else if (*b >= 0x80) {
|
892
|
+
return parser->encoding->char_width(b, parser->end - b);
|
893
|
+
} else {
|
894
|
+
return 0;
|
895
|
+
}
|
896
|
+
} else if (*b < 0x80) {
|
897
|
+
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
|
898
|
+
} else {
|
899
|
+
return pm_encoding_utf_8_char_width(b, parser->end - b);
|
900
|
+
}
|
901
|
+
}
|
902
|
+
|
903
|
+
/**
|
904
|
+
* Similar to char_is_identifier but this function assumes that the encoding
|
905
|
+
* has not been changed.
|
906
|
+
*/
|
907
|
+
static inline size_t
|
908
|
+
char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
|
909
|
+
if (*b < 0x80) {
|
910
|
+
return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
|
911
|
+
} else {
|
912
|
+
return pm_encoding_utf_8_char_width(b, end - b);
|
913
|
+
}
|
914
|
+
}
|
915
|
+
|
916
|
+
/**
|
917
|
+
* Like the above, this function is also used extremely frequently to lex all of
|
918
|
+
* the identifiers in a source file once the first character has been found. So
|
919
|
+
* it's important that it be as fast as possible.
|
920
|
+
*/
|
921
|
+
static inline size_t
|
922
|
+
char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
|
923
|
+
if (parser->encoding_changed) {
|
924
|
+
size_t width;
|
925
|
+
if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
|
926
|
+
return width;
|
927
|
+
} else if (*b == '_') {
|
928
|
+
return 1;
|
929
|
+
} else if (*b >= 0x80) {
|
930
|
+
return parser->encoding->char_width(b, parser->end - b);
|
931
|
+
} else {
|
932
|
+
return 0;
|
933
|
+
}
|
934
|
+
}
|
935
|
+
return char_is_identifier_utf8(b, parser->end);
|
936
|
+
}
|
937
|
+
|
938
|
+
// Here we're defining a perfect hash for the characters that are allowed in
|
939
|
+
// global names. This is used to quickly check the next character after a $ to
|
940
|
+
// see if it's a valid character for a global name.
|
941
|
+
#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
|
942
|
+
#define PUNCT(idx) ( \
|
943
|
+
BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
|
944
|
+
BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
|
945
|
+
BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
|
946
|
+
BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
|
947
|
+
BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
|
948
|
+
BIT('0', idx))
|
949
|
+
|
950
|
+
const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
|
951
|
+
|
952
|
+
#undef BIT
|
953
|
+
#undef PUNCT
|
954
|
+
|
955
|
+
static inline bool
|
956
|
+
char_is_global_name_punctuation(const uint8_t b) {
|
957
|
+
const unsigned int i = (const unsigned int) b;
|
958
|
+
if (i <= 0x20 || 0x7e < i) return false;
|
959
|
+
|
960
|
+
return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
|
961
|
+
}
|
962
|
+
|
963
|
+
static inline bool
|
964
|
+
token_is_setter_name(pm_token_t *token) {
|
965
|
+
return (
|
966
|
+
(token->type == PM_TOKEN_IDENTIFIER) &&
|
967
|
+
(token->end - token->start >= 2) &&
|
968
|
+
(token->end[-1] == '=')
|
969
|
+
);
|
970
|
+
}
|
971
|
+
|
873
972
|
/******************************************************************************/
|
874
973
|
/* Node flag handling functions */
|
875
974
|
/******************************************************************************/
|
@@ -1923,11 +2022,12 @@ pm_call_node_index_p(pm_call_node_t *node) {
|
|
1923
2022
|
* operator assignment.
|
1924
2023
|
*/
|
1925
2024
|
static inline bool
|
1926
|
-
pm_call_node_writable_p(pm_call_node_t *node) {
|
2025
|
+
pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
|
1927
2026
|
return (
|
1928
2027
|
(node->message_loc.start != NULL) &&
|
1929
2028
|
(node->message_loc.end[-1] != '!') &&
|
1930
2029
|
(node->message_loc.end[-1] != '?') &&
|
2030
|
+
char_is_identifier_start(parser, node->message_loc.start) &&
|
1931
2031
|
(node->opening_loc.start == NULL) &&
|
1932
2032
|
(node->arguments == NULL) &&
|
1933
2033
|
(node->block == NULL)
|
@@ -2744,19 +2844,21 @@ pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *targ
|
|
2744
2844
|
* Check if the receiver of a `def` node is allowed.
|
2745
2845
|
*/
|
2746
2846
|
static void
|
2747
|
-
|
2748
|
-
switch (
|
2847
|
+
pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
|
2848
|
+
switch (PM_NODE_TYPE(node)) {
|
2749
2849
|
case PM_BEGIN_NODE: {
|
2750
|
-
pm_begin_node_t *
|
2751
|
-
|
2850
|
+
const pm_begin_node_t *cast = (pm_begin_node_t *) node;
|
2851
|
+
if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
|
2752
2852
|
break;
|
2753
2853
|
}
|
2754
|
-
case PM_PARENTHESES_NODE:
|
2755
|
-
|
2854
|
+
case PM_PARENTHESES_NODE: {
|
2855
|
+
const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
|
2856
|
+
if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
|
2756
2857
|
break;
|
2858
|
+
}
|
2757
2859
|
case PM_STATEMENTS_NODE: {
|
2758
|
-
pm_statements_node_t *
|
2759
|
-
|
2860
|
+
const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
|
2861
|
+
pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
|
2760
2862
|
break;
|
2761
2863
|
}
|
2762
2864
|
case PM_ARRAY_NODE:
|
@@ -2775,7 +2877,10 @@ pm_check_def_receiver(pm_parser_t *parser, pm_node_t *receiver) {
|
|
2775
2877
|
case PM_STRING_NODE:
|
2776
2878
|
case PM_SYMBOL_NODE:
|
2777
2879
|
case PM_X_STRING_NODE:
|
2778
|
-
pm_parser_err_node(parser,
|
2880
|
+
pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
|
2881
|
+
break;
|
2882
|
+
default:
|
2883
|
+
break;
|
2779
2884
|
}
|
2780
2885
|
}
|
2781
2886
|
|
@@ -2807,7 +2912,7 @@ pm_def_node_create(
|
|
2807
2912
|
}
|
2808
2913
|
|
2809
2914
|
if ((receiver != NULL) && PM_NODE_TYPE_P(receiver, PM_PARENTHESES_NODE)) {
|
2810
|
-
|
2915
|
+
pm_def_node_receiver_check(parser, receiver);
|
2811
2916
|
}
|
2812
2917
|
|
2813
2918
|
*node = (pm_def_node_t) {
|
@@ -5330,7 +5435,7 @@ pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword)
|
|
5330
5435
|
.flags = PM_NODE_FLAG_STATIC_LITERAL,
|
5331
5436
|
.location = PM_LOCATION_TOKEN_VALUE(file_keyword),
|
5332
5437
|
},
|
5333
|
-
.filepath = parser->
|
5438
|
+
.filepath = parser->filepath
|
5334
5439
|
};
|
5335
5440
|
|
5336
5441
|
return node;
|
@@ -6220,6 +6325,16 @@ pm_parser_local_add_owned(pm_parser_t *parser, const uint8_t *start, size_t leng
|
|
6220
6325
|
return constant_id;
|
6221
6326
|
}
|
6222
6327
|
|
6328
|
+
/**
|
6329
|
+
* Add a local variable from a constant string to the current scope.
|
6330
|
+
*/
|
6331
|
+
static pm_constant_id_t
|
6332
|
+
pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
|
6333
|
+
pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
|
6334
|
+
if (constant_id != 0) pm_parser_local_add(parser, constant_id);
|
6335
|
+
return constant_id;
|
6336
|
+
}
|
6337
|
+
|
6223
6338
|
/**
|
6224
6339
|
* Add a parameter name to the current scope and check whether the name of the
|
6225
6340
|
* parameter is unique or not.
|
@@ -6259,105 +6374,6 @@ pm_parser_scope_pop(pm_parser_t *parser) {
|
|
6259
6374
|
free(scope);
|
6260
6375
|
}
|
6261
6376
|
|
6262
|
-
/******************************************************************************/
|
6263
|
-
/* Basic character checks */
|
6264
|
-
/******************************************************************************/
|
6265
|
-
|
6266
|
-
/**
|
6267
|
-
* This function is used extremely frequently to lex all of the identifiers in a
|
6268
|
-
* source file, so it's important that it be as fast as possible. For this
|
6269
|
-
* reason we have the encoding_changed boolean to check if we need to go through
|
6270
|
-
* the function pointer or can just directly use the UTF-8 functions.
|
6271
|
-
*/
|
6272
|
-
static inline size_t
|
6273
|
-
char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
|
6274
|
-
if (parser->encoding_changed) {
|
6275
|
-
size_t width;
|
6276
|
-
if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
|
6277
|
-
return width;
|
6278
|
-
} else if (*b == '_') {
|
6279
|
-
return 1;
|
6280
|
-
} else if (*b >= 0x80) {
|
6281
|
-
return parser->encoding->char_width(b, parser->end - b);
|
6282
|
-
} else {
|
6283
|
-
return 0;
|
6284
|
-
}
|
6285
|
-
} else if (*b < 0x80) {
|
6286
|
-
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
|
6287
|
-
} else {
|
6288
|
-
return pm_encoding_utf_8_char_width(b, parser->end - b);
|
6289
|
-
}
|
6290
|
-
}
|
6291
|
-
|
6292
|
-
/**
|
6293
|
-
* Similar to char_is_identifier but this function assumes that the encoding
|
6294
|
-
* has not been changed.
|
6295
|
-
*/
|
6296
|
-
static inline size_t
|
6297
|
-
char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
|
6298
|
-
if (*b < 0x80) {
|
6299
|
-
return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
|
6300
|
-
} else {
|
6301
|
-
return pm_encoding_utf_8_char_width(b, end - b);
|
6302
|
-
}
|
6303
|
-
}
|
6304
|
-
|
6305
|
-
/**
|
6306
|
-
* Like the above, this function is also used extremely frequently to lex all of
|
6307
|
-
* the identifiers in a source file once the first character has been found. So
|
6308
|
-
* it's important that it be as fast as possible.
|
6309
|
-
*/
|
6310
|
-
static inline size_t
|
6311
|
-
char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
|
6312
|
-
if (parser->encoding_changed) {
|
6313
|
-
size_t width;
|
6314
|
-
if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
|
6315
|
-
return width;
|
6316
|
-
} else if (*b == '_') {
|
6317
|
-
return 1;
|
6318
|
-
} else if (*b >= 0x80) {
|
6319
|
-
return parser->encoding->char_width(b, parser->end - b);
|
6320
|
-
} else {
|
6321
|
-
return 0;
|
6322
|
-
}
|
6323
|
-
}
|
6324
|
-
return char_is_identifier_utf8(b, parser->end);
|
6325
|
-
}
|
6326
|
-
|
6327
|
-
// Here we're defining a perfect hash for the characters that are allowed in
|
6328
|
-
// global names. This is used to quickly check the next character after a $ to
|
6329
|
-
// see if it's a valid character for a global name.
|
6330
|
-
#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
|
6331
|
-
#define PUNCT(idx) ( \
|
6332
|
-
BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
|
6333
|
-
BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
|
6334
|
-
BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
|
6335
|
-
BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
|
6336
|
-
BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
|
6337
|
-
BIT('0', idx))
|
6338
|
-
|
6339
|
-
const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
|
6340
|
-
|
6341
|
-
#undef BIT
|
6342
|
-
#undef PUNCT
|
6343
|
-
|
6344
|
-
static inline bool
|
6345
|
-
char_is_global_name_punctuation(const uint8_t b) {
|
6346
|
-
const unsigned int i = (const unsigned int) b;
|
6347
|
-
if (i <= 0x20 || 0x7e < i) return false;
|
6348
|
-
|
6349
|
-
return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
|
6350
|
-
}
|
6351
|
-
|
6352
|
-
static inline bool
|
6353
|
-
token_is_setter_name(pm_token_t *token) {
|
6354
|
-
return (
|
6355
|
-
(token->type == PM_TOKEN_IDENTIFIER) &&
|
6356
|
-
(token->end - token->start >= 2) &&
|
6357
|
-
(token->end[-1] == '=')
|
6358
|
-
);
|
6359
|
-
}
|
6360
|
-
|
6361
6377
|
/******************************************************************************/
|
6362
6378
|
/* Stack helpers */
|
6363
6379
|
/******************************************************************************/
|
@@ -7673,6 +7689,28 @@ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte
|
|
7673
7689
|
pm_buffer_append_byte(buffer, byte);
|
7674
7690
|
}
|
7675
7691
|
|
7692
|
+
/**
|
7693
|
+
* Write each byte of the given escaped character into the buffer.
|
7694
|
+
*/
|
7695
|
+
static inline void
|
7696
|
+
escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
|
7697
|
+
size_t width;
|
7698
|
+
if (parser->encoding_changed) {
|
7699
|
+
width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
7700
|
+
} else {
|
7701
|
+
width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
|
7702
|
+
}
|
7703
|
+
|
7704
|
+
// TODO: If the character is invalid in the given encoding, then we'll just
|
7705
|
+
// push one byte into the buffer. This should actually be an error.
|
7706
|
+
width = (width == 0) ? 1 : width;
|
7707
|
+
|
7708
|
+
for (size_t index = 0; index < width; index++) {
|
7709
|
+
escape_write_byte_encoded(parser, buffer, *parser->current.end);
|
7710
|
+
parser->current.end++;
|
7711
|
+
}
|
7712
|
+
}
|
7713
|
+
|
7676
7714
|
/**
|
7677
7715
|
* The regular expression engine doesn't support the same escape sequences as
|
7678
7716
|
* Ruby does. So first we have to read the escape sequence, and then we have to
|
@@ -8011,7 +8049,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
|
|
8011
8049
|
/* fallthrough */
|
8012
8050
|
default: {
|
8013
8051
|
if (parser->current.end < parser->end) {
|
8014
|
-
|
8052
|
+
escape_write_escape_encoded(parser, buffer);
|
8015
8053
|
}
|
8016
8054
|
return;
|
8017
8055
|
}
|
@@ -8288,10 +8326,40 @@ typedef struct {
|
|
8288
8326
|
* Push the given byte into the token buffer.
|
8289
8327
|
*/
|
8290
8328
|
static inline void
|
8291
|
-
|
8329
|
+
pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
|
8292
8330
|
pm_buffer_append_byte(&token_buffer->buffer, byte);
|
8293
8331
|
}
|
8294
8332
|
|
8333
|
+
/**
|
8334
|
+
* Append the given bytes into the token buffer.
|
8335
|
+
*/
|
8336
|
+
static inline void
|
8337
|
+
pm_token_buffer_push_bytes(pm_token_buffer_t *token_buffer, const uint8_t *bytes, size_t length) {
|
8338
|
+
pm_buffer_append_bytes(&token_buffer->buffer, bytes, length);
|
8339
|
+
}
|
8340
|
+
|
8341
|
+
/**
|
8342
|
+
* Push an escaped character into the token buffer.
|
8343
|
+
*/
|
8344
|
+
static inline void
|
8345
|
+
pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
|
8346
|
+
// First, determine the width of the character to be escaped.
|
8347
|
+
size_t width;
|
8348
|
+
if (parser->encoding_changed) {
|
8349
|
+
width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
8350
|
+
} else {
|
8351
|
+
width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
|
8352
|
+
}
|
8353
|
+
|
8354
|
+
// TODO: If the character is invalid in the given encoding, then we'll just
|
8355
|
+
// push one byte into the buffer. This should actually be an error.
|
8356
|
+
width = (width == 0 ? 1 : width);
|
8357
|
+
|
8358
|
+
// Now, push the bytes into the buffer.
|
8359
|
+
pm_token_buffer_push_bytes(token_buffer, parser->current.end, width);
|
8360
|
+
parser->current.end += width;
|
8361
|
+
}
|
8362
|
+
|
8295
8363
|
/**
|
8296
8364
|
* When we're about to return from lexing the current token and we know for sure
|
8297
8365
|
* that we have found an escape sequence, this function is called to copy the
|
@@ -9704,18 +9772,18 @@ parser_lex(pm_parser_t *parser) {
|
|
9704
9772
|
case '\t':
|
9705
9773
|
case '\v':
|
9706
9774
|
case '\\':
|
9707
|
-
|
9775
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9708
9776
|
parser->current.end++;
|
9709
9777
|
break;
|
9710
9778
|
case '\r':
|
9711
9779
|
parser->current.end++;
|
9712
9780
|
if (peek(parser) != '\n') {
|
9713
|
-
|
9781
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
9714
9782
|
break;
|
9715
9783
|
}
|
9716
9784
|
/* fallthrough */
|
9717
9785
|
case '\n':
|
9718
|
-
|
9786
|
+
pm_token_buffer_push_byte(&token_buffer, '\n');
|
9719
9787
|
|
9720
9788
|
if (parser->heredoc_end) {
|
9721
9789
|
// ... if we are on the same line as a heredoc,
|
@@ -9733,14 +9801,13 @@ parser_lex(pm_parser_t *parser) {
|
|
9733
9801
|
break;
|
9734
9802
|
default:
|
9735
9803
|
if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
|
9736
|
-
|
9804
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9737
9805
|
parser->current.end++;
|
9738
9806
|
} else if (lex_mode->as.list.interpolation) {
|
9739
9807
|
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
|
9740
9808
|
} else {
|
9741
|
-
|
9742
|
-
|
9743
|
-
parser->current.end++;
|
9809
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9810
|
+
pm_token_buffer_push_escaped(&token_buffer, parser);
|
9744
9811
|
}
|
9745
9812
|
|
9746
9813
|
break;
|
@@ -9898,9 +9965,9 @@ parser_lex(pm_parser_t *parser) {
|
|
9898
9965
|
parser->current.end++;
|
9899
9966
|
if (peek(parser) != '\n') {
|
9900
9967
|
if (lex_mode->as.regexp.terminator != '\r') {
|
9901
|
-
|
9968
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9902
9969
|
}
|
9903
|
-
|
9970
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
9904
9971
|
break;
|
9905
9972
|
}
|
9906
9973
|
/* fallthrough */
|
@@ -9935,20 +10002,19 @@ parser_lex(pm_parser_t *parser) {
|
|
9935
10002
|
case '$': case ')': case '*': case '+':
|
9936
10003
|
case '.': case '>': case '?': case ']':
|
9937
10004
|
case '^': case '|': case '}':
|
9938
|
-
|
10005
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9939
10006
|
break;
|
9940
10007
|
default:
|
9941
10008
|
break;
|
9942
10009
|
}
|
9943
10010
|
|
9944
|
-
|
10011
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9945
10012
|
parser->current.end++;
|
9946
10013
|
break;
|
9947
10014
|
}
|
9948
10015
|
|
9949
|
-
if (peeked < 0x80)
|
9950
|
-
|
9951
|
-
parser->current.end++;
|
10016
|
+
if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer, '\\');
|
10017
|
+
pm_token_buffer_push_escaped(&token_buffer, parser);
|
9952
10018
|
break;
|
9953
10019
|
}
|
9954
10020
|
|
@@ -10115,23 +10181,23 @@ parser_lex(pm_parser_t *parser) {
|
|
10115
10181
|
|
10116
10182
|
switch (peeked) {
|
10117
10183
|
case '\\':
|
10118
|
-
|
10184
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10119
10185
|
parser->current.end++;
|
10120
10186
|
break;
|
10121
10187
|
case '\r':
|
10122
10188
|
parser->current.end++;
|
10123
10189
|
if (peek(parser) != '\n') {
|
10124
10190
|
if (!lex_mode->as.string.interpolation) {
|
10125
|
-
|
10191
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10126
10192
|
}
|
10127
|
-
|
10193
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
10128
10194
|
break;
|
10129
10195
|
}
|
10130
10196
|
/* fallthrough */
|
10131
10197
|
case '\n':
|
10132
10198
|
if (!lex_mode->as.string.interpolation) {
|
10133
|
-
|
10134
|
-
|
10199
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10200
|
+
pm_token_buffer_push_byte(&token_buffer, '\n');
|
10135
10201
|
}
|
10136
10202
|
|
10137
10203
|
if (parser->heredoc_end) {
|
@@ -10150,17 +10216,16 @@ parser_lex(pm_parser_t *parser) {
|
|
10150
10216
|
break;
|
10151
10217
|
default:
|
10152
10218
|
if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
|
10153
|
-
|
10219
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
10154
10220
|
parser->current.end++;
|
10155
10221
|
} else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
|
10156
|
-
|
10222
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
10157
10223
|
parser->current.end++;
|
10158
10224
|
} else if (lex_mode->as.string.interpolation) {
|
10159
10225
|
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
|
10160
10226
|
} else {
|
10161
|
-
|
10162
|
-
|
10163
|
-
parser->current.end++;
|
10227
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10228
|
+
pm_token_buffer_push_escaped(&token_buffer, parser);
|
10164
10229
|
}
|
10165
10230
|
|
10166
10231
|
break;
|
@@ -10417,21 +10482,20 @@ parser_lex(pm_parser_t *parser) {
|
|
10417
10482
|
case '\r':
|
10418
10483
|
parser->current.end++;
|
10419
10484
|
if (peek(parser) != '\n') {
|
10420
|
-
|
10421
|
-
|
10485
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10486
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
10422
10487
|
break;
|
10423
10488
|
}
|
10424
10489
|
/* fallthrough */
|
10425
10490
|
case '\n':
|
10426
|
-
|
10427
|
-
|
10491
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10492
|
+
pm_token_buffer_push_byte(&token_buffer, '\n');
|
10428
10493
|
token_buffer.cursor = parser->current.end + 1;
|
10429
10494
|
breakpoint = parser->current.end;
|
10430
10495
|
continue;
|
10431
10496
|
default:
|
10432
|
-
|
10433
|
-
|
10434
|
-
pm_token_buffer_push(&token_buffer, peeked);
|
10497
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10498
|
+
pm_token_buffer_push_escaped(&token_buffer, parser);
|
10435
10499
|
break;
|
10436
10500
|
}
|
10437
10501
|
} else {
|
@@ -10439,7 +10503,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10439
10503
|
case '\r':
|
10440
10504
|
parser->current.end++;
|
10441
10505
|
if (peek(parser) != '\n') {
|
10442
|
-
|
10506
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
10443
10507
|
break;
|
10444
10508
|
}
|
10445
10509
|
/* fallthrough */
|
@@ -10715,14 +10779,6 @@ match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
|
|
10715
10779
|
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
|
10716
10780
|
}
|
10717
10781
|
|
10718
|
-
/**
|
10719
|
-
* Returns true if the current token is any of the five given types.
|
10720
|
-
*/
|
10721
|
-
static inline bool
|
10722
|
-
match5(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5) {
|
10723
|
-
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5);
|
10724
|
-
}
|
10725
|
-
|
10726
10782
|
/**
|
10727
10783
|
* Returns true if the current token is any of the six given types.
|
10728
10784
|
*/
|
@@ -11359,7 +11415,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
11359
11415
|
break;
|
11360
11416
|
}
|
11361
11417
|
|
11362
|
-
// If we have a terminator, then we will parse all
|
11418
|
+
// If we have a terminator, then we will parse all consecutive terminators
|
11363
11419
|
// and then continue parsing the statements list.
|
11364
11420
|
if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
|
11365
11421
|
// If we have a terminator, then we will continue parsing the statements
|
@@ -13149,6 +13205,15 @@ outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
|
|
13149
13205
|
return false;
|
13150
13206
|
}
|
13151
13207
|
|
13208
|
+
/**
|
13209
|
+
* These are the names of the various numbered parameters. We have them here so
|
13210
|
+
* that when we insert them into the constant pool we can use a constant string
|
13211
|
+
* and not have to allocate.
|
13212
|
+
*/
|
13213
|
+
static const char * const pm_numbered_parameter_names[] = {
|
13214
|
+
"_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
|
13215
|
+
};
|
13216
|
+
|
13152
13217
|
/**
|
13153
13218
|
* Parse an identifier into either a local variable read. If the local variable
|
13154
13219
|
* is not found, it returns NULL instead.
|
@@ -13171,12 +13236,10 @@ parse_variable(pm_parser_t *parser) {
|
|
13171
13236
|
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
|
13172
13237
|
} else {
|
13173
13238
|
// Indicate that this scope is using numbered params so that child
|
13174
|
-
// scopes cannot.
|
13175
|
-
|
13176
|
-
|
13177
|
-
|
13178
|
-
// integer value of the number (only _1 through _9 are valid)
|
13179
|
-
uint8_t numbered_parameters = (uint8_t) (number - '0');
|
13239
|
+
// scopes cannot. We subtract the value for the character '0' to get
|
13240
|
+
// the actual integer value of the number (only _1 through _9 are
|
13241
|
+
// valid).
|
13242
|
+
uint8_t numbered_parameters = (uint8_t) (parser->previous.start[1] - '0');
|
13180
13243
|
if (numbered_parameters > parser->current_scope->numbered_parameters) {
|
13181
13244
|
parser->current_scope->numbered_parameters = numbered_parameters;
|
13182
13245
|
pm_parser_numbered_parameters_set(parser, numbered_parameters);
|
@@ -13187,21 +13250,13 @@ parse_variable(pm_parser_t *parser) {
|
|
13187
13250
|
// referencing _2 means that _1 must exist. Therefore here we
|
13188
13251
|
// loop through all of the possibilities and add them into the
|
13189
13252
|
// constant pool.
|
13190
|
-
uint8_t
|
13191
|
-
|
13192
|
-
|
13193
|
-
while (current < number) {
|
13194
|
-
value = malloc(2);
|
13195
|
-
value[0] = '_';
|
13196
|
-
value[1] = current++;
|
13197
|
-
pm_parser_local_add_owned(parser, value, 2);
|
13253
|
+
for (uint8_t numbered_parameter = 1; numbered_parameter <= numbered_parameters - 1; numbered_parameter++) {
|
13254
|
+
pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameter - 1], 2);
|
13198
13255
|
}
|
13199
13256
|
|
13200
|
-
//
|
13201
|
-
|
13202
|
-
|
13203
|
-
pm_parser_local_add_token(parser, &parser->previous);
|
13204
|
-
return pm_local_variable_read_node_create(parser, &parser->previous, 0);
|
13257
|
+
// Finally we can create the local variable read node.
|
13258
|
+
pm_constant_id_t name_id = pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameters - 1], 2);
|
13259
|
+
return pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
|
13205
13260
|
}
|
13206
13261
|
}
|
13207
13262
|
|
@@ -14010,7 +14065,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
|
|
14010
14065
|
// Gather up all of the patterns into the list.
|
14011
14066
|
while (accept1(parser, PM_TOKEN_COMMA)) {
|
14012
14067
|
// Break early here in case we have a trailing comma.
|
14013
|
-
if (
|
14068
|
+
if (match6(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
|
14014
14069
|
node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
|
14015
14070
|
pm_node_list_append(&nodes, node);
|
14016
14071
|
break;
|
@@ -16927,7 +16982,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
16927
16982
|
}
|
16928
16983
|
|
16929
16984
|
// If this node cannot be writable, then we have an error.
|
16930
|
-
if (pm_call_node_writable_p(cast)) {
|
16985
|
+
if (pm_call_node_writable_p(parser, cast)) {
|
16931
16986
|
parse_write_name(parser, &cast->name);
|
16932
16987
|
} else {
|
16933
16988
|
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
@@ -17038,7 +17093,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
17038
17093
|
}
|
17039
17094
|
|
17040
17095
|
// If this node cannot be writable, then we have an error.
|
17041
|
-
if (pm_call_node_writable_p(cast)) {
|
17096
|
+
if (pm_call_node_writable_p(parser, cast)) {
|
17042
17097
|
parse_write_name(parser, &cast->name);
|
17043
17098
|
} else {
|
17044
17099
|
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
@@ -17159,7 +17214,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
17159
17214
|
}
|
17160
17215
|
|
17161
17216
|
// If this node cannot be writable, then we have an error.
|
17162
|
-
if (pm_call_node_writable_p(cast)) {
|
17217
|
+
if (pm_call_node_writable_p(parser, cast)) {
|
17163
17218
|
parse_write_name(parser, &cast->name);
|
17164
17219
|
} else {
|
17165
17220
|
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
@@ -17751,7 +17806,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17751
17806
|
.encoding_changed_callback = NULL,
|
17752
17807
|
.encoding_comment_start = source,
|
17753
17808
|
.lex_callback = NULL,
|
17754
|
-
.
|
17809
|
+
.filepath = { 0 },
|
17755
17810
|
.constant_pool = { 0 },
|
17756
17811
|
.newline_list = { 0 },
|
17757
17812
|
.integer_base = 0,
|
@@ -17794,7 +17849,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17794
17849
|
// If options were provided to this parse, establish them here.
|
17795
17850
|
if (options != NULL) {
|
17796
17851
|
// filepath option
|
17797
|
-
parser->
|
17852
|
+
parser->filepath = options->filepath;
|
17798
17853
|
|
17799
17854
|
// line option
|
17800
17855
|
parser->start_line = options->line;
|
@@ -17896,7 +17951,7 @@ pm_magic_comment_list_free(pm_list_t *list) {
|
|
17896
17951
|
*/
|
17897
17952
|
PRISM_EXPORTED_FUNCTION void
|
17898
17953
|
pm_parser_free(pm_parser_t *parser) {
|
17899
|
-
pm_string_free(&parser->
|
17954
|
+
pm_string_free(&parser->filepath);
|
17900
17955
|
pm_diagnostic_list_free(&parser->error_list);
|
17901
17956
|
pm_diagnostic_list_free(&parser->warning_list);
|
17902
17957
|
pm_comment_list_free(&parser->comment_list);
|
@@ -18060,7 +18115,9 @@ pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_
|
|
18060
18115
|
|
18061
18116
|
// Now we're going to shift all of the errors after this one down one
|
18062
18117
|
// index to make room for the new error.
|
18063
|
-
|
18118
|
+
if (index + 1 < error_list->size) {
|
18119
|
+
memmove(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
|
18120
|
+
}
|
18064
18121
|
|
18065
18122
|
// Finally, we'll insert the error into the array.
|
18066
18123
|
uint32_t column_end;
|
data/src/util/pm_constant_pool.c
CHANGED
@@ -181,6 +181,31 @@ pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t
|
|
181
181
|
return &pool->constants[constant_id - 1];
|
182
182
|
}
|
183
183
|
|
184
|
+
/**
|
185
|
+
* Find a constant in a constant pool. Returns the id of the constant, or 0 if
|
186
|
+
* the constant is not found.
|
187
|
+
*/
|
188
|
+
pm_constant_id_t
|
189
|
+
pm_constant_pool_find(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
190
|
+
assert(is_power_of_two(pool->capacity));
|
191
|
+
const uint32_t mask = pool->capacity - 1;
|
192
|
+
|
193
|
+
uint32_t hash = pm_constant_pool_hash(start, length);
|
194
|
+
uint32_t index = hash & mask;
|
195
|
+
pm_constant_pool_bucket_t *bucket;
|
196
|
+
|
197
|
+
while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
|
198
|
+
pm_constant_t *constant = &pool->constants[bucket->id - 1];
|
199
|
+
if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
|
200
|
+
return bucket->id;
|
201
|
+
}
|
202
|
+
|
203
|
+
index = (index + 1) & mask;
|
204
|
+
}
|
205
|
+
|
206
|
+
return PM_CONSTANT_ID_UNSET;
|
207
|
+
}
|
208
|
+
|
184
209
|
/**
|
185
210
|
* Insert a constant into a constant pool and return its index in the pool.
|
186
211
|
*/
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: prism
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.21.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shopify
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-02-
|
11
|
+
date: 2024-02-05 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email:
|