prism 0.13.0 → 0.14.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -1
- data/README.md +4 -1
- data/config.yml +10 -14
- data/docs/fuzzing.md +5 -10
- data/docs/prism.png +0 -0
- data/docs/serialization.md +10 -0
- data/ext/prism/api_node.c +35 -28
- data/ext/prism/extension.c +35 -48
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +38 -36
- data/include/prism/node.h +1 -0
- data/include/prism/parser.h +26 -0
- data/include/prism/util/pm_buffer.h +3 -0
- data/include/prism/util/pm_constant_pool.h +5 -0
- data/include/prism/util/pm_string.h +2 -1
- data/include/prism/version.h +2 -2
- data/include/prism.h +0 -1
- data/lib/prism/compiler.rb +141 -141
- data/lib/prism/ffi.rb +2 -2
- data/lib/prism/lex_compat.rb +42 -8
- data/lib/prism/node.rb +1456 -46
- data/lib/prism/node_ext.rb +44 -0
- data/lib/prism/parse_result.rb +32 -5
- data/lib/prism/pattern.rb +1 -1
- data/lib/prism/serialize.rb +16 -14
- data/prism.gemspec +2 -3
- data/src/diagnostic.c +1 -1
- data/src/node.c +0 -14
- data/src/prettyprint.c +35 -35
- data/src/prism.c +1728 -811
- data/src/serialize.c +45 -22
- data/src/util/pm_buffer.c +9 -7
- metadata +3 -4
- data/include/prism/unescape.h +0 -48
- data/src/unescape.c +0 -637
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 53678e65563ce43602ed3d84bcc2689d8345daa8cb567f6e4dfd3cfc0df15dbd
|
4
|
+
data.tar.gz: b88cc5fc341b7667b07a811edef5ac3696af208062f627aa94f30c28ae038ea6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d6ca10499a6e87014534741e8b28559d29c83445a77dfe53023473229a3e529c6055d8f7877fae6d8095a6d439a1762a406033eea11031279ea19afb603e9359
|
7
|
+
data.tar.gz: 45afb6da2c1fe649080b8f0276e352d33e0aae71ed523a9074eed545c9542d45b04de9ff30b2de0d705b46bcad1b6d1cda56efad1c47fb5ff9a8eeb8b95ca736
|
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,24 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
|
|
6
6
|
|
7
7
|
## [Unreleased]
|
8
8
|
|
9
|
+
## [0.14.0] - 2023-10-13
|
10
|
+
|
11
|
+
### Added
|
12
|
+
|
13
|
+
- Syntax errors are added for invalid lambda local semicolon placement.
|
14
|
+
- Lambda locals are now checked for duplicate names.
|
15
|
+
- Destructured parameters are now checked for duplicate names.
|
16
|
+
- `Constant{Read,Path,PathTarget}Node#full_name` and `Constant{Read,Path,PathTarget}Node#full_name_parts` are added to walk constant paths for you to find the full name of the constant.
|
17
|
+
- Syntax errors are added when assigning to a numbered parameter.
|
18
|
+
- `Node::type` is added, which matches the `Node#type` API.
|
19
|
+
- Magic comments are now parsed as part of the parsing process and a new field is added in the form of `ParseResult#magic_comments` to access them.
|
20
|
+
|
21
|
+
### Changed
|
22
|
+
|
23
|
+
- **BREAKING**: `Call*Node#name` methods now return symbols instead of strings.
|
24
|
+
- **BREAKING**: For loops now have their index value considered as part of the body, so depths of local variable assignments will be increased by 1.
|
25
|
+
- Tilde heredocs now split up their lines into multiple string nodes to make them easier to dedent.
|
26
|
+
|
9
27
|
## [0.13.0] - 2023-09-29
|
10
28
|
|
11
29
|
### Added
|
@@ -161,7 +179,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
|
|
161
179
|
|
162
180
|
- 🎉 Initial release! 🎉
|
163
181
|
|
164
|
-
[unreleased]: https://github.com/ruby/prism/compare/v0.13.0...HEAD
|
182
|
+
[unreleased]: https://github.com/ruby/prism/compare/v0.14.0...HEAD
|
183
|
+
[0.14.0]: https://github.com/ruby/prism/compare/v0.13.0...v0.14.0
|
165
184
|
[0.13.0]: https://github.com/ruby/prism/compare/v0.12.0...v0.13.0
|
166
185
|
[0.12.0]: https://github.com/ruby/prism/compare/v0.11.0...v0.12.0
|
167
186
|
[0.11.0]: https://github.com/ruby/prism/compare/v0.10.0...v0.11.0
|
data/README.md
CHANGED
@@ -1,4 +1,7 @@
|
|
1
|
-
|
1
|
+
<h1 align="center">Prism Ruby parser</h1>
|
2
|
+
<div align="center">
|
3
|
+
<img alt="Prism Ruby parser" height="256px" src="./docs/prism.png">
|
4
|
+
</div>
|
2
5
|
|
3
6
|
This is a parser for the Ruby programming language. It is designed to be portable, error tolerant, and maintainable. It is written in C99 and has no dependencies. It is currently being integrated into [CRuby](https://github.com/ruby/ruby), [JRuby](https://github.com/jruby/jruby), [TruffleRuby](https://github.com/oracle/truffleruby), [Sorbet](https://github.com/sorbet/sorbet), and [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree).
|
4
7
|
|
data/config.yml
CHANGED
@@ -361,6 +361,8 @@ flags:
|
|
361
361
|
comment: "x - ignores whitespace and allows comments in regular expressions"
|
362
362
|
- name: MULTI_LINE
|
363
363
|
comment: "m - allows $ to match the end of lines within strings"
|
364
|
+
- name: ONCE
|
365
|
+
comment: "o - only interpolates values into the regular expression once"
|
364
366
|
- name: EUC_JP
|
365
367
|
comment: "e - forces the EUC-JP encoding"
|
366
368
|
- name: ASCII_8BIT
|
@@ -369,12 +371,10 @@ flags:
|
|
369
371
|
comment: "s - forces the Windows-31J encoding"
|
370
372
|
- name: UTF_8
|
371
373
|
comment: "u - forces the UTF-8 encoding"
|
372
|
-
- name: ONCE
|
373
|
-
comment: "o - only interpolates values into the regular expression once"
|
374
374
|
- name: StringFlags
|
375
375
|
values:
|
376
376
|
- name: FROZEN
|
377
|
-
comment: "frozen by virtue of a frozen_string_literal comment"
|
377
|
+
comment: "frozen by virtue of a `frozen_string_literal` comment"
|
378
378
|
nodes:
|
379
379
|
- name: AliasGlobalVariableNode
|
380
380
|
fields:
|
@@ -641,9 +641,9 @@ nodes:
|
|
641
641
|
type: flags
|
642
642
|
kind: CallNodeFlags
|
643
643
|
- name: read_name
|
644
|
-
type: string
|
644
|
+
type: constant
|
645
645
|
- name: write_name
|
646
|
-
type: string
|
646
|
+
type: constant
|
647
647
|
- name: operator_loc
|
648
648
|
type: location
|
649
649
|
- name: value
|
@@ -674,7 +674,7 @@ nodes:
|
|
674
674
|
type: flags
|
675
675
|
kind: CallNodeFlags
|
676
676
|
- name: name
|
677
|
-
type: string
|
677
|
+
type: constant
|
678
678
|
comment: |
|
679
679
|
Represents a method call, in all of the various forms that can take.
|
680
680
|
|
@@ -714,9 +714,9 @@ nodes:
|
|
714
714
|
type: flags
|
715
715
|
kind: CallNodeFlags
|
716
716
|
- name: read_name
|
717
|
-
type: string
|
717
|
+
type: constant
|
718
718
|
- name: write_name
|
719
|
-
type: string
|
719
|
+
type: constant
|
720
720
|
- name: operator
|
721
721
|
type: constant
|
722
722
|
- name: operator_loc
|
@@ -747,9 +747,9 @@ nodes:
|
|
747
747
|
type: flags
|
748
748
|
kind: CallNodeFlags
|
749
749
|
- name: read_name
|
750
|
-
type: string
|
750
|
+
type: constant
|
751
751
|
- name: write_name
|
752
|
-
type: string
|
752
|
+
type: constant
|
753
753
|
- name: operator_loc
|
754
754
|
type: location
|
755
755
|
- name: value
|
@@ -1772,7 +1772,6 @@ nodes:
|
|
1772
1772
|
type: location
|
1773
1773
|
- name: content_loc
|
1774
1774
|
type: location
|
1775
|
-
semantic_field: true # https://github.com/ruby/prism/issues/1452
|
1776
1775
|
- name: closing_loc
|
1777
1776
|
type: location
|
1778
1777
|
- name: unescaped
|
@@ -2093,7 +2092,6 @@ nodes:
|
|
2093
2092
|
type: location
|
2094
2093
|
- name: content_loc
|
2095
2094
|
type: location
|
2096
|
-
semantic_field: true # https://github.com/ruby/prism/issues/1452
|
2097
2095
|
- name: closing_loc
|
2098
2096
|
type: location
|
2099
2097
|
- name: unescaped
|
@@ -2287,10 +2285,8 @@ nodes:
|
|
2287
2285
|
kind: StringFlags
|
2288
2286
|
- name: opening_loc
|
2289
2287
|
type: location?
|
2290
|
-
semantic_field: true # https://github.com/ruby/prism/issues/1452
|
2291
2288
|
- name: content_loc
|
2292
2289
|
type: location
|
2293
|
-
semantic_field: true # https://github.com/ruby/prism/issues/1452
|
2294
2290
|
- name: closing_loc
|
2295
2291
|
type: location?
|
2296
2292
|
- name: unescaped
|
data/docs/fuzzing.md
CHANGED
@@ -6,8 +6,7 @@ We use fuzzing to test the various entrypoints to the library. The fuzzer we use
|
|
6
6
|
fuzz
|
7
7
|
├── corpus
|
8
8
|
│ ├── parse fuzzing corpus for parsing (a symlink to our fixtures)
|
9
|
-
│ ├── regexp fuzzing corpus for regexp
|
10
|
-
│ └── unescape fuzzing corpus for unescaping strings
|
9
|
+
│ └── regexp fuzzing corpus for regexp
|
11
10
|
├── dict an AFL++ dictionary containing various tokens
|
12
11
|
├── docker
|
13
12
|
│ └── Dockerfile for building a container with the fuzzer toolchain
|
@@ -17,11 +16,9 @@ fuzz
|
|
17
16
|
├── parse.sh script to run parsing fuzzer
|
18
17
|
├── regexp.c fuzz handler for regular expression parsing
|
19
18
|
├── regexp.sh script to run regexp fuzzer
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
├── unescape.c fuzz handler for unescape functionality
|
24
|
-
└── unescape.sh script to run unescape fuzzer
|
19
|
+
└── tools
|
20
|
+
├── backtrace.sh generates backtrace files for a crash directory
|
21
|
+
└── minimize.sh generates minimized crash or hang files
|
25
22
|
```
|
26
23
|
|
27
24
|
## Usage
|
@@ -30,14 +27,12 @@ There are currently three fuzzing targets
|
|
30
27
|
|
31
28
|
- `pm_parse_serialize` (parse)
|
32
29
|
- `pm_regexp_named_capture_group_names` (regexp)
|
33
|
-
- `pm_unescape_manipulate_string` (unescape)
|
34
30
|
|
35
31
|
Respectively, fuzzing can be performed with
|
36
32
|
|
37
33
|
```
|
38
34
|
make fuzz-run-parse
|
39
35
|
make fuzz-run-regexp
|
40
|
-
make fuzz-run-unescape
|
41
36
|
```
|
42
37
|
|
43
38
|
To end a fuzzing job, interrupt with CTRL+C. To enter a container with the fuzzing toolchain and debug utilities, run
|
@@ -60,7 +55,7 @@ Note, that this may make reproducing bugs difficult as they may depend on memory
|
|
60
55
|
|
61
56
|
```
|
62
57
|
make fuzz-debug # enter the docker container with build tools
|
63
|
-
make build/fuzz.heisenbug.parse # or .
|
58
|
+
make build/fuzz.heisenbug.parse # or .regexp
|
64
59
|
./build/fuzz.heisenbug.parse path-to-problem-input
|
65
60
|
```
|
66
61
|
|
data/docs/prism.png
ADDED
Binary file
|
data/docs/serialization.md
CHANGED
@@ -31,6 +31,7 @@ This drastically cuts down on the size of the serialized string, especially when
|
|
31
31
|
### comment
|
32
32
|
|
33
33
|
The comment type is one of:
|
34
|
+
|
34
35
|
* 0=`INLINE` (`# comment`)
|
35
36
|
* 1=`EMBEDDED_DOCUMENT` (`=begin`/`=end`)
|
36
37
|
* 2=`__END__` (after `__END__`)
|
@@ -40,6 +41,13 @@ The comment type is one of:
|
|
40
41
|
| `1` | comment type |
|
41
42
|
| location | the location in the source of this comment |
|
42
43
|
|
44
|
+
### magic comment
|
45
|
+
|
46
|
+
| # bytes | field |
|
47
|
+
| --- | --- |
|
48
|
+
| location | the location of the key of the magic comment |
|
49
|
+
| location | the location of the value of the magic comment |
|
50
|
+
|
43
51
|
### diagnostic
|
44
52
|
|
45
53
|
| # bytes | field |
|
@@ -66,6 +74,8 @@ The header is structured like the following table:
|
|
66
74
|
| string | the encoding name |
|
67
75
|
| varint | number of comments |
|
68
76
|
| comment* | comments |
|
77
|
+
| varint | number of magic comments |
|
78
|
+
| magic comment* | magic comments |
|
69
79
|
| varint | number of errors |
|
70
80
|
| diagnostic* | errors |
|
71
81
|
| varint | number of warnings |
|
data/ext/prism/api_node.c
CHANGED
@@ -1437,15 +1437,17 @@ pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding) {
|
|
1437
1437
|
|
1438
1438
|
// flags
|
1439
1439
|
#line 179 "api_node.c.erb"
|
1440
|
-
argv[6] = ULONG2NUM(node->flags
|
1440
|
+
argv[6] = ULONG2NUM(node->flags & ~PM_NODE_FLAG_COMMON_MASK);
|
1441
1441
|
|
1442
1442
|
// read_name
|
1443
|
-
#line
|
1444
|
-
|
1443
|
+
#line 157 "api_node.c.erb"
|
1444
|
+
assert(cast->read_name != 0);
|
1445
|
+
argv[7] = rb_id2sym(constants[cast->read_name - 1]);
|
1445
1446
|
|
1446
1447
|
// write_name
|
1447
|
-
#line
|
1448
|
-
|
1448
|
+
#line 157 "api_node.c.erb"
|
1449
|
+
assert(cast->write_name != 0);
|
1450
|
+
argv[8] = rb_id2sym(constants[cast->write_name - 1]);
|
1449
1451
|
|
1450
1452
|
// operator_loc
|
1451
1453
|
#line 170 "api_node.c.erb"
|
@@ -1496,11 +1498,12 @@ pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding) {
|
|
1496
1498
|
|
1497
1499
|
// flags
|
1498
1500
|
#line 179 "api_node.c.erb"
|
1499
|
-
argv[7] = ULONG2NUM(node->flags
|
1501
|
+
argv[7] = ULONG2NUM(node->flags & ~PM_NODE_FLAG_COMMON_MASK);
|
1500
1502
|
|
1501
1503
|
// name
|
1502
|
-
#line
|
1503
|
-
|
1504
|
+
#line 157 "api_node.c.erb"
|
1505
|
+
assert(cast->name != 0);
|
1506
|
+
argv[8] = rb_id2sym(constants[cast->name - 1]);
|
1504
1507
|
|
1505
1508
|
// location
|
1506
1509
|
argv[9] = pm_location_new(parser, node->location.start, node->location.end, source);
|
@@ -1539,15 +1542,17 @@ pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding) {
|
|
1539
1542
|
|
1540
1543
|
// flags
|
1541
1544
|
#line 179 "api_node.c.erb"
|
1542
|
-
argv[6] = ULONG2NUM(node->flags
|
1545
|
+
argv[6] = ULONG2NUM(node->flags & ~PM_NODE_FLAG_COMMON_MASK);
|
1543
1546
|
|
1544
1547
|
// read_name
|
1545
|
-
#line
|
1546
|
-
|
1548
|
+
#line 157 "api_node.c.erb"
|
1549
|
+
assert(cast->read_name != 0);
|
1550
|
+
argv[7] = rb_id2sym(constants[cast->read_name - 1]);
|
1547
1551
|
|
1548
1552
|
// write_name
|
1549
|
-
#line
|
1550
|
-
|
1553
|
+
#line 157 "api_node.c.erb"
|
1554
|
+
assert(cast->write_name != 0);
|
1555
|
+
argv[8] = rb_id2sym(constants[cast->write_name - 1]);
|
1551
1556
|
|
1552
1557
|
// operator
|
1553
1558
|
#line 157 "api_node.c.erb"
|
@@ -1599,15 +1604,17 @@ pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding) {
|
|
1599
1604
|
|
1600
1605
|
// flags
|
1601
1606
|
#line 179 "api_node.c.erb"
|
1602
|
-
argv[6] = ULONG2NUM(node->flags
|
1607
|
+
argv[6] = ULONG2NUM(node->flags & ~PM_NODE_FLAG_COMMON_MASK);
|
1603
1608
|
|
1604
1609
|
// read_name
|
1605
|
-
#line
|
1606
|
-
|
1610
|
+
#line 157 "api_node.c.erb"
|
1611
|
+
assert(cast->read_name != 0);
|
1612
|
+
argv[7] = rb_id2sym(constants[cast->read_name - 1]);
|
1607
1613
|
|
1608
1614
|
// write_name
|
1609
|
-
#line
|
1610
|
-
|
1615
|
+
#line 157 "api_node.c.erb"
|
1616
|
+
assert(cast->write_name != 0);
|
1617
|
+
argv[8] = rb_id2sym(constants[cast->write_name - 1]);
|
1611
1618
|
|
1612
1619
|
// operator_loc
|
1613
1620
|
#line 170 "api_node.c.erb"
|
@@ -2415,7 +2422,7 @@ pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding) {
|
|
2415
2422
|
|
2416
2423
|
// flags
|
2417
2424
|
#line 179 "api_node.c.erb"
|
2418
|
-
argv[3] = ULONG2NUM(node->flags
|
2425
|
+
argv[3] = ULONG2NUM(node->flags & ~PM_NODE_FLAG_COMMON_MASK);
|
2419
2426
|
|
2420
2427
|
// location
|
2421
2428
|
argv[4] = pm_location_new(parser, node->location.start, node->location.end, source);
|
@@ -2956,7 +2963,7 @@ pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding) {
|
|
2956
2963
|
|
2957
2964
|
// flags
|
2958
2965
|
#line 179 "api_node.c.erb"
|
2959
|
-
argv[0] = ULONG2NUM(node->flags
|
2966
|
+
argv[0] = ULONG2NUM(node->flags & ~PM_NODE_FLAG_COMMON_MASK);
|
2960
2967
|
|
2961
2968
|
// location
|
2962
2969
|
argv[1] = pm_location_new(parser, node->location.start, node->location.end, source);
|
@@ -2986,7 +2993,7 @@ pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding) {
|
|
2986
2993
|
|
2987
2994
|
// flags
|
2988
2995
|
#line 179 "api_node.c.erb"
|
2989
|
-
argv[3] = ULONG2NUM(node->flags
|
2996
|
+
argv[3] = ULONG2NUM(node->flags & ~PM_NODE_FLAG_COMMON_MASK);
|
2990
2997
|
|
2991
2998
|
// location
|
2992
2999
|
argv[4] = pm_location_new(parser, node->location.start, node->location.end, source);
|
@@ -3016,7 +3023,7 @@ pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding) {
|
|
3016
3023
|
|
3017
3024
|
// flags
|
3018
3025
|
#line 179 "api_node.c.erb"
|
3019
|
-
argv[3] = ULONG2NUM(node->flags
|
3026
|
+
argv[3] = ULONG2NUM(node->flags & ~PM_NODE_FLAG_COMMON_MASK);
|
3020
3027
|
|
3021
3028
|
// location
|
3022
3029
|
argv[4] = pm_location_new(parser, node->location.start, node->location.end, source);
|
@@ -3401,7 +3408,7 @@ pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding) {
|
|
3401
3408
|
|
3402
3409
|
// flags
|
3403
3410
|
#line 179 "api_node.c.erb"
|
3404
|
-
argv[4] = ULONG2NUM(node->flags
|
3411
|
+
argv[4] = ULONG2NUM(node->flags & ~PM_NODE_FLAG_COMMON_MASK);
|
3405
3412
|
|
3406
3413
|
// location
|
3407
3414
|
argv[5] = pm_location_new(parser, node->location.start, node->location.end, source);
|
@@ -3918,7 +3925,7 @@ pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding) {
|
|
3918
3925
|
|
3919
3926
|
// flags
|
3920
3927
|
#line 179 "api_node.c.erb"
|
3921
|
-
argv[3] = ULONG2NUM(node->flags
|
3928
|
+
argv[3] = ULONG2NUM(node->flags & ~PM_NODE_FLAG_COMMON_MASK);
|
3922
3929
|
|
3923
3930
|
// location
|
3924
3931
|
argv[4] = pm_location_new(parser, node->location.start, node->location.end, source);
|
@@ -3973,7 +3980,7 @@ pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding) {
|
|
3973
3980
|
|
3974
3981
|
// flags
|
3975
3982
|
#line 179 "api_node.c.erb"
|
3976
|
-
argv[4] = ULONG2NUM(node->flags
|
3983
|
+
argv[4] = ULONG2NUM(node->flags & ~PM_NODE_FLAG_COMMON_MASK);
|
3977
3984
|
|
3978
3985
|
// location
|
3979
3986
|
argv[5] = pm_location_new(parser, node->location.start, node->location.end, source);
|
@@ -4281,7 +4288,7 @@ pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding) {
|
|
4281
4288
|
|
4282
4289
|
// flags
|
4283
4290
|
#line 179 "api_node.c.erb"
|
4284
|
-
argv[0] = ULONG2NUM(node->flags
|
4291
|
+
argv[0] = ULONG2NUM(node->flags & ~PM_NODE_FLAG_COMMON_MASK);
|
4285
4292
|
|
4286
4293
|
// opening_loc
|
4287
4294
|
#line 173 "api_node.c.erb"
|
@@ -4449,7 +4456,7 @@ pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding) {
|
|
4449
4456
|
|
4450
4457
|
// flags
|
4451
4458
|
#line 179 "api_node.c.erb"
|
4452
|
-
argv[4] = ULONG2NUM(node->flags
|
4459
|
+
argv[4] = ULONG2NUM(node->flags & ~PM_NODE_FLAG_COMMON_MASK);
|
4453
4460
|
|
4454
4461
|
// location
|
4455
4462
|
argv[5] = pm_location_new(parser, node->location.start, node->location.end, source);
|
@@ -4506,7 +4513,7 @@ pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding) {
|
|
4506
4513
|
|
4507
4514
|
// flags
|
4508
4515
|
#line 179 "api_node.c.erb"
|
4509
|
-
argv[4] = ULONG2NUM(node->flags
|
4516
|
+
argv[4] = ULONG2NUM(node->flags & ~PM_NODE_FLAG_COMMON_MASK);
|
4510
4517
|
|
4511
4518
|
// location
|
4512
4519
|
argv[5] = pm_location_new(parser, node->location.start, node->location.end, source);
|
data/ext/prism/extension.c
CHANGED
@@ -10,6 +10,7 @@ VALUE rb_cPrismToken;
|
|
10
10
|
VALUE rb_cPrismLocation;
|
11
11
|
|
12
12
|
VALUE rb_cPrismComment;
|
13
|
+
VALUE rb_cPrismMagicComment;
|
13
14
|
VALUE rb_cPrismParseError;
|
14
15
|
VALUE rb_cPrismParseWarning;
|
15
16
|
VALUE rb_cPrismParseResult;
|
@@ -153,6 +154,35 @@ parser_comments(pm_parser_t *parser, VALUE source) {
|
|
153
154
|
return comments;
|
154
155
|
}
|
155
156
|
|
157
|
+
// Extract the magic comments out of the parser into an array.
|
158
|
+
static VALUE
|
159
|
+
parser_magic_comments(pm_parser_t *parser, VALUE source) {
|
160
|
+
VALUE magic_comments = rb_ary_new();
|
161
|
+
|
162
|
+
for (pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) parser->magic_comment_list.head; magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) {
|
163
|
+
VALUE key_loc_argv[] = {
|
164
|
+
source,
|
165
|
+
LONG2FIX(magic_comment->key_start - parser->start),
|
166
|
+
LONG2FIX(magic_comment->key_length)
|
167
|
+
};
|
168
|
+
|
169
|
+
VALUE value_loc_argv[] = {
|
170
|
+
source,
|
171
|
+
LONG2FIX(magic_comment->value_start - parser->start),
|
172
|
+
LONG2FIX(magic_comment->value_length)
|
173
|
+
};
|
174
|
+
|
175
|
+
VALUE magic_comment_argv[] = {
|
176
|
+
rb_class_new_instance(3, key_loc_argv, rb_cPrismLocation),
|
177
|
+
rb_class_new_instance(3, value_loc_argv, rb_cPrismLocation)
|
178
|
+
};
|
179
|
+
|
180
|
+
rb_ary_push(magic_comments, rb_class_new_instance(2, magic_comment_argv, rb_cPrismMagicComment));
|
181
|
+
}
|
182
|
+
|
183
|
+
return magic_comments;
|
184
|
+
}
|
185
|
+
|
156
186
|
// Extract the errors out of the parser into an array.
|
157
187
|
static VALUE
|
158
188
|
parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
|
@@ -297,6 +327,7 @@ parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
|
|
297
327
|
VALUE result_argv[] = {
|
298
328
|
value,
|
299
329
|
parser_comments(&parser, source),
|
330
|
+
parser_magic_comments(&parser, source),
|
300
331
|
parser_errors(&parser, parse_lex_data.encoding, source),
|
301
332
|
parser_warnings(&parser, parse_lex_data.encoding, source),
|
302
333
|
source
|
@@ -304,7 +335,7 @@ parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
|
|
304
335
|
|
305
336
|
pm_node_destroy(&parser, node);
|
306
337
|
pm_parser_free(&parser);
|
307
|
-
return rb_class_new_instance(5, result_argv, rb_cPrismParseResult);
|
338
|
+
return rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
|
308
339
|
}
|
309
340
|
|
310
341
|
// Return an array of tokens corresponding to the given string.
|
@@ -351,12 +382,13 @@ parse_input(pm_string_t *input, const char *filepath) {
|
|
351
382
|
VALUE result_argv[] = {
|
352
383
|
pm_ast_new(&parser, node, encoding),
|
353
384
|
parser_comments(&parser, source),
|
385
|
+
parser_magic_comments(&parser, source),
|
354
386
|
parser_errors(&parser, encoding, source),
|
355
387
|
parser_warnings(&parser, encoding, source),
|
356
388
|
source
|
357
389
|
};
|
358
390
|
|
359
|
-
VALUE result = rb_class_new_instance(5, result_argv, rb_cPrismParseResult);
|
391
|
+
VALUE result = rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
|
360
392
|
|
361
393
|
pm_node_destroy(&parser, node);
|
362
394
|
pm_parser_free(&parser);
|
@@ -461,48 +493,6 @@ named_captures(VALUE self, VALUE source) {
|
|
461
493
|
return names;
|
462
494
|
}
|
463
495
|
|
464
|
-
// Accepts a source string and a type of unescaping and returns the unescaped
|
465
|
-
// version.
|
466
|
-
static VALUE
|
467
|
-
unescape(VALUE source, pm_unescape_type_t unescape_type) {
|
468
|
-
pm_string_t result;
|
469
|
-
|
470
|
-
if (pm_unescape_string((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), unescape_type, &result)) {
|
471
|
-
VALUE str = rb_str_new((const char *) pm_string_source(&result), pm_string_length(&result));
|
472
|
-
pm_string_free(&result);
|
473
|
-
return str;
|
474
|
-
} else {
|
475
|
-
pm_string_free(&result);
|
476
|
-
return Qnil;
|
477
|
-
}
|
478
|
-
}
|
479
|
-
|
480
|
-
// Do not unescape anything in the given string. This is here to provide a
|
481
|
-
// consistent API.
|
482
|
-
static VALUE
|
483
|
-
unescape_none(VALUE self, VALUE source) {
|
484
|
-
return unescape(source, PM_UNESCAPE_NONE);
|
485
|
-
}
|
486
|
-
|
487
|
-
// Minimally unescape the given string. This means effectively unescaping just
|
488
|
-
// the quotes of a string. Returns the unescaped string.
|
489
|
-
static VALUE
|
490
|
-
unescape_minimal(VALUE self, VALUE source) {
|
491
|
-
return unescape(source, PM_UNESCAPE_MINIMAL);
|
492
|
-
}
|
493
|
-
|
494
|
-
// Escape the given string minimally plus whitespace. Returns the unescaped string.
|
495
|
-
static VALUE
|
496
|
-
unescape_whitespace(VALUE self, VALUE source) {
|
497
|
-
return unescape(source, PM_UNESCAPE_WHITESPACE);
|
498
|
-
}
|
499
|
-
|
500
|
-
// Unescape everything in the given string. Return the unescaped string.
|
501
|
-
static VALUE
|
502
|
-
unescape_all(VALUE self, VALUE source) {
|
503
|
-
return unescape(source, PM_UNESCAPE_ALL);
|
504
|
-
}
|
505
|
-
|
506
496
|
// Return a hash of information about the given source string's memory usage.
|
507
497
|
static VALUE
|
508
498
|
memsize(VALUE self, VALUE string) {
|
@@ -589,6 +579,7 @@ Init_prism(void) {
|
|
589
579
|
rb_cPrismToken = rb_define_class_under(rb_cPrism, "Token", rb_cObject);
|
590
580
|
rb_cPrismLocation = rb_define_class_under(rb_cPrism, "Location", rb_cObject);
|
591
581
|
rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject);
|
582
|
+
rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject);
|
592
583
|
rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
|
593
584
|
rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
|
594
585
|
rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject);
|
@@ -612,10 +603,6 @@ Init_prism(void) {
|
|
612
603
|
// internal tasks. We expose these to make them easier to test.
|
613
604
|
VALUE rb_cPrismDebug = rb_define_module_under(rb_cPrism, "Debug");
|
614
605
|
rb_define_singleton_method(rb_cPrismDebug, "named_captures", named_captures, 1);
|
615
|
-
rb_define_singleton_method(rb_cPrismDebug, "unescape_none", unescape_none, 1);
|
616
|
-
rb_define_singleton_method(rb_cPrismDebug, "unescape_minimal", unescape_minimal, 1);
|
617
|
-
rb_define_singleton_method(rb_cPrismDebug, "unescape_whitespace", unescape_whitespace, 1);
|
618
|
-
rb_define_singleton_method(rb_cPrismDebug, "unescape_all", unescape_all, 1);
|
619
606
|
rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
|
620
607
|
rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
|
621
608
|
rb_define_singleton_method(rb_cPrismDebug, "parse_serialize_file_metadata", parse_serialize_file_metadata, 2);
|