prism 0.22.0 → 0.24.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +39 -1
- data/README.md +2 -1
- data/docs/releasing.md +67 -17
- data/docs/ruby_parser_translation.md +19 -0
- data/docs/serialization.md +2 -0
- data/ext/prism/api_node.c +1982 -1538
- data/ext/prism/extension.c +12 -7
- data/ext/prism/extension.h +2 -2
- data/include/prism/diagnostic.h +3 -4
- data/include/prism/encoding.h +7 -0
- data/include/prism/util/pm_constant_pool.h +1 -1
- data/include/prism/util/pm_newline_list.h +4 -3
- data/include/prism/util/pm_strpbrk.h +4 -1
- data/include/prism/version.h +2 -2
- data/lib/prism/desugar_compiler.rb +225 -80
- data/lib/prism/dsl.rb +302 -299
- data/lib/prism/ffi.rb +103 -77
- data/lib/prism/lex_compat.rb +1 -0
- data/lib/prism/node.rb +3624 -2114
- data/lib/prism/node_ext.rb +25 -2
- data/lib/prism/parse_result.rb +56 -19
- data/lib/prism/serialize.rb +605 -303
- data/lib/prism/translation/parser/compiler.rb +1 -1
- data/lib/prism/translation/parser/rubocop.rb +11 -3
- data/lib/prism/translation/parser.rb +25 -12
- data/lib/prism/translation/parser33.rb +12 -0
- data/lib/prism/translation/parser34.rb +12 -0
- data/lib/prism/translation/ripper.rb +696 -0
- data/lib/prism/translation/ruby_parser.rb +1521 -0
- data/lib/prism/translation.rb +3 -3
- data/lib/prism.rb +0 -1
- data/prism.gemspec +6 -2
- data/src/diagnostic.c +10 -11
- data/src/encoding.c +16 -17
- data/src/options.c +7 -2
- data/src/prettyprint.c +3 -3
- data/src/prism.c +172 -97
- data/src/serialize.c +24 -13
- data/src/token_type.c +3 -3
- data/src/util/pm_constant_pool.c +1 -1
- data/src/util/pm_newline_list.c +6 -3
- data/src/util/pm_strpbrk.c +122 -14
- metadata +8 -4
- data/lib/prism/ripper_compat.rb +0 -285
data/src/serialize.c
CHANGED
@@ -1843,6 +1843,17 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1843
1843
|
}
|
1844
1844
|
}
|
1845
1845
|
|
1846
|
+
static void
|
1847
|
+
pm_serialize_newline_list(pm_newline_list_t *list, pm_buffer_t *buffer) {
|
1848
|
+
uint32_t size = pm_sizet_to_u32(list->size);
|
1849
|
+
pm_buffer_append_varuint(buffer, size);
|
1850
|
+
|
1851
|
+
for (uint32_t i = 0; i < size; i++) {
|
1852
|
+
uint32_t offset = pm_sizet_to_u32(list->offsets[i]);
|
1853
|
+
pm_buffer_append_varuint(buffer, offset);
|
1854
|
+
}
|
1855
|
+
}
|
1856
|
+
|
1846
1857
|
static void
|
1847
1858
|
pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *buffer) {
|
1848
1859
|
// serialize type
|
@@ -1929,19 +1940,25 @@ pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer) {
|
|
1929
1940
|
pm_buffer_append_string(buffer, encoding->name, encoding_length);
|
1930
1941
|
}
|
1931
1942
|
|
1932
|
-
|
1933
|
-
|
1934
|
-
* Serialize the encoding, metadata, nodes, and constant pool.
|
1935
|
-
*/
|
1936
|
-
void
|
1937
|
-
pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
1943
|
+
static void
|
1944
|
+
pm_serialize_metadata(pm_parser_t *parser, pm_buffer_t *buffer) {
|
1938
1945
|
pm_serialize_encoding(parser->encoding, buffer);
|
1939
1946
|
pm_buffer_append_varsint(buffer, parser->start_line);
|
1947
|
+
pm_serialize_newline_list(&parser->newline_list, buffer);
|
1940
1948
|
pm_serialize_comment_list(parser, &parser->comment_list, buffer);
|
1941
1949
|
pm_serialize_magic_comment_list(parser, &parser->magic_comment_list, buffer);
|
1942
1950
|
pm_serialize_data_loc(parser, buffer);
|
1943
1951
|
pm_serialize_diagnostic_list(parser, &parser->error_list, buffer);
|
1944
1952
|
pm_serialize_diagnostic_list(parser, &parser->warning_list, buffer);
|
1953
|
+
}
|
1954
|
+
|
1955
|
+
#line 243 "serialize.c.erb"
|
1956
|
+
/**
|
1957
|
+
* Serialize the metadata, nodes, and constant pool.
|
1958
|
+
*/
|
1959
|
+
void
|
1960
|
+
pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
1961
|
+
pm_serialize_metadata(parser, buffer);
|
1945
1962
|
|
1946
1963
|
// Here we're going to leave space for the offset of the constant pool in
|
1947
1964
|
// the buffer.
|
@@ -2032,13 +2049,7 @@ pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const
|
|
2032
2049
|
// Append 0 to mark end of tokens.
|
2033
2050
|
pm_buffer_append_byte(buffer, 0);
|
2034
2051
|
|
2035
|
-
|
2036
|
-
pm_buffer_append_varsint(buffer, parser.start_line);
|
2037
|
-
pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
|
2038
|
-
pm_serialize_magic_comment_list(&parser, &parser.magic_comment_list, buffer);
|
2039
|
-
pm_serialize_data_loc(&parser, buffer);
|
2040
|
-
pm_serialize_diagnostic_list(&parser, &parser.error_list, buffer);
|
2041
|
-
pm_serialize_diagnostic_list(&parser, &parser.warning_list, buffer);
|
2052
|
+
pm_serialize_metadata(&parser, buffer);
|
2042
2053
|
|
2043
2054
|
pm_node_destroy(&parser, node);
|
2044
2055
|
pm_parser_free(&parser);
|
data/src/token_type.c
CHANGED
@@ -469,7 +469,7 @@ pm_token_type_human(pm_token_type_t token_type) {
|
|
469
469
|
case PM_TOKEN_HEREDOC_START:
|
470
470
|
return "heredoc beginning";
|
471
471
|
case PM_TOKEN_IDENTIFIER:
|
472
|
-
return "local variable or method
|
472
|
+
return "local variable or method";
|
473
473
|
case PM_TOKEN_IGNORED_NEWLINE:
|
474
474
|
return "ignored newline";
|
475
475
|
case PM_TOKEN_INSTANCE_VARIABLE:
|
@@ -579,7 +579,7 @@ pm_token_type_human(pm_token_type_t token_type) {
|
|
579
579
|
case PM_TOKEN_LABEL:
|
580
580
|
return "label";
|
581
581
|
case PM_TOKEN_LABEL_END:
|
582
|
-
return "
|
582
|
+
return "label terminator";
|
583
583
|
case PM_TOKEN_LAMBDA_BEGIN:
|
584
584
|
return "'{'";
|
585
585
|
case PM_TOKEN_LESS:
|
@@ -681,7 +681,7 @@ pm_token_type_human(pm_token_type_t token_type) {
|
|
681
681
|
case PM_TOKEN_UPLUS:
|
682
682
|
return "'+'";
|
683
683
|
case PM_TOKEN_USTAR:
|
684
|
-
return "
|
684
|
+
return "*";
|
685
685
|
case PM_TOKEN_USTAR_STAR:
|
686
686
|
return "'**'";
|
687
687
|
case PM_TOKEN_WORDS_SEP:
|
data/src/util/pm_constant_pool.c
CHANGED
@@ -186,7 +186,7 @@ pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t
|
|
186
186
|
* the constant is not found.
|
187
187
|
*/
|
188
188
|
pm_constant_id_t
|
189
|
-
pm_constant_pool_find(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
189
|
+
pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
190
190
|
assert(is_power_of_two(pool->capacity));
|
191
191
|
const uint32_t mask = pool->capacity - 1;
|
192
192
|
|
data/src/util/pm_newline_list.c
CHANGED
@@ -51,7 +51,7 @@ pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
|
|
51
51
|
* are returned.
|
52
52
|
*/
|
53
53
|
pm_line_column_t
|
54
|
-
pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor) {
|
54
|
+
pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line) {
|
55
55
|
assert(cursor >= list->start);
|
56
56
|
size_t offset = (size_t) (cursor - list->start);
|
57
57
|
|
@@ -62,7 +62,7 @@ pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor
|
|
62
62
|
size_t mid = left + (right - left) / 2;
|
63
63
|
|
64
64
|
if (list->offsets[mid] == offset) {
|
65
|
-
return ((pm_line_column_t) { mid +
|
65
|
+
return ((pm_line_column_t) { ((int32_t) mid) + start_line, 0 });
|
66
66
|
}
|
67
67
|
|
68
68
|
if (list->offsets[mid] < offset) {
|
@@ -72,7 +72,10 @@ pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor
|
|
72
72
|
}
|
73
73
|
}
|
74
74
|
|
75
|
-
return ((pm_line_column_t) {
|
75
|
+
return ((pm_line_column_t) {
|
76
|
+
.line = ((int32_t) left) + start_line - 1,
|
77
|
+
.column = (uint32_t) (offset - list->offsets[left - 1])
|
78
|
+
});
|
76
79
|
}
|
77
80
|
|
78
81
|
/**
|
data/src/util/pm_strpbrk.c
CHANGED
@@ -1,10 +1,18 @@
|
|
1
1
|
#include "prism/util/pm_strpbrk.h"
|
2
2
|
|
3
3
|
/**
|
4
|
-
*
|
4
|
+
* Add an invalid multibyte character error to the parser.
|
5
|
+
*/
|
6
|
+
static inline void
|
7
|
+
pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
8
|
+
pm_diagnostic_list_append_format(&parser->error_list, start, end, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *start);
|
9
|
+
}
|
10
|
+
|
11
|
+
/**
|
12
|
+
* This is the default path.
|
5
13
|
*/
|
6
14
|
static inline const uint8_t *
|
7
|
-
|
15
|
+
pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
|
8
16
|
size_t index = 0;
|
9
17
|
|
10
18
|
while (index < maximum) {
|
@@ -12,22 +20,39 @@ pm_strpbrk_multi_byte(const pm_parser_t *parser, const uint8_t *source, const ui
|
|
12
20
|
return source + index;
|
13
21
|
}
|
14
22
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
23
|
+
if (source[index] < 0x80) {
|
24
|
+
index++;
|
25
|
+
} else {
|
26
|
+
size_t width = pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index));
|
19
27
|
|
20
|
-
|
28
|
+
if (width > 0) {
|
29
|
+
index += width;
|
30
|
+
} else if (!validate) {
|
31
|
+
index++;
|
32
|
+
} else {
|
33
|
+
// At this point we know we have an invalid multibyte character.
|
34
|
+
// We'll walk forward as far as we can until we find the next
|
35
|
+
// valid character so that we don't spam the user with a ton of
|
36
|
+
// the same kind of error.
|
37
|
+
const size_t start = index;
|
38
|
+
|
39
|
+
do {
|
40
|
+
index++;
|
41
|
+
} while (index < maximum && pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
|
42
|
+
|
43
|
+
pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
|
44
|
+
}
|
45
|
+
}
|
21
46
|
}
|
22
47
|
|
23
48
|
return NULL;
|
24
49
|
}
|
25
50
|
|
26
51
|
/**
|
27
|
-
* This is the
|
52
|
+
* This is the path when the encoding is ASCII-8BIT.
|
28
53
|
*/
|
29
54
|
static inline const uint8_t *
|
30
|
-
|
55
|
+
pm_strpbrk_ascii_8bit(const uint8_t *source, const uint8_t *charset, size_t maximum) {
|
31
56
|
size_t index = 0;
|
32
57
|
|
33
58
|
while (index < maximum) {
|
@@ -41,6 +66,85 @@ pm_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t max
|
|
41
66
|
return NULL;
|
42
67
|
}
|
43
68
|
|
69
|
+
/**
|
70
|
+
* This is the slow path that does care about the encoding.
|
71
|
+
*/
|
72
|
+
static inline const uint8_t *
|
73
|
+
pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
|
74
|
+
size_t index = 0;
|
75
|
+
|
76
|
+
while (index < maximum) {
|
77
|
+
if (strchr((const char *) charset, source[index]) != NULL) {
|
78
|
+
return source + index;
|
79
|
+
}
|
80
|
+
|
81
|
+
if (source[index] < 0x80) {
|
82
|
+
index++;
|
83
|
+
} else {
|
84
|
+
size_t width = parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
|
85
|
+
|
86
|
+
if (width > 0) {
|
87
|
+
index += width;
|
88
|
+
} else if (!validate) {
|
89
|
+
index++;
|
90
|
+
} else {
|
91
|
+
// At this point we know we have an invalid multibyte character.
|
92
|
+
// We'll walk forward as far as we can until we find the next
|
93
|
+
// valid character so that we don't spam the user with a ton of
|
94
|
+
// the same kind of error.
|
95
|
+
const size_t start = index;
|
96
|
+
|
97
|
+
do {
|
98
|
+
index++;
|
99
|
+
} while (index < maximum && parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
|
100
|
+
|
101
|
+
pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
|
102
|
+
}
|
103
|
+
}
|
104
|
+
}
|
105
|
+
|
106
|
+
return NULL;
|
107
|
+
}
|
108
|
+
|
109
|
+
/**
|
110
|
+
* This is the fast path that does not care about the encoding because we know
|
111
|
+
* the encoding only supports single-byte characters.
|
112
|
+
*/
|
113
|
+
static inline const uint8_t *
|
114
|
+
pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
|
115
|
+
size_t index = 0;
|
116
|
+
|
117
|
+
while (index < maximum) {
|
118
|
+
if (strchr((const char *) charset, source[index]) != NULL) {
|
119
|
+
return source + index;
|
120
|
+
}
|
121
|
+
|
122
|
+
if (source[index] < 0x80 || !validate) {
|
123
|
+
index++;
|
124
|
+
} else {
|
125
|
+
size_t width = parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
|
126
|
+
|
127
|
+
if (width > 0) {
|
128
|
+
index += width;
|
129
|
+
} else {
|
130
|
+
// At this point we know we have an invalid multibyte character.
|
131
|
+
// We'll walk forward as far as we can until we find the next
|
132
|
+
// valid character so that we don't spam the user with a ton of
|
133
|
+
// the same kind of error.
|
134
|
+
const size_t start = index;
|
135
|
+
|
136
|
+
do {
|
137
|
+
index++;
|
138
|
+
} while (index < maximum && parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
|
139
|
+
|
140
|
+
pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
|
141
|
+
}
|
142
|
+
}
|
143
|
+
}
|
144
|
+
|
145
|
+
return NULL;
|
146
|
+
}
|
147
|
+
|
44
148
|
/**
|
45
149
|
* Here we have rolled our own version of strpbrk. The standard library strpbrk
|
46
150
|
* has undefined behavior when the source string is not null-terminated. We want
|
@@ -57,16 +161,20 @@ pm_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t max
|
|
57
161
|
*
|
58
162
|
* Finally, we want to support encodings wherein the charset could contain
|
59
163
|
* characters that are trailing bytes of multi-byte characters. For example, in
|
60
|
-
*
|
164
|
+
* Shift_JIS, the backslash character can be a trailing byte. In that case we
|
61
165
|
* need to take a slower path and iterate one multi-byte character at a time.
|
62
166
|
*/
|
63
167
|
const uint8_t *
|
64
|
-
pm_strpbrk(
|
168
|
+
pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate) {
|
65
169
|
if (length <= 0) {
|
66
170
|
return NULL;
|
67
|
-
} else if (parser->encoding_changed
|
68
|
-
return
|
171
|
+
} else if (!parser->encoding_changed) {
|
172
|
+
return pm_strpbrk_utf8(parser, source, charset, (size_t) length, validate);
|
173
|
+
} else if (parser->encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
|
174
|
+
return pm_strpbrk_ascii_8bit(source, charset, (size_t) length);
|
175
|
+
} else if (parser->encoding->multibyte) {
|
176
|
+
return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length, validate);
|
69
177
|
} else {
|
70
|
-
return pm_strpbrk_single_byte(source, charset, (size_t) length);
|
178
|
+
return pm_strpbrk_single_byte(parser, source, charset, (size_t) length, validate);
|
71
179
|
}
|
72
180
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: prism
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.24.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shopify
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-02-
|
11
|
+
date: 2024-02-15 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email:
|
@@ -40,6 +40,7 @@ files:
|
|
40
40
|
- docs/releasing.md
|
41
41
|
- docs/ripper.md
|
42
42
|
- docs/ruby_api.md
|
43
|
+
- docs/ruby_parser_translation.md
|
43
44
|
- docs/serialization.md
|
44
45
|
- docs/testing.md
|
45
46
|
- ext/prism/api_node.c
|
@@ -88,13 +89,16 @@ files:
|
|
88
89
|
- lib/prism/parse_result/comments.rb
|
89
90
|
- lib/prism/parse_result/newlines.rb
|
90
91
|
- lib/prism/pattern.rb
|
91
|
-
- lib/prism/ripper_compat.rb
|
92
92
|
- lib/prism/serialize.rb
|
93
93
|
- lib/prism/translation.rb
|
94
94
|
- lib/prism/translation/parser.rb
|
95
95
|
- lib/prism/translation/parser/compiler.rb
|
96
96
|
- lib/prism/translation/parser/lexer.rb
|
97
97
|
- lib/prism/translation/parser/rubocop.rb
|
98
|
+
- lib/prism/translation/parser33.rb
|
99
|
+
- lib/prism/translation/parser34.rb
|
100
|
+
- lib/prism/translation/ripper.rb
|
101
|
+
- lib/prism/translation/ruby_parser.rb
|
98
102
|
- lib/prism/visitor.rb
|
99
103
|
- prism.gemspec
|
100
104
|
- rbi/prism.rbi
|
@@ -144,7 +148,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
144
148
|
- !ruby/object:Gem::Version
|
145
149
|
version: '0'
|
146
150
|
requirements: []
|
147
|
-
rubygems_version: 3.
|
151
|
+
rubygems_version: 3.4.1
|
148
152
|
signing_key:
|
149
153
|
specification_version: 4
|
150
154
|
summary: Prism Ruby parser
|
data/lib/prism/ripper_compat.rb
DELETED
@@ -1,285 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "ripper"
|
4
|
-
|
5
|
-
module Prism
|
6
|
-
# Note: This integration is not finished, and therefore still has many
|
7
|
-
# inconsistencies with Ripper. If you'd like to help out, pull requests would
|
8
|
-
# be greatly appreciated!
|
9
|
-
#
|
10
|
-
# This class is meant to provide a compatibility layer between prism and
|
11
|
-
# Ripper. It functions by parsing the entire tree first and then walking it
|
12
|
-
# and executing each of the Ripper callbacks as it goes.
|
13
|
-
#
|
14
|
-
# This class is going to necessarily be slower than the native Ripper API. It
|
15
|
-
# is meant as a stopgap until developers migrate to using prism. It is also
|
16
|
-
# meant as a test harness for the prism parser.
|
17
|
-
#
|
18
|
-
# To use this class, you treat `Prism::RipperCompat` effectively as you would
|
19
|
-
# treat the `Ripper` class.
|
20
|
-
class RipperCompat < Visitor
|
21
|
-
# This class mirrors the ::Ripper::SexpBuilder subclass of ::Ripper that
|
22
|
-
# returns the arrays of [type, *children].
|
23
|
-
class SexpBuilder < RipperCompat
|
24
|
-
private
|
25
|
-
|
26
|
-
Ripper::PARSER_EVENTS.each do |event|
|
27
|
-
define_method(:"on_#{event}") do |*args|
|
28
|
-
[event, *args]
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
Ripper::SCANNER_EVENTS.each do |event|
|
33
|
-
define_method(:"on_#{event}") do |value|
|
34
|
-
[:"@#{event}", value, [lineno, column]]
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
# This class mirrors the ::Ripper::SexpBuilderPP subclass of ::Ripper that
|
40
|
-
# returns the same values as ::Ripper::SexpBuilder except with a couple of
|
41
|
-
# niceties that flatten linked lists into arrays.
|
42
|
-
class SexpBuilderPP < SexpBuilder
|
43
|
-
private
|
44
|
-
|
45
|
-
def _dispatch_event_new # :nodoc:
|
46
|
-
[]
|
47
|
-
end
|
48
|
-
|
49
|
-
def _dispatch_event_push(list, item) # :nodoc:
|
50
|
-
list << item
|
51
|
-
list
|
52
|
-
end
|
53
|
-
|
54
|
-
Ripper::PARSER_EVENT_TABLE.each do |event, arity|
|
55
|
-
case event
|
56
|
-
when /_new\z/
|
57
|
-
alias_method :"on_#{event}", :_dispatch_event_new if arity == 0
|
58
|
-
when /_add\z/
|
59
|
-
alias_method :"on_#{event}", :_dispatch_event_push
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
# The source that is being parsed.
|
65
|
-
attr_reader :source
|
66
|
-
|
67
|
-
# The current line number of the parser.
|
68
|
-
attr_reader :lineno
|
69
|
-
|
70
|
-
# The current column number of the parser.
|
71
|
-
attr_reader :column
|
72
|
-
|
73
|
-
# Create a new RipperCompat object with the given source.
|
74
|
-
def initialize(source)
|
75
|
-
@source = source
|
76
|
-
@result = nil
|
77
|
-
@lineno = nil
|
78
|
-
@column = nil
|
79
|
-
end
|
80
|
-
|
81
|
-
############################################################################
|
82
|
-
# Public interface
|
83
|
-
############################################################################
|
84
|
-
|
85
|
-
# True if the parser encountered an error during parsing.
|
86
|
-
def error?
|
87
|
-
result.failure?
|
88
|
-
end
|
89
|
-
|
90
|
-
# Parse the source and return the result.
|
91
|
-
def parse
|
92
|
-
result.magic_comments.each do |magic_comment|
|
93
|
-
on_magic_comment(magic_comment.key, magic_comment.value)
|
94
|
-
end
|
95
|
-
|
96
|
-
if error?
|
97
|
-
result.errors.each do |error|
|
98
|
-
on_parse_error(error.message)
|
99
|
-
end
|
100
|
-
|
101
|
-
nil
|
102
|
-
else
|
103
|
-
result.value.accept(self)
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
|
-
############################################################################
|
108
|
-
# Visitor methods
|
109
|
-
############################################################################
|
110
|
-
|
111
|
-
# Visit an ArrayNode node.
|
112
|
-
def visit_array_node(node)
|
113
|
-
elements = visit_elements(node.elements) unless node.elements.empty?
|
114
|
-
bounds(node.location)
|
115
|
-
on_array(elements)
|
116
|
-
end
|
117
|
-
|
118
|
-
# Visit a CallNode node.
|
119
|
-
def visit_call_node(node)
|
120
|
-
if node.variable_call?
|
121
|
-
if node.message.match?(/^[[:alpha:]_]/)
|
122
|
-
bounds(node.message_loc)
|
123
|
-
return on_vcall(on_ident(node.message))
|
124
|
-
end
|
125
|
-
|
126
|
-
raise NotImplementedError, "Non-alpha variable call"
|
127
|
-
end
|
128
|
-
|
129
|
-
if node.opening_loc.nil?
|
130
|
-
left = visit(node.receiver)
|
131
|
-
if node.arguments&.arguments&.length == 1
|
132
|
-
right = visit(node.arguments.arguments.first)
|
133
|
-
|
134
|
-
on_binary(left, node.name, right)
|
135
|
-
elsif !node.arguments || node.arguments.empty?
|
136
|
-
on_unary(node.name, left)
|
137
|
-
else
|
138
|
-
raise NotImplementedError, "More than two arguments for operator"
|
139
|
-
end
|
140
|
-
else
|
141
|
-
raise NotImplementedError, "Non-nil opening_loc"
|
142
|
-
end
|
143
|
-
end
|
144
|
-
|
145
|
-
# Visit a FloatNode node.
|
146
|
-
def visit_float_node(node)
|
147
|
-
visit_number(node) { |text| on_float(text) }
|
148
|
-
end
|
149
|
-
|
150
|
-
# Visit a ImaginaryNode node.
|
151
|
-
def visit_imaginary_node(node)
|
152
|
-
visit_number(node) { |text| on_imaginary(text) }
|
153
|
-
end
|
154
|
-
|
155
|
-
# Visit an IntegerNode node.
|
156
|
-
def visit_integer_node(node)
|
157
|
-
visit_number(node) { |text| on_int(text) }
|
158
|
-
end
|
159
|
-
|
160
|
-
# Visit a ParenthesesNode node.
|
161
|
-
def visit_parentheses_node(node)
|
162
|
-
body =
|
163
|
-
if node.body.nil?
|
164
|
-
on_stmts_add(on_stmts_new, on_void_stmt)
|
165
|
-
else
|
166
|
-
visit(node.body)
|
167
|
-
end
|
168
|
-
|
169
|
-
bounds(node.location)
|
170
|
-
on_paren(body)
|
171
|
-
end
|
172
|
-
|
173
|
-
# Visit a ProgramNode node.
|
174
|
-
def visit_program_node(node)
|
175
|
-
statements = visit(node.statements)
|
176
|
-
bounds(node.location)
|
177
|
-
on_program(statements)
|
178
|
-
end
|
179
|
-
|
180
|
-
# Visit a RangeNode node.
|
181
|
-
def visit_range_node(node)
|
182
|
-
left = visit(node.left)
|
183
|
-
right = visit(node.right)
|
184
|
-
|
185
|
-
bounds(node.location)
|
186
|
-
if node.exclude_end?
|
187
|
-
on_dot3(left, right)
|
188
|
-
else
|
189
|
-
on_dot2(left, right)
|
190
|
-
end
|
191
|
-
end
|
192
|
-
|
193
|
-
# Visit a RationalNode node.
|
194
|
-
def visit_rational_node(node)
|
195
|
-
visit_number(node) { |text| on_rational(text) }
|
196
|
-
end
|
197
|
-
|
198
|
-
# Visit a StatementsNode node.
|
199
|
-
def visit_statements_node(node)
|
200
|
-
bounds(node.location)
|
201
|
-
node.body.inject(on_stmts_new) do |stmts, stmt|
|
202
|
-
on_stmts_add(stmts, visit(stmt))
|
203
|
-
end
|
204
|
-
end
|
205
|
-
|
206
|
-
############################################################################
|
207
|
-
# Entrypoints for subclasses
|
208
|
-
############################################################################
|
209
|
-
|
210
|
-
# This is a convenience method that runs the SexpBuilder subclass parser.
|
211
|
-
def self.sexp_raw(source)
|
212
|
-
SexpBuilder.new(source).parse
|
213
|
-
end
|
214
|
-
|
215
|
-
# This is a convenience method that runs the SexpBuilderPP subclass parser.
|
216
|
-
def self.sexp(source)
|
217
|
-
SexpBuilderPP.new(source).parse
|
218
|
-
end
|
219
|
-
|
220
|
-
private
|
221
|
-
|
222
|
-
# Visit a list of elements, like the elements of an array or arguments.
|
223
|
-
def visit_elements(elements)
|
224
|
-
bounds(elements.first.location)
|
225
|
-
elements.inject(on_args_new) do |args, element|
|
226
|
-
on_args_add(args, visit(element))
|
227
|
-
end
|
228
|
-
end
|
229
|
-
|
230
|
-
# Visit a node that represents a number. We need to explicitly handle the
|
231
|
-
# unary - operator.
|
232
|
-
def visit_number(node)
|
233
|
-
slice = node.slice
|
234
|
-
location = node.location
|
235
|
-
|
236
|
-
if slice[0] == "-"
|
237
|
-
bounds_values(location.start_line, location.start_column + 1)
|
238
|
-
value = yield slice[1..-1]
|
239
|
-
|
240
|
-
bounds(node.location)
|
241
|
-
on_unary(RUBY_ENGINE == "jruby" ? :- : :-@, value)
|
242
|
-
else
|
243
|
-
bounds(location)
|
244
|
-
yield slice
|
245
|
-
end
|
246
|
-
end
|
247
|
-
|
248
|
-
# This method is responsible for updating lineno and column information
|
249
|
-
# to reflect the current node.
|
250
|
-
#
|
251
|
-
# This method could be drastically improved with some caching on the start
|
252
|
-
# of every line, but for now it's good enough.
|
253
|
-
def bounds(location)
|
254
|
-
@lineno = location.start_line
|
255
|
-
@column = location.start_column
|
256
|
-
end
|
257
|
-
|
258
|
-
# If we need to do something unusual, we can directly update the line number
|
259
|
-
# and column to reflect the current node.
|
260
|
-
def bounds_values(lineno, column)
|
261
|
-
@lineno = lineno
|
262
|
-
@column = column
|
263
|
-
end
|
264
|
-
|
265
|
-
# Lazily initialize the parse result.
|
266
|
-
def result
|
267
|
-
@result ||= Prism.parse(source)
|
268
|
-
end
|
269
|
-
|
270
|
-
def _dispatch0; end # :nodoc:
|
271
|
-
def _dispatch1(_); end # :nodoc:
|
272
|
-
def _dispatch2(_, _); end # :nodoc:
|
273
|
-
def _dispatch3(_, _, _); end # :nodoc:
|
274
|
-
def _dispatch4(_, _, _, _); end # :nodoc:
|
275
|
-
def _dispatch5(_, _, _, _, _); end # :nodoc:
|
276
|
-
def _dispatch7(_, _, _, _, _, _, _); end # :nodoc:
|
277
|
-
|
278
|
-
alias_method :on_parse_error, :_dispatch1
|
279
|
-
alias_method :on_magic_comment, :_dispatch2
|
280
|
-
|
281
|
-
(Ripper::SCANNER_EVENT_TABLE.merge(Ripper::PARSER_EVENT_TABLE)).each do |event, arity|
|
282
|
-
alias_method :"on_#{event}", :"_dispatch#{arity}"
|
283
|
-
end
|
284
|
-
end
|
285
|
-
end
|