prism 0.21.0 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +40 -1
- data/README.md +2 -1
- data/docs/releasing.md +84 -16
- data/docs/ruby_parser_translation.md +19 -0
- data/docs/serialization.md +2 -0
- data/ext/prism/api_node.c +784 -785
- data/ext/prism/extension.c +56 -19
- data/ext/prism/extension.h +2 -2
- data/include/prism/diagnostic.h +11 -6
- data/include/prism/encoding.h +7 -0
- data/include/prism/util/pm_constant_pool.h +1 -1
- data/include/prism/util/pm_strpbrk.h +4 -1
- data/include/prism/version.h +2 -2
- data/lib/prism/ffi.rb +8 -3
- data/lib/prism/lex_compat.rb +17 -1
- data/lib/prism/node.rb +212 -32
- data/lib/prism/node_ext.rb +25 -2
- data/lib/prism/parse_result.rb +46 -16
- data/lib/prism/serialize.rb +14 -6
- data/lib/prism/translation/parser/compiler.rb +16 -6
- data/lib/prism/translation/parser.rb +19 -12
- data/lib/prism/translation/ripper.rb +577 -0
- data/lib/prism/translation/ruby_parser.rb +1521 -0
- data/lib/prism/translation.rb +3 -3
- data/lib/prism.rb +0 -1
- data/prism.gemspec +5 -3
- data/src/diagnostic.c +20 -15
- data/src/encoding.c +16 -17
- data/src/options.c +7 -2
- data/src/prism.c +145 -90
- data/src/serialize.c +24 -13
- data/src/token_type.c +3 -3
- data/src/util/pm_constant_pool.c +1 -1
- data/src/util/pm_string.c +0 -7
- data/src/util/pm_strpbrk.c +122 -14
- metadata +6 -4
- data/lib/prism/ripper_compat.rb +0 -207
data/src/serialize.c
CHANGED
@@ -1843,6 +1843,17 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
|
1843
1843
|
}
|
1844
1844
|
}
|
1845
1845
|
|
1846
|
+
static void
|
1847
|
+
pm_serialize_newline_list(pm_newline_list_t *list, pm_buffer_t *buffer) {
|
1848
|
+
uint32_t size = pm_sizet_to_u32(list->size);
|
1849
|
+
pm_buffer_append_varuint(buffer, size);
|
1850
|
+
|
1851
|
+
for (uint32_t i = 0; i < size; i++) {
|
1852
|
+
uint32_t offset = pm_sizet_to_u32(list->offsets[i]);
|
1853
|
+
pm_buffer_append_varuint(buffer, offset);
|
1854
|
+
}
|
1855
|
+
}
|
1856
|
+
|
1846
1857
|
static void
|
1847
1858
|
pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *buffer) {
|
1848
1859
|
// serialize type
|
@@ -1929,19 +1940,25 @@ pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer) {
|
|
1929
1940
|
pm_buffer_append_string(buffer, encoding->name, encoding_length);
|
1930
1941
|
}
|
1931
1942
|
|
1932
|
-
|
1933
|
-
|
1934
|
-
* Serialize the encoding, metadata, nodes, and constant pool.
|
1935
|
-
*/
|
1936
|
-
void
|
1937
|
-
pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
1943
|
+
static void
|
1944
|
+
pm_serialize_metadata(pm_parser_t *parser, pm_buffer_t *buffer) {
|
1938
1945
|
pm_serialize_encoding(parser->encoding, buffer);
|
1939
1946
|
pm_buffer_append_varsint(buffer, parser->start_line);
|
1947
|
+
pm_serialize_newline_list(&parser->newline_list, buffer);
|
1940
1948
|
pm_serialize_comment_list(parser, &parser->comment_list, buffer);
|
1941
1949
|
pm_serialize_magic_comment_list(parser, &parser->magic_comment_list, buffer);
|
1942
1950
|
pm_serialize_data_loc(parser, buffer);
|
1943
1951
|
pm_serialize_diagnostic_list(parser, &parser->error_list, buffer);
|
1944
1952
|
pm_serialize_diagnostic_list(parser, &parser->warning_list, buffer);
|
1953
|
+
}
|
1954
|
+
|
1955
|
+
#line 243 "serialize.c.erb"
|
1956
|
+
/**
|
1957
|
+
* Serialize the metadata, nodes, and constant pool.
|
1958
|
+
*/
|
1959
|
+
void
|
1960
|
+
pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
|
1961
|
+
pm_serialize_metadata(parser, buffer);
|
1945
1962
|
|
1946
1963
|
// Here we're going to leave space for the offset of the constant pool in
|
1947
1964
|
// the buffer.
|
@@ -2032,13 +2049,7 @@ pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const
|
|
2032
2049
|
// Append 0 to mark end of tokens.
|
2033
2050
|
pm_buffer_append_byte(buffer, 0);
|
2034
2051
|
|
2035
|
-
|
2036
|
-
pm_buffer_append_varsint(buffer, parser.start_line);
|
2037
|
-
pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
|
2038
|
-
pm_serialize_magic_comment_list(&parser, &parser.magic_comment_list, buffer);
|
2039
|
-
pm_serialize_data_loc(&parser, buffer);
|
2040
|
-
pm_serialize_diagnostic_list(&parser, &parser.error_list, buffer);
|
2041
|
-
pm_serialize_diagnostic_list(&parser, &parser.warning_list, buffer);
|
2052
|
+
pm_serialize_metadata(&parser, buffer);
|
2042
2053
|
|
2043
2054
|
pm_node_destroy(&parser, node);
|
2044
2055
|
pm_parser_free(&parser);
|
data/src/token_type.c
CHANGED
@@ -469,7 +469,7 @@ pm_token_type_human(pm_token_type_t token_type) {
|
|
469
469
|
case PM_TOKEN_HEREDOC_START:
|
470
470
|
return "heredoc beginning";
|
471
471
|
case PM_TOKEN_IDENTIFIER:
|
472
|
-
return "local variable or method
|
472
|
+
return "local variable or method";
|
473
473
|
case PM_TOKEN_IGNORED_NEWLINE:
|
474
474
|
return "ignored newline";
|
475
475
|
case PM_TOKEN_INSTANCE_VARIABLE:
|
@@ -579,7 +579,7 @@ pm_token_type_human(pm_token_type_t token_type) {
|
|
579
579
|
case PM_TOKEN_LABEL:
|
580
580
|
return "label";
|
581
581
|
case PM_TOKEN_LABEL_END:
|
582
|
-
return "
|
582
|
+
return "label terminator";
|
583
583
|
case PM_TOKEN_LAMBDA_BEGIN:
|
584
584
|
return "'{'";
|
585
585
|
case PM_TOKEN_LESS:
|
@@ -681,7 +681,7 @@ pm_token_type_human(pm_token_type_t token_type) {
|
|
681
681
|
case PM_TOKEN_UPLUS:
|
682
682
|
return "'+'";
|
683
683
|
case PM_TOKEN_USTAR:
|
684
|
-
return "
|
684
|
+
return "*";
|
685
685
|
case PM_TOKEN_USTAR_STAR:
|
686
686
|
return "'**'";
|
687
687
|
case PM_TOKEN_WORDS_SEP:
|
data/src/util/pm_constant_pool.c
CHANGED
@@ -186,7 +186,7 @@ pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t
|
|
186
186
|
* the constant is not found.
|
187
187
|
*/
|
188
188
|
pm_constant_id_t
|
189
|
-
pm_constant_pool_find(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
189
|
+
pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
190
190
|
assert(is_power_of_two(pool->capacity));
|
191
191
|
const uint32_t mask = pool->capacity - 1;
|
192
192
|
|
data/src/util/pm_string.c
CHANGED
@@ -65,7 +65,6 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
|
|
65
65
|
HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
|
66
66
|
|
67
67
|
if (file == INVALID_HANDLE_VALUE) {
|
68
|
-
perror("CreateFile failed");
|
69
68
|
return false;
|
70
69
|
}
|
71
70
|
|
@@ -73,7 +72,6 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
|
|
73
72
|
DWORD file_size = GetFileSize(file, NULL);
|
74
73
|
if (file_size == INVALID_FILE_SIZE) {
|
75
74
|
CloseHandle(file);
|
76
|
-
perror("GetFileSize failed");
|
77
75
|
return false;
|
78
76
|
}
|
79
77
|
|
@@ -90,7 +88,6 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
|
|
90
88
|
HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL);
|
91
89
|
if (mapping == NULL) {
|
92
90
|
CloseHandle(file);
|
93
|
-
perror("CreateFileMapping failed");
|
94
91
|
return false;
|
95
92
|
}
|
96
93
|
|
@@ -100,7 +97,6 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
|
|
100
97
|
CloseHandle(file);
|
101
98
|
|
102
99
|
if (source == NULL) {
|
103
|
-
perror("MapViewOfFile failed");
|
104
100
|
return false;
|
105
101
|
}
|
106
102
|
|
@@ -110,7 +106,6 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
|
|
110
106
|
// Open the file for reading
|
111
107
|
int fd = open(filepath, O_RDONLY);
|
112
108
|
if (fd == -1) {
|
113
|
-
perror("open");
|
114
109
|
return false;
|
115
110
|
}
|
116
111
|
|
@@ -118,7 +113,6 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
|
|
118
113
|
struct stat sb;
|
119
114
|
if (fstat(fd, &sb) == -1) {
|
120
115
|
close(fd);
|
121
|
-
perror("fstat");
|
122
116
|
return false;
|
123
117
|
}
|
124
118
|
|
@@ -135,7 +129,6 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
|
|
135
129
|
|
136
130
|
source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
|
137
131
|
if (source == MAP_FAILED) {
|
138
|
-
perror("Map failed");
|
139
132
|
return false;
|
140
133
|
}
|
141
134
|
|
data/src/util/pm_strpbrk.c
CHANGED
@@ -1,10 +1,18 @@
|
|
1
1
|
#include "prism/util/pm_strpbrk.h"
|
2
2
|
|
3
3
|
/**
|
4
|
-
*
|
4
|
+
* Add an invalid multibyte character error to the parser.
|
5
|
+
*/
|
6
|
+
static inline void
|
7
|
+
pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
8
|
+
pm_diagnostic_list_append_format(&parser->error_list, start, end, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *start);
|
9
|
+
}
|
10
|
+
|
11
|
+
/**
|
12
|
+
* This is the default path.
|
5
13
|
*/
|
6
14
|
static inline const uint8_t *
|
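7
|
-
pm_strpbrk_multi_byte(const pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum) {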
15
|
+
pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
|
8
16
|
size_t index = 0;
|
9
17
|
|
10
18
|
while (index < maximum) {
|
@@ -12,22 +20,39 @@ pm_strpbrk_multi_byte(const pm_parser_t *parser, const uint8_t *source, const ui
|
|
12
20
|
return source + index;
|
13
21
|
}
|
14
22
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
23
|
+
if (source[index] < 0x80) {
|
24
|
+
index++;
|
25
|
+
} else {
|
26
|
+
size_t width = pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index));
|
19
27
|
|
20
|
-
|
28
|
+
if (width > 0) {
|
29
|
+
index += width;
|
30
|
+
} else if (!validate) {
|
31
|
+
index++;
|
32
|
+
} else {
|
33
|
+
// At this point we know we have an invalid multibyte character.
|
34
|
+
// We'll walk forward as far as we can until we find the next
|
35
|
+
// valid character so that we don't spam the user with a ton of
|
36
|
+
// the same kind of error.
|
37
|
+
const size_t start = index;
|
38
|
+
|
39
|
+
do {
|
40
|
+
index++;
|
41
|
+
} while (index < maximum && pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
|
42
|
+
|
43
|
+
pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
|
44
|
+
}
|
45
|
+
}
|
21
46
|
}
|
22
47
|
|
23
48
|
return NULL;
|
24
49
|
}
|
25
50
|
|
26
51
|
/**
|
27
|
-
* This is the
|
52
|
+
* This is the path when the encoding is ASCII-8BIT.
|
28
53
|
*/
|
29
54
|
static inline const uint8_t *
|
30
|
-
pm_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t maximum) {
55
|
+
pm_strpbrk_ascii_8bit(const uint8_t *source, const uint8_t *charset, size_t maximum) {
|
31
56
|
size_t index = 0;
|
32
57
|
|
33
58
|
while (index < maximum) {
|
@@ -41,6 +66,85 @@ pm_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t max
|
|
41
66
|
return NULL;
|
42
67
|
}
|
43
68
|
|
69
|
+
/**
|
70
|
+
* This is the slow path that does care about the encoding.
|
71
|
+
*/
|
72
|
+
static inline const uint8_t *
|
73
|
+
pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
|
74
|
+
size_t index = 0;
|
75
|
+
|
76
|
+
while (index < maximum) {
|
77
|
+
if (strchr((const char *) charset, source[index]) != NULL) {
|
78
|
+
return source + index;
|
79
|
+
}
|
80
|
+
|
81
|
+
if (source[index] < 0x80) {
|
82
|
+
index++;
|
83
|
+
} else {
|
84
|
+
size_t width = parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
|
85
|
+
|
86
|
+
if (width > 0) {
|
87
|
+
index += width;
|
88
|
+
} else if (!validate) {
|
89
|
+
index++;
|
90
|
+
} else {
|
91
|
+
// At this point we know we have an invalid multibyte character.
|
92
|
+
// We'll walk forward as far as we can until we find the next
|
93
|
+
// valid character so that we don't spam the user with a ton of
|
94
|
+
// the same kind of error.
|
95
|
+
const size_t start = index;
|
96
|
+
|
97
|
+
do {
|
98
|
+
index++;
|
99
|
+
} while (index < maximum && parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
|
100
|
+
|
101
|
+
pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
|
102
|
+
}
|
103
|
+
}
|
104
|
+
}
|
105
|
+
|
106
|
+
return NULL;
|
107
|
+
}
|
108
|
+
|
109
|
+
/**
|
110
|
+
* This is the fast path that does not care about the encoding because we know
|
111
|
+
* the encoding only supports single-byte characters.
|
112
|
+
*/
|
113
|
+
static inline const uint8_t *
|
114
|
+
pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
|
115
|
+
size_t index = 0;
|
116
|
+
|
117
|
+
while (index < maximum) {
|
118
|
+
if (strchr((const char *) charset, source[index]) != NULL) {
|
119
|
+
return source + index;
|
120
|
+
}
|
121
|
+
|
122
|
+
if (source[index] < 0x80 || !validate) {
|
123
|
+
index++;
|
124
|
+
} else {
|
125
|
+
size_t width = parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
|
126
|
+
|
127
|
+
if (width > 0) {
|
128
|
+
index += width;
|
129
|
+
} else {
|
130
|
+
// At this point we know we have an invalid multibyte character.
|
131
|
+
// We'll walk forward as far as we can until we find the next
|
132
|
+
// valid character so that we don't spam the user with a ton of
|
133
|
+
// the same kind of error.
|
134
|
+
const size_t start = index;
|
135
|
+
|
136
|
+
do {
|
137
|
+
index++;
|
138
|
+
} while (index < maximum && parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
|
139
|
+
|
140
|
+
pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
|
141
|
+
}
|
142
|
+
}
|
143
|
+
}
|
144
|
+
|
145
|
+
return NULL;
|
146
|
+
}
|
147
|
+
|
44
148
|
/**
|
45
149
|
* Here we have rolled our own version of strpbrk. The standard library strpbrk
|
46
150
|
* has undefined behavior when the source string is not null-terminated. We want
|
@@ -57,16 +161,20 @@ pm_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t max
|
|
57
161
|
*
|
58
162
|
* Finally, we want to support encodings wherein the charset could contain
|
59
163
|
* characters that are trailing bytes of multi-byte characters. For example, in
|
60
|
-
*
|
164
|
+
* Shift_JIS, the backslash character can be a trailing byte. In that case we
|
61
165
|
* need to take a slower path and iterate one multi-byte character at a time.
|
62
166
|
*/
|
63
167
|
const uint8_t *
|
64
|
-
pm_strpbrk(const pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length) {
|
168
|
+
pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate) {
|
65
169
|
if (length <= 0) {
|
66
170
|
return NULL;
|
67
|
-
} else if (parser->encoding_changed && parser->encoding->multibyte) {
|
68
|
-
return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length);
|
171
|
+
} else if (!parser->encoding_changed) {
|
172
|
+
return pm_strpbrk_utf8(parser, source, charset, (size_t) length, validate);
|
173
|
+
} else if (parser->encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
|
174
|
+
return pm_strpbrk_ascii_8bit(source, charset, (size_t) length);
|
175
|
+
} else if (parser->encoding->multibyte) {
|
176
|
+
return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length, validate);
|
69
177
|
} else {
|
70
|
-
return pm_strpbrk_single_byte(source, charset, (size_t) length);
|
178
|
+
return pm_strpbrk_single_byte(parser, source, charset, (size_t) length, validate);
|
71
179
|
}
|
72
180
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: prism
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.21.0
|
4
|
+
version: 0.23.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shopify
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-02-
|
11
|
+
date: 2024-02-14 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email:
|
@@ -40,6 +40,7 @@ files:
|
|
40
40
|
- docs/releasing.md
|
41
41
|
- docs/ripper.md
|
42
42
|
- docs/ruby_api.md
|
43
|
+
- docs/ruby_parser_translation.md
|
43
44
|
- docs/serialization.md
|
44
45
|
- docs/testing.md
|
45
46
|
- ext/prism/api_node.c
|
@@ -88,13 +89,14 @@ files:
|
|
88
89
|
- lib/prism/parse_result/comments.rb
|
89
90
|
- lib/prism/parse_result/newlines.rb
|
90
91
|
- lib/prism/pattern.rb
|
91
|
-
- lib/prism/ripper_compat.rb
|
92
92
|
- lib/prism/serialize.rb
|
93
93
|
- lib/prism/translation.rb
|
94
94
|
- lib/prism/translation/parser.rb
|
95
95
|
- lib/prism/translation/parser/compiler.rb
|
96
96
|
- lib/prism/translation/parser/lexer.rb
|
97
97
|
- lib/prism/translation/parser/rubocop.rb
|
98
|
+
- lib/prism/translation/ripper.rb
|
99
|
+
- lib/prism/translation/ruby_parser.rb
|
98
100
|
- lib/prism/visitor.rb
|
99
101
|
- prism.gemspec
|
100
102
|
- rbi/prism.rbi
|
@@ -137,7 +139,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
137
139
|
requirements:
|
138
140
|
- - ">="
|
139
141
|
- !ruby/object:Gem::Version
|
140
|
-
version:
|
142
|
+
version: 2.7.0
|
141
143
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
142
144
|
requirements:
|
143
145
|
- - ">="
|
data/lib/prism/ripper_compat.rb
DELETED
@@ -1,207 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "ripper"
|
4
|
-
|
5
|
-
module Prism
|
6
|
-
# Note: This integration is not finished, and therefore still has many
|
7
|
-
# inconsistencies with Ripper. If you'd like to help out, pull requests would
|
8
|
-
# be greatly appreciated!
|
9
|
-
#
|
10
|
-
# This class is meant to provide a compatibility layer between prism and
|
11
|
-
# Ripper. It functions by parsing the entire tree first and then walking it
|
12
|
-
# and executing each of the Ripper callbacks as it goes.
|
13
|
-
#
|
14
|
-
# This class is going to necessarily be slower than the native Ripper API. It
|
15
|
-
# is meant as a stopgap until developers migrate to using prism. It is also
|
16
|
-
# meant as a test harness for the prism parser.
|
17
|
-
#
|
18
|
-
# To use this class, you treat `Prism::RipperCompat` effectively as you would
|
19
|
-
# treat the `Ripper` class.
|
20
|
-
class RipperCompat < Visitor
|
21
|
-
# This class mirrors the ::Ripper::SexpBuilder subclass of ::Ripper that
|
22
|
-
# returns the arrays of [type, *children].
|
23
|
-
class SexpBuilder < RipperCompat
|
24
|
-
private
|
25
|
-
|
26
|
-
Ripper::PARSER_EVENTS.each do |event|
|
27
|
-
define_method(:"on_#{event}") do |*args|
|
28
|
-
[event, *args]
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
Ripper::SCANNER_EVENTS.each do |event|
|
33
|
-
define_method(:"on_#{event}") do |value|
|
34
|
-
[:"@#{event}", value, [lineno, column]]
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
# This class mirrors the ::Ripper::SexpBuilderPP subclass of ::Ripper that
|
40
|
-
# returns the same values as ::Ripper::SexpBuilder except with a couple of
|
41
|
-
# niceties that flatten linked lists into arrays.
|
42
|
-
class SexpBuilderPP < SexpBuilder
|
43
|
-
private
|
44
|
-
|
45
|
-
def _dispatch_event_new # :nodoc:
|
46
|
-
[]
|
47
|
-
end
|
48
|
-
|
49
|
-
def _dispatch_event_push(list, item) # :nodoc:
|
50
|
-
list << item
|
51
|
-
list
|
52
|
-
end
|
53
|
-
|
54
|
-
Ripper::PARSER_EVENT_TABLE.each do |event, arity|
|
55
|
-
case event
|
56
|
-
when /_new\z/
|
57
|
-
alias_method :"on_#{event}", :_dispatch_event_new if arity == 0
|
58
|
-
when /_add\z/
|
59
|
-
alias_method :"on_#{event}", :_dispatch_event_push
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
# The source that is being parsed.
|
65
|
-
attr_reader :source
|
66
|
-
|
67
|
-
# The current line number of the parser.
|
68
|
-
attr_reader :lineno
|
69
|
-
|
70
|
-
# The current column number of the parser.
|
71
|
-
attr_reader :column
|
72
|
-
|
73
|
-
# Create a new RipperCompat object with the given source.
|
74
|
-
def initialize(source)
|
75
|
-
@source = source
|
76
|
-
@result = nil
|
77
|
-
@lineno = nil
|
78
|
-
@column = nil
|
79
|
-
end
|
80
|
-
|
81
|
-
############################################################################
|
82
|
-
# Public interface
|
83
|
-
############################################################################
|
84
|
-
|
85
|
-
# True if the parser encountered an error during parsing.
|
86
|
-
def error?
|
87
|
-
result.failure?
|
88
|
-
end
|
89
|
-
|
90
|
-
# Parse the source and return the result.
|
91
|
-
def parse
|
92
|
-
result.magic_comments.each do |magic_comment|
|
93
|
-
on_magic_comment(magic_comment.key, magic_comment.value)
|
94
|
-
end
|
95
|
-
|
96
|
-
if error?
|
97
|
-
result.errors.each do |error|
|
98
|
-
on_parse_error(error.message)
|
99
|
-
end
|
100
|
-
else
|
101
|
-
result.value.accept(self)
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
############################################################################
|
106
|
-
# Visitor methods
|
107
|
-
############################################################################
|
108
|
-
|
109
|
-
# Visit a CallNode node.
|
110
|
-
def visit_call_node(node)
|
111
|
-
if !node.message.match?(/^[[:alpha:]_]/) && node.opening_loc.nil? && node.arguments&.arguments&.length == 1
|
112
|
-
left = visit(node.receiver)
|
113
|
-
right = visit(node.arguments.arguments.first)
|
114
|
-
|
115
|
-
bounds(node.location)
|
116
|
-
on_binary(left, node.name, right)
|
117
|
-
else
|
118
|
-
raise NotImplementedError
|
119
|
-
end
|
120
|
-
end
|
121
|
-
|
122
|
-
# Visit a FloatNode node.
|
123
|
-
def visit_float_node(node)
|
124
|
-
bounds(node.location)
|
125
|
-
on_float(node.slice)
|
126
|
-
end
|
127
|
-
|
128
|
-
# Visit a ImaginaryNode node.
|
129
|
-
def visit_imaginary_node(node)
|
130
|
-
bounds(node.location)
|
131
|
-
on_imaginary(node.slice)
|
132
|
-
end
|
133
|
-
|
134
|
-
# Visit an IntegerNode node.
|
135
|
-
def visit_integer_node(node)
|
136
|
-
bounds(node.location)
|
137
|
-
on_int(node.slice)
|
138
|
-
end
|
139
|
-
|
140
|
-
# Visit a RationalNode node.
|
141
|
-
def visit_rational_node(node)
|
142
|
-
bounds(node.location)
|
143
|
-
on_rational(node.slice)
|
144
|
-
end
|
145
|
-
|
146
|
-
# Visit a StatementsNode node.
|
147
|
-
def visit_statements_node(node)
|
148
|
-
bounds(node.location)
|
149
|
-
node.body.inject(on_stmts_new) do |stmts, stmt|
|
150
|
-
on_stmts_add(stmts, visit(stmt))
|
151
|
-
end
|
152
|
-
end
|
153
|
-
|
154
|
-
# Visit a ProgramNode node.
|
155
|
-
def visit_program_node(node)
|
156
|
-
statements = visit(node.statements)
|
157
|
-
bounds(node.location)
|
158
|
-
on_program(statements)
|
159
|
-
end
|
160
|
-
|
161
|
-
############################################################################
|
162
|
-
# Entrypoints for subclasses
|
163
|
-
############################################################################
|
164
|
-
|
165
|
-
# This is a convenience method that runs the SexpBuilder subclass parser.
|
166
|
-
def self.sexp_raw(source)
|
167
|
-
SexpBuilder.new(source).parse
|
168
|
-
end
|
169
|
-
|
170
|
-
# This is a convenience method that runs the SexpBuilderPP subclass parser.
|
171
|
-
def self.sexp(source)
|
172
|
-
SexpBuilderPP.new(source).parse
|
173
|
-
end
|
174
|
-
|
175
|
-
private
|
176
|
-
|
177
|
-
# This method is responsible for updating lineno and column information
|
178
|
-
# to reflect the current node.
|
179
|
-
#
|
180
|
-
# This method could be drastically improved with some caching on the start
|
181
|
-
# of every line, but for now it's good enough.
|
182
|
-
def bounds(location)
|
183
|
-
@lineno = location.start_line
|
184
|
-
@column = location.start_column
|
185
|
-
end
|
186
|
-
|
187
|
-
# Lazily initialize the parse result.
|
188
|
-
def result
|
189
|
-
@result ||= Prism.parse(source)
|
190
|
-
end
|
191
|
-
|
192
|
-
def _dispatch0; end # :nodoc:
|
193
|
-
def _dispatch1(_); end # :nodoc:
|
194
|
-
def _dispatch2(_, _); end # :nodoc:
|
195
|
-
def _dispatch3(_, _, _); end # :nodoc:
|
196
|
-
def _dispatch4(_, _, _, _); end # :nodoc:
|
197
|
-
def _dispatch5(_, _, _, _, _); end # :nodoc:
|
198
|
-
def _dispatch7(_, _, _, _, _, _, _); end # :nodoc:
|
199
|
-
|
200
|
-
alias_method :on_parse_error, :_dispatch1
|
201
|
-
alias_method :on_magic_comment, :_dispatch2
|
202
|
-
|
203
|
-
(Ripper::SCANNER_EVENT_TABLE.merge(Ripper::PARSER_EVENT_TABLE)).each do |event, arity|
|
204
|
-
alias_method :"on_#{event}", :"_dispatch#{arity}"
|
205
|
-
end
|
206
|
-
end
|
207
|
-
end
|