prism 0.17.1 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +60 -1
- data/Makefile +5 -5
- data/README.md +4 -3
- data/config.yml +214 -68
- data/docs/build_system.md +6 -6
- data/docs/building.md +10 -3
- data/docs/configuration.md +11 -9
- data/docs/encoding.md +92 -88
- data/docs/heredocs.md +1 -1
- data/docs/javascript.md +29 -1
- data/docs/local_variable_depth.md +229 -0
- data/docs/ruby_api.md +16 -0
- data/docs/serialization.md +18 -13
- data/ext/prism/api_node.c +411 -240
- data/ext/prism/extconf.rb +97 -127
- data/ext/prism/extension.c +97 -33
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +377 -159
- data/include/prism/defines.h +17 -0
- data/include/prism/diagnostic.h +38 -6
- data/include/prism/{enc/pm_encoding.h → encoding.h} +126 -64
- data/include/prism/options.h +2 -2
- data/include/prism/parser.h +62 -36
- data/include/prism/regexp.h +2 -2
- data/include/prism/util/pm_buffer.h +9 -1
- data/include/prism/util/pm_memchr.h +2 -2
- data/include/prism/util/pm_strpbrk.h +3 -3
- data/include/prism/version.h +3 -3
- data/include/prism.h +13 -15
- data/lib/prism/compiler.rb +15 -3
- data/lib/prism/debug.rb +13 -4
- data/lib/prism/desugar_compiler.rb +4 -3
- data/lib/prism/dispatcher.rb +70 -14
- data/lib/prism/dot_visitor.rb +4612 -0
- data/lib/prism/dsl.rb +77 -57
- data/lib/prism/ffi.rb +19 -6
- data/lib/prism/lex_compat.rb +19 -9
- data/lib/prism/mutation_compiler.rb +26 -6
- data/lib/prism/node.rb +1314 -522
- data/lib/prism/node_ext.rb +102 -19
- data/lib/prism/parse_result.rb +58 -27
- data/lib/prism/ripper_compat.rb +49 -34
- data/lib/prism/serialize.rb +251 -227
- data/lib/prism/visitor.rb +15 -3
- data/lib/prism.rb +21 -4
- data/prism.gemspec +7 -9
- data/rbi/prism.rbi +688 -284
- data/rbi/prism_static.rbi +3 -0
- data/sig/prism.rbs +426 -156
- data/sig/prism_static.rbs +1 -0
- data/src/diagnostic.c +280 -216
- data/src/encoding.c +5137 -0
- data/src/node.c +99 -21
- data/src/options.c +21 -2
- data/src/prettyprint.c +1743 -1241
- data/src/prism.c +1774 -831
- data/src/regexp.c +15 -15
- data/src/serialize.c +261 -164
- data/src/util/pm_buffer.c +10 -1
- data/src/util/pm_memchr.c +1 -1
- data/src/util/pm_strpbrk.c +4 -4
- metadata +8 -10
- data/src/enc/pm_big5.c +0 -53
- data/src/enc/pm_euc_jp.c +0 -59
- data/src/enc/pm_gbk.c +0 -62
- data/src/enc/pm_shift_jis.c +0 -57
- data/src/enc/pm_tables.c +0 -743
- data/src/enc/pm_unicode.c +0 -2369
- data/src/enc/pm_windows_31j.c +0 -57
data/src/util/pm_buffer.c
CHANGED
@@ -138,7 +138,7 @@ pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value) {
|
|
138
138
|
* Append a 32-bit unsigned integer to the buffer as a variable-length integer.
|
139
139
|
*/
|
140
140
|
void
|
141
|
-
pm_buffer_append_varint(pm_buffer_t *buffer, uint32_t value) {
|
141
|
+
pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value) {
|
142
142
|
if (value < 128) {
|
143
143
|
pm_buffer_append_byte(buffer, (uint8_t) value);
|
144
144
|
} else {
|
@@ -151,6 +151,15 @@ pm_buffer_append_varint(pm_buffer_t *buffer, uint32_t value) {
|
|
151
151
|
}
|
152
152
|
}
|
153
153
|
|
154
|
+
/**
|
155
|
+
* Append a 32-bit signed integer to the buffer as a variable-length integer.
|
156
|
+
*/
|
157
|
+
void
|
158
|
+
pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value) {
|
159
|
+
uint32_t unsigned_int = ((uint32_t)(value) << 1) ^ ((uint32_t)(value >> 31));
|
160
|
+
pm_buffer_append_varuint(buffer, unsigned_int);
|
161
|
+
}
|
162
|
+
|
154
163
|
/**
|
155
164
|
* Concatenate one buffer onto another.
|
156
165
|
*/
|
data/src/util/pm_memchr.c
CHANGED
@@ -8,7 +8,7 @@
|
|
8
8
|
* of a multibyte character.
|
9
9
|
*/
|
10
10
|
void *
|
11
|
-
pm_memchr(const void *memory, int character, size_t number, bool encoding_changed, pm_encoding_t *encoding) {
|
11
|
+
pm_memchr(const void *memory, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding) {
|
12
12
|
if (encoding_changed && encoding->multibyte && character >= PRISM_MEMCHR_TRAILING_BYTE_MINIMUM) {
|
13
13
|
const uint8_t *source = (const uint8_t *) memory;
|
14
14
|
size_t index = 0;
|
data/src/util/pm_strpbrk.c
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
* This is the slow path that does care about the encoding.
|
5
5
|
*/
|
6
6
|
static inline const uint8_t *
|
7
|
-
pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum) {
|
7
|
+
pm_strpbrk_multi_byte(const pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum) {
|
8
8
|
size_t index = 0;
|
9
9
|
|
10
10
|
while (index < maximum) {
|
@@ -12,7 +12,7 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
|
|
12
12
|
return source + index;
|
13
13
|
}
|
14
14
|
|
15
|
-
size_t width = parser->encoding.char_width(source + index, (ptrdiff_t) (maximum - index));
|
15
|
+
size_t width = parser->encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
|
16
16
|
if (width == 0) {
|
17
17
|
return NULL;
|
18
18
|
}
|
@@ -61,10 +61,10 @@ pm_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t max
|
|
61
61
|
* need to take a slower path and iterate one multi-byte character at a time.
|
62
62
|
*/
|
63
63
|
const uint8_t *
|
64
|
-
pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length) {
|
64
|
+
pm_strpbrk(const pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length) {
|
65
65
|
if (length <= 0) {
|
66
66
|
return NULL;
|
67
|
-
} else if (parser->encoding_changed && parser->encoding.multibyte) {
|
67
|
+
} else if (parser->encoding_changed && parser->encoding->multibyte) {
|
68
68
|
return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length);
|
69
69
|
} else {
|
70
70
|
return pm_strpbrk_single_byte(source, charset, (size_t) length);
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: prism
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.17.1
|
4
|
+
version: 0.19.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shopify
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-12-14 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email:
|
@@ -33,6 +33,7 @@ files:
|
|
33
33
|
- docs/fuzzing.md
|
34
34
|
- docs/heredocs.md
|
35
35
|
- docs/javascript.md
|
36
|
+
- docs/local_variable_depth.md
|
36
37
|
- docs/mapping.md
|
37
38
|
- docs/releasing.md
|
38
39
|
- docs/ripper.md
|
@@ -48,7 +49,7 @@ files:
|
|
48
49
|
- include/prism/ast.h
|
49
50
|
- include/prism/defines.h
|
50
51
|
- include/prism/diagnostic.h
|
51
|
-
- include/prism/enc/pm_encoding.h
|
52
|
+
- include/prism/encoding.h
|
52
53
|
- include/prism/node.h
|
53
54
|
- include/prism/options.h
|
54
55
|
- include/prism/pack.h
|
@@ -72,6 +73,7 @@ files:
|
|
72
73
|
- lib/prism/debug.rb
|
73
74
|
- lib/prism/desugar_compiler.rb
|
74
75
|
- lib/prism/dispatcher.rb
|
76
|
+
- lib/prism/dot_visitor.rb
|
75
77
|
- lib/prism/dsl.rb
|
76
78
|
- lib/prism/ffi.rb
|
77
79
|
- lib/prism/lex_compat.rb
|
@@ -93,13 +95,7 @@ files:
|
|
93
95
|
- sig/prism.rbs
|
94
96
|
- sig/prism_static.rbs
|
95
97
|
- src/diagnostic.c
|
96
|
-
- src/enc/pm_big5.c
|
97
|
-
- src/enc/pm_euc_jp.c
|
98
|
-
- src/enc/pm_gbk.c
|
99
|
-
- src/enc/pm_shift_jis.c
|
100
|
-
- src/enc/pm_tables.c
|
101
|
-
- src/enc/pm_unicode.c
|
102
|
-
- src/enc/pm_windows_31j.c
|
98
|
+
- src/encoding.c
|
103
99
|
- src/node.c
|
104
100
|
- src/options.c
|
105
101
|
- src/pack.c
|
@@ -124,6 +120,8 @@ licenses:
|
|
124
120
|
- MIT
|
125
121
|
metadata:
|
126
122
|
allowed_push_host: https://rubygems.org
|
123
|
+
source_code_uri: https://github.com/ruby/prism
|
124
|
+
changelog_uri: https://github.com/ruby/prism/blob/main/CHANGELOG.md
|
127
125
|
post_install_message:
|
128
126
|
rdoc_options: []
|
129
127
|
require_paths:
|
data/src/enc/pm_big5.c
DELETED
@@ -1,53 +0,0 @@
|
|
1
|
-
#include "prism/enc/pm_encoding.h"
|
2
|
-
|
3
|
-
static size_t
|
4
|
-
pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
|
5
|
-
// These are the single byte characters.
|
6
|
-
if (*b < 0x80) {
|
7
|
-
return 1;
|
8
|
-
}
|
9
|
-
|
10
|
-
// These are the double byte characters.
|
11
|
-
if ((n > 1) && (b[0] >= 0xA1 && b[0] <= 0xFE) && (b[1] >= 0x40 && b[1] <= 0xFE)) {
|
12
|
-
return 2;
|
13
|
-
}
|
14
|
-
|
15
|
-
return 0;
|
16
|
-
}
|
17
|
-
|
18
|
-
static size_t
|
19
|
-
pm_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
20
|
-
if (pm_encoding_big5_char_width(b, n) == 1) {
|
21
|
-
return pm_encoding_ascii_alpha_char(b, n);
|
22
|
-
} else {
|
23
|
-
return 0;
|
24
|
-
}
|
25
|
-
}
|
26
|
-
|
27
|
-
static size_t
|
28
|
-
pm_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
29
|
-
if (pm_encoding_big5_char_width(b, n) == 1) {
|
30
|
-
return pm_encoding_ascii_alnum_char(b, n);
|
31
|
-
} else {
|
32
|
-
return 0;
|
33
|
-
}
|
34
|
-
}
|
35
|
-
|
36
|
-
static bool
|
37
|
-
pm_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
38
|
-
if (pm_encoding_big5_char_width(b, n) == 1) {
|
39
|
-
return pm_encoding_ascii_isupper_char(b, n);
|
40
|
-
} else {
|
41
|
-
return false;
|
42
|
-
}
|
43
|
-
}
|
44
|
-
|
45
|
-
/** Big5 encoding */
|
46
|
-
pm_encoding_t pm_encoding_big5 = {
|
47
|
-
.name = "big5",
|
48
|
-
.char_width = pm_encoding_big5_char_width,
|
49
|
-
.alnum_char = pm_encoding_big5_alnum_char,
|
50
|
-
.alpha_char = pm_encoding_big5_alpha_char,
|
51
|
-
.isupper_char = pm_encoding_big5_isupper_char,
|
52
|
-
.multibyte = true
|
53
|
-
};
|
data/src/enc/pm_euc_jp.c
DELETED
@@ -1,59 +0,0 @@
|
|
1
|
-
#include "prism/enc/pm_encoding.h"
|
2
|
-
|
3
|
-
static size_t
|
4
|
-
pm_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
|
5
|
-
// These are the single byte characters.
|
6
|
-
if (*b < 0x80) {
|
7
|
-
return 1;
|
8
|
-
}
|
9
|
-
|
10
|
-
// These are the double byte characters.
|
11
|
-
if (
|
12
|
-
(n > 1) &&
|
13
|
-
(
|
14
|
-
((b[0] == 0x8E) && (b[1] >= 0xA1 && b[1] <= 0xFE)) ||
|
15
|
-
((b[0] >= 0xA1 && b[0] <= 0xFE) && (b[1] >= 0xA1 && b[1] <= 0xFE))
|
16
|
-
)
|
17
|
-
) {
|
18
|
-
return 2;
|
19
|
-
}
|
20
|
-
|
21
|
-
return 0;
|
22
|
-
}
|
23
|
-
|
24
|
-
static size_t
|
25
|
-
pm_encoding_euc_jp_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
26
|
-
if (pm_encoding_euc_jp_char_width(b, n) == 1) {
|
27
|
-
return pm_encoding_ascii_alpha_char(b, n);
|
28
|
-
} else {
|
29
|
-
return 0;
|
30
|
-
}
|
31
|
-
}
|
32
|
-
|
33
|
-
static size_t
|
34
|
-
pm_encoding_euc_jp_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
35
|
-
if (pm_encoding_euc_jp_char_width(b, n) == 1) {
|
36
|
-
return pm_encoding_ascii_alnum_char(b, n);
|
37
|
-
} else {
|
38
|
-
return 0;
|
39
|
-
}
|
40
|
-
}
|
41
|
-
|
42
|
-
static bool
|
43
|
-
pm_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
44
|
-
if (pm_encoding_euc_jp_char_width(b, n) == 1) {
|
45
|
-
return pm_encoding_ascii_isupper_char(b, n);
|
46
|
-
} else {
|
47
|
-
return 0;
|
48
|
-
}
|
49
|
-
}
|
50
|
-
|
51
|
-
/** EUC-JP encoding */
|
52
|
-
pm_encoding_t pm_encoding_euc_jp = {
|
53
|
-
.name = "euc-jp",
|
54
|
-
.char_width = pm_encoding_euc_jp_char_width,
|
55
|
-
.alnum_char = pm_encoding_euc_jp_alnum_char,
|
56
|
-
.alpha_char = pm_encoding_euc_jp_alpha_char,
|
57
|
-
.isupper_char = pm_encoding_euc_jp_isupper_char,
|
58
|
-
.multibyte = true
|
59
|
-
};
|
data/src/enc/pm_gbk.c
DELETED
@@ -1,62 +0,0 @@
|
|
1
|
-
#include "prism/enc/pm_encoding.h"
|
2
|
-
|
3
|
-
static size_t
|
4
|
-
pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
|
5
|
-
// These are the single byte characters.
|
6
|
-
if (*b < 0x80) {
|
7
|
-
return 1;
|
8
|
-
}
|
9
|
-
|
10
|
-
// These are the double byte characters.
|
11
|
-
if (
|
12
|
-
(n > 1) &&
|
13
|
-
(
|
14
|
-
((b[0] >= 0xA1 && b[0] <= 0xA9) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // GBK/1
|
15
|
-
((b[0] >= 0xB0 && b[0] <= 0xF7) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // GBK/2
|
16
|
-
((b[0] >= 0x81 && b[0] <= 0xA0) && (b[1] >= 0x40 && b[1] <= 0xFE) && (b[1] != 0x7F)) || // GBK/3
|
17
|
-
((b[0] >= 0xAA && b[0] <= 0xFE) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) || // GBK/4
|
18
|
-
((b[0] >= 0xA8 && b[0] <= 0xA9) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) // GBK/5
|
19
|
-
)
|
20
|
-
) {
|
21
|
-
return 2;
|
22
|
-
}
|
23
|
-
|
24
|
-
return 0;
|
25
|
-
}
|
26
|
-
|
27
|
-
static size_t
|
28
|
-
pm_encoding_gbk_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
29
|
-
if (pm_encoding_gbk_char_width(b, n) == 1) {
|
30
|
-
return pm_encoding_ascii_alpha_char(b, n);
|
31
|
-
} else {
|
32
|
-
return 0;
|
33
|
-
}
|
34
|
-
}
|
35
|
-
|
36
|
-
static size_t
|
37
|
-
pm_encoding_gbk_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
38
|
-
if (pm_encoding_gbk_char_width(b, n) == 1) {
|
39
|
-
return pm_encoding_ascii_alnum_char(b, n);
|
40
|
-
} else {
|
41
|
-
return 0;
|
42
|
-
}
|
43
|
-
}
|
44
|
-
|
45
|
-
static bool
|
46
|
-
pm_encoding_gbk_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
47
|
-
if (pm_encoding_gbk_char_width(b, n) == 1) {
|
48
|
-
return pm_encoding_ascii_isupper_char(b, n);
|
49
|
-
} else {
|
50
|
-
return false;
|
51
|
-
}
|
52
|
-
}
|
53
|
-
|
54
|
-
/** GBK encoding */
|
55
|
-
pm_encoding_t pm_encoding_gbk = {
|
56
|
-
.name = "gbk",
|
57
|
-
.char_width = pm_encoding_gbk_char_width,
|
58
|
-
.alnum_char = pm_encoding_gbk_alnum_char,
|
59
|
-
.alpha_char = pm_encoding_gbk_alpha_char,
|
60
|
-
.isupper_char = pm_encoding_gbk_isupper_char,
|
61
|
-
.multibyte = true
|
62
|
-
};
|
data/src/enc/pm_shift_jis.c
DELETED
@@ -1,57 +0,0 @@
|
|
1
|
-
#include "prism/enc/pm_encoding.h"
|
2
|
-
|
3
|
-
static size_t
|
4
|
-
pm_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
|
5
|
-
// These are the single byte characters.
|
6
|
-
if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
|
7
|
-
return 1;
|
8
|
-
}
|
9
|
-
|
10
|
-
// These are the double byte characters.
|
11
|
-
if (
|
12
|
-
(n > 1) &&
|
13
|
-
((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) &&
|
14
|
-
(b[1] >= 0x40 && b[1] <= 0xFC)
|
15
|
-
) {
|
16
|
-
return 2;
|
17
|
-
}
|
18
|
-
|
19
|
-
return 0;
|
20
|
-
}
|
21
|
-
|
22
|
-
static size_t
|
23
|
-
pm_encoding_shift_jis_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
24
|
-
if (pm_encoding_shift_jis_char_width(b, n) == 1) {
|
25
|
-
return pm_encoding_ascii_alpha_char(b, n);
|
26
|
-
} else {
|
27
|
-
return 0;
|
28
|
-
}
|
29
|
-
}
|
30
|
-
|
31
|
-
static size_t
|
32
|
-
pm_encoding_shift_jis_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
33
|
-
if (pm_encoding_shift_jis_char_width(b, n) == 1) {
|
34
|
-
return pm_encoding_ascii_alnum_char(b, n);
|
35
|
-
} else {
|
36
|
-
return 0;
|
37
|
-
}
|
38
|
-
}
|
39
|
-
|
40
|
-
static bool
|
41
|
-
pm_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
42
|
-
if (pm_encoding_shift_jis_char_width(b, n) == 1) {
|
43
|
-
return pm_encoding_ascii_isupper_char(b, n);
|
44
|
-
} else {
|
45
|
-
return 0;
|
46
|
-
}
|
47
|
-
}
|
48
|
-
|
49
|
-
/** Shift_JIS encoding */
|
50
|
-
pm_encoding_t pm_encoding_shift_jis = {
|
51
|
-
.name = "shift_jis",
|
52
|
-
.char_width = pm_encoding_shift_jis_char_width,
|
53
|
-
.alnum_char = pm_encoding_shift_jis_alnum_char,
|
54
|
-
.alpha_char = pm_encoding_shift_jis_alpha_char,
|
55
|
-
.isupper_char = pm_encoding_shift_jis_isupper_char,
|
56
|
-
.multibyte = true
|
57
|
-
};
|