prism 0.17.0 → 0.18.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +37 -1
- data/Makefile +5 -5
- data/README.md +2 -2
- data/config.yml +26 -13
- data/docs/build_system.md +6 -6
- data/docs/building.md +1 -1
- data/docs/configuration.md +1 -0
- data/docs/encoding.md +68 -32
- data/docs/heredocs.md +1 -1
- data/docs/javascript.md +29 -1
- data/docs/releasing.md +4 -1
- data/docs/ruby_api.md +14 -0
- data/ext/prism/api_node.c +74 -45
- data/ext/prism/extconf.rb +91 -127
- data/ext/prism/extension.c +4 -1
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +148 -133
- data/include/prism/diagnostic.h +27 -1
- data/include/prism/enc/pm_encoding.h +42 -1
- data/include/prism/parser.h +6 -0
- data/include/prism/version.h +2 -2
- data/lib/prism/compiler.rb +3 -3
- data/lib/prism/debug.rb +4 -0
- data/lib/prism/desugar_compiler.rb +1 -0
- data/lib/prism/dispatcher.rb +14 -14
- data/lib/prism/dot_visitor.rb +4334 -0
- data/lib/prism/dsl.rb +11 -11
- data/lib/prism/ffi.rb +3 -3
- data/lib/prism/mutation_compiler.rb +6 -6
- data/lib/prism/node.rb +182 -113
- data/lib/prism/node_ext.rb +61 -3
- data/lib/prism/parse_result.rb +46 -12
- data/lib/prism/serialize.rb +124 -130
- data/lib/prism/visitor.rb +3 -3
- data/lib/prism.rb +1 -0
- data/prism.gemspec +5 -1
- data/rbi/prism.rbi +5565 -5540
- data/rbi/prism_static.rbi +138 -142
- data/sig/prism.rbs +47 -32
- data/src/diagnostic.c +61 -3
- data/src/enc/pm_big5.c +63 -0
- data/src/enc/pm_cp51932.c +57 -0
- data/src/enc/pm_euc_jp.c +10 -0
- data/src/enc/pm_gbk.c +5 -2
- data/src/enc/pm_tables.c +1478 -148
- data/src/node.c +33 -21
- data/src/prettyprint.c +1027 -925
- data/src/prism.c +925 -374
- data/src/regexp.c +12 -12
- data/src/serialize.c +36 -9
- metadata +6 -2
data/src/enc/pm_big5.c
CHANGED
@@ -15,6 +15,22 @@ pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
|
|
15
15
|
return 0;
|
16
16
|
}
|
17
17
|
|
18
|
+
/**
 * Measure the character that starts at b using the byte ranges shared by the
 * Big5 variant encodings defined in this file.
 *
 * @param b Pointer to the first byte to inspect. It is dereferenced before n
 *     is consulted.
 * @param n Number of bytes available starting at b.
 * @return 1 when the first byte is below 0x80, 2 when the first two bytes form
 *     a valid lead/trail pair, and 0 otherwise.
 */
static size_t
pm_encoding_big5_star_char_width(const uint8_t *b, ptrdiff_t n) {
    const uint8_t lead = b[0];

    // Any byte below 0x80 is a complete character on its own.
    if (lead < 0x80) {
        return 1;
    }

    // A double byte character needs a second byte to be available and a lead
    // byte in the range 0x87-0xFE.
    if (n <= 1 || lead < 0x87 || lead > 0xFE) {
        return 0;
    }

    // The trail byte has to fall in one of two ranges: 0x40-0x7E or 0xA1-0xFE.
    const uint8_t trail = b[1];
    const bool in_low_range = (trail >= 0x40 && trail <= 0x7E);
    const bool in_high_range = (trail >= 0xA1 && trail <= 0xFE);

    return (in_low_range || in_high_range) ? 2 : 0;
}
|
33
|
+
|
18
34
|
static size_t
|
19
35
|
pm_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
20
36
|
if (pm_encoding_big5_char_width(b, n) == 1) {
|
@@ -24,6 +40,15 @@ pm_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
|
24
40
|
}
|
25
41
|
}
|
26
42
|
|
43
|
+
/**
 * Alphabetic check for the Big5 variant encodings defined in this file.
 *
 * @param b Pointer to the first byte of the character to inspect.
 * @param n Number of bytes available starting at b.
 * @return The result of pm_encoding_ascii_alpha_char when the character is a
 *     single byte wide, and 0 for every other width.
 */
static size_t
pm_encoding_big5_star_alpha_char(const uint8_t *b, ptrdiff_t n) {
    // Only single byte characters are handed to the ASCII helper.
    if (pm_encoding_big5_star_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alpha_char(b, n);
}
|
51
|
+
|
27
52
|
static size_t
|
28
53
|
pm_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
29
54
|
if (pm_encoding_big5_char_width(b, n) == 1) {
|
@@ -33,6 +58,15 @@ pm_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
|
33
58
|
}
|
34
59
|
}
|
35
60
|
|
61
|
+
/**
 * Alphanumeric check for the Big5 variant encodings defined in this file.
 *
 * @param b Pointer to the first byte of the character to inspect.
 * @param n Number of bytes available starting at b.
 * @return The result of pm_encoding_ascii_alnum_char when the character is a
 *     single byte wide, and 0 for every other width.
 */
static size_t
pm_encoding_big5_star_alnum_char(const uint8_t *b, ptrdiff_t n) {
    // Only single byte characters are handed to the ASCII helper.
    if (pm_encoding_big5_star_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alnum_char(b, n);
}
|
69
|
+
|
36
70
|
static bool
|
37
71
|
pm_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
38
72
|
if (pm_encoding_big5_char_width(b, n) == 1) {
|
@@ -42,6 +76,15 @@ pm_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
|
42
76
|
}
|
43
77
|
}
|
44
78
|
|
79
|
+
/**
 * Uppercase check for the Big5 variant encodings defined in this file.
 *
 * @param b Pointer to the first byte of the character to inspect.
 * @param n Number of bytes available starting at b.
 * @return The result of pm_encoding_ascii_isupper_char when the character is a
 *     single byte wide, and false for every other width.
 */
static bool
pm_encoding_big5_star_isupper_char(const uint8_t *b, ptrdiff_t n) {
    // Only single byte characters are handed to the ASCII helper.
    if (pm_encoding_big5_star_char_width(b, n) != 1) {
        return false;
    }

    return pm_encoding_ascii_isupper_char(b, n);
}
|
87
|
+
|
45
88
|
/** Big5 encoding */
|
46
89
|
pm_encoding_t pm_encoding_big5 = {
|
47
90
|
.name = "big5",
|
@@ -51,3 +94,23 @@ pm_encoding_t pm_encoding_big5 = {
|
|
51
94
|
.isupper_char = pm_encoding_big5_isupper_char,
|
52
95
|
.multibyte = true
|
53
96
|
};
|
97
|
+
|
98
|
+
/** Big5-HKSCS encoding */
|
99
|
+
pm_encoding_t pm_encoding_big5_hkscs = {
|
100
|
+
.name = "big5-hkscs",
|
101
|
+
.char_width = pm_encoding_big5_star_char_width,
|
102
|
+
.alnum_char = pm_encoding_big5_star_alnum_char,
|
103
|
+
.alpha_char = pm_encoding_big5_star_alpha_char,
|
104
|
+
.isupper_char = pm_encoding_big5_star_isupper_char,
|
105
|
+
.multibyte = true
|
106
|
+
};
|
107
|
+
|
108
|
+
/** Big5-UAO encoding */
|
109
|
+
pm_encoding_t pm_encoding_big5_uao = {
|
110
|
+
.name = "big5-uao",
|
111
|
+
.char_width = pm_encoding_big5_star_char_width,
|
112
|
+
.alnum_char = pm_encoding_big5_star_alnum_char,
|
113
|
+
.alpha_char = pm_encoding_big5_star_alpha_char,
|
114
|
+
.isupper_char = pm_encoding_big5_star_isupper_char,
|
115
|
+
.multibyte = true
|
116
|
+
};
|
data/src/enc/pm_cp51932.c
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
#include "prism/enc/pm_encoding.h"
|
2
|
+
|
3
|
+
/**
 * Measure the character that starts at b using the cp51932 byte ranges.
 *
 * @param b Pointer to the first byte to inspect. It is dereferenced before n
 *     is consulted.
 * @param n Number of bytes available starting at b.
 * @return 1 when the first byte is below 0x80, 2 when the first two bytes form
 *     a valid lead/trail pair, and 0 otherwise.
 */
static size_t
pm_encoding_cp51932_char_width(const uint8_t *b, ptrdiff_t n) {
    const uint8_t first = b[0];

    // Any byte below 0x80 is a complete character on its own.
    if (first < 0x80) {
        return 1;
    }

    // Everything else has to be a two byte sequence, so a second byte must be
    // available before it can be examined.
    if (n <= 1) {
        return 0;
    }

    // The lead byte is either 0x8e or in 0xa1-0xfe; the trail byte is always
    // in 0xa1-0xfe.
    const bool lead_ok = (first == 0x8e) || (first >= 0xa1 && first <= 0xfe);
    const bool trail_ok = (b[1] >= 0xa1 && b[1] <= 0xfe);

    return (lead_ok && trail_ok) ? 2 : 0;
}
|
21
|
+
|
22
|
+
/**
 * Alphabetic check for the cp51932 encoding.
 *
 * @param b Pointer to the first byte of the character to inspect.
 * @param n Number of bytes available starting at b.
 * @return The result of pm_encoding_ascii_alpha_char when the character is a
 *     single byte wide, and 0 for every other width.
 */
static size_t
pm_encoding_cp51932_alpha_char(const uint8_t *b, ptrdiff_t n) {
    // Only single byte characters are handed to the ASCII helper.
    if (pm_encoding_cp51932_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alpha_char(b, n);
}
|
30
|
+
|
31
|
+
/**
 * Alphanumeric check for the cp51932 encoding.
 *
 * @param b Pointer to the first byte of the character to inspect.
 * @param n Number of bytes available starting at b.
 * @return The result of pm_encoding_ascii_alnum_char when the character is a
 *     single byte wide, and 0 for every other width.
 */
static size_t
pm_encoding_cp51932_alnum_char(const uint8_t *b, ptrdiff_t n) {
    // Only single byte characters are handed to the ASCII helper.
    if (pm_encoding_cp51932_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alnum_char(b, n);
}
|
39
|
+
|
40
|
+
/**
 * Uppercase check for the cp51932 encoding.
 *
 * @param b Pointer to the first byte of the character to inspect.
 * @param n Number of bytes available starting at b.
 * @return The result of pm_encoding_ascii_isupper_char when the character is a
 *     single byte wide, and false for every other width.
 */
static bool
pm_encoding_cp51932_isupper_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_cp51932_char_width(b, n) == 1) {
        return pm_encoding_ascii_isupper_char(b, n);
    } else {
        // The function returns bool, so use the boolean literal here rather
        // than the integer 0.
        return false;
    }
}
|
48
|
+
|
49
|
+
/** cp51932 encoding */
|
50
|
+
pm_encoding_t pm_encoding_cp51932 = {
|
51
|
+
.name = "cp51932",
|
52
|
+
.char_width = pm_encoding_cp51932_char_width,
|
53
|
+
.alnum_char = pm_encoding_cp51932_alnum_char,
|
54
|
+
.alpha_char = pm_encoding_cp51932_alpha_char,
|
55
|
+
.isupper_char = pm_encoding_cp51932_isupper_char,
|
56
|
+
.multibyte = true
|
57
|
+
};
|
data/src/enc/pm_euc_jp.c
CHANGED
@@ -18,6 +18,16 @@ pm_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
|
|
18
18
|
return 2;
|
19
19
|
}
|
20
20
|
|
21
|
+
// These are the triple byte characters.
|
22
|
+
if (
|
23
|
+
(n > 2) &&
|
24
|
+
(b[0] == 0x8F) &&
|
25
|
+
(b[1] >= 0xA1 && b[1] <= 0xFE) &&
|
26
|
+
(b[2] >= 0xA1 && b[2] <= 0xFE)
|
27
|
+
) {
|
28
|
+
return 3;
|
29
|
+
}
|
30
|
+
|
21
31
|
return 0;
|
22
32
|
}
|
23
33
|
|
data/src/enc/pm_gbk.c
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
static size_t
|
4
4
|
pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
|
5
5
|
// These are the single byte characters.
|
6
|
-
if (*b < 0x80) {
|
6
|
+
if (*b <= 0x80) {
|
7
7
|
return 1;
|
8
8
|
}
|
9
9
|
|
@@ -15,7 +15,10 @@ pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
|
|
15
15
|
((b[0] >= 0xB0 && b[0] <= 0xF7) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // GBK/2
|
16
16
|
((b[0] >= 0x81 && b[0] <= 0xA0) && (b[1] >= 0x40 && b[1] <= 0xFE) && (b[1] != 0x7F)) || // GBK/3
|
17
17
|
((b[0] >= 0xAA && b[0] <= 0xFE) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) || // GBK/4
|
18
|
-
((b[0] >= 0xA8 && b[0] <= 0xA9) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) // GBK/5
|
18
|
+
((b[0] >= 0xA8 && b[0] <= 0xA9) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) || // GBK/5
|
19
|
+
((b[0] >= 0xAA && b[0] <= 0xAF) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // user-defined 1
|
20
|
+
((b[0] >= 0xF8 && b[0] <= 0xFE) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // user-defined 2
|
21
|
+
((b[0] >= 0xA1 && b[0] <= 0xA7) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) // user-defined 3
|
19
22
|
)
|
20
23
|
) {
|
21
24
|
return 2;
|