prism 0.17.1 → 0.18.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -1
- data/Makefile +5 -5
- data/README.md +2 -2
- data/config.yml +26 -13
- data/docs/build_system.md +6 -6
- data/docs/building.md +1 -1
- data/docs/configuration.md +1 -0
- data/docs/encoding.md +68 -32
- data/docs/heredocs.md +1 -1
- data/docs/javascript.md +29 -1
- data/docs/ruby_api.md +14 -0
- data/ext/prism/api_node.c +74 -45
- data/ext/prism/extconf.rb +91 -127
- data/ext/prism/extension.c +1 -1
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +148 -133
- data/include/prism/diagnostic.h +27 -1
- data/include/prism/enc/pm_encoding.h +42 -1
- data/include/prism/parser.h +6 -0
- data/include/prism/version.h +3 -3
- data/lib/prism/compiler.rb +3 -3
- data/lib/prism/debug.rb +4 -0
- data/lib/prism/desugar_compiler.rb +1 -0
- data/lib/prism/dispatcher.rb +14 -14
- data/lib/prism/dot_visitor.rb +4334 -0
- data/lib/prism/dsl.rb +11 -11
- data/lib/prism/ffi.rb +3 -3
- data/lib/prism/mutation_compiler.rb +6 -6
- data/lib/prism/node.rb +182 -113
- data/lib/prism/node_ext.rb +61 -3
- data/lib/prism/parse_result.rb +46 -12
- data/lib/prism/serialize.rb +125 -131
- data/lib/prism/visitor.rb +3 -3
- data/lib/prism.rb +1 -0
- data/prism.gemspec +5 -1
- data/rbi/prism.rbi +83 -54
- data/sig/prism.rbs +47 -32
- data/src/diagnostic.c +61 -3
- data/src/enc/pm_big5.c +63 -0
- data/src/enc/pm_cp51932.c +57 -0
- data/src/enc/pm_euc_jp.c +10 -0
- data/src/enc/pm_gbk.c +5 -2
- data/src/enc/pm_tables.c +1478 -148
- data/src/node.c +33 -21
- data/src/prettyprint.c +1027 -925
- data/src/prism.c +925 -374
- data/src/regexp.c +12 -12
- data/src/serialize.c +36 -9
- metadata +6 -2
@@ -0,0 +1,57 @@
|
|
1
|
+
#include "prism/enc/pm_encoding.h"
|
2
|
+
|
3
|
+
static size_t
|
4
|
+
pm_encoding_cp51932_char_width(const uint8_t *b, ptrdiff_t n) {
|
5
|
+
// These are the single byte characters.
|
6
|
+
if (*b < 0x80) {
|
7
|
+
return 1;
|
8
|
+
}
|
9
|
+
|
10
|
+
// These are the double byte characters.
|
11
|
+
if (
|
12
|
+
(n > 1) &&
|
13
|
+
((b[0] >= 0xa1 && b[0] <= 0xfe) || (b[0] == 0x8e)) &&
|
14
|
+
(b[1] >= 0xa1 && b[1] <= 0xfe)
|
15
|
+
) {
|
16
|
+
return 2;
|
17
|
+
}
|
18
|
+
|
19
|
+
return 0;
|
20
|
+
}
|
21
|
+
|
22
|
+
static size_t
|
23
|
+
pm_encoding_cp51932_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
24
|
+
if (pm_encoding_cp51932_char_width(b, n) == 1) {
|
25
|
+
return pm_encoding_ascii_alpha_char(b, n);
|
26
|
+
} else {
|
27
|
+
return 0;
|
28
|
+
}
|
29
|
+
}
|
30
|
+
|
31
|
+
static size_t
|
32
|
+
pm_encoding_cp51932_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
33
|
+
if (pm_encoding_cp51932_char_width(b, n) == 1) {
|
34
|
+
return pm_encoding_ascii_alnum_char(b, n);
|
35
|
+
} else {
|
36
|
+
return 0;
|
37
|
+
}
|
38
|
+
}
|
39
|
+
|
40
|
+
static bool
|
41
|
+
pm_encoding_cp51932_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
42
|
+
if (pm_encoding_cp51932_char_width(b, n) == 1) {
|
43
|
+
return pm_encoding_ascii_isupper_char(b, n);
|
44
|
+
} else {
|
45
|
+
return 0;
|
46
|
+
}
|
47
|
+
}
|
48
|
+
|
49
|
+
/** cp51932 encoding */
|
50
|
+
pm_encoding_t pm_encoding_cp51932 = {
|
51
|
+
.name = "cp51932",
|
52
|
+
.char_width = pm_encoding_cp51932_char_width,
|
53
|
+
.alnum_char = pm_encoding_cp51932_alnum_char,
|
54
|
+
.alpha_char = pm_encoding_cp51932_alpha_char,
|
55
|
+
.isupper_char = pm_encoding_cp51932_isupper_char,
|
56
|
+
.multibyte = true
|
57
|
+
};
|
data/src/enc/pm_euc_jp.c
CHANGED
@@ -18,6 +18,16 @@ pm_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
|
|
18
18
|
return 2;
|
19
19
|
}
|
20
20
|
|
21
|
+
// These are the triple byte characters.
|
22
|
+
if (
|
23
|
+
(n > 2) &&
|
24
|
+
(b[0] == 0x8F) &&
|
25
|
+
(b[1] >= 0xA1 && b[1] <= 0xFE) &&
|
26
|
+
(b[2] >= 0xA1 && b[2] <= 0xFE)
|
27
|
+
) {
|
28
|
+
return 3;
|
29
|
+
}
|
30
|
+
|
21
31
|
return 0;
|
22
32
|
}
|
23
33
|
|
data/src/enc/pm_gbk.c
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
static size_t
|
4
4
|
pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
|
5
5
|
// These are the single byte characters.
|
6
|
-
if (*b < 0x80) {
|
6
|
+
if (*b <= 0x80) {
|
7
7
|
return 1;
|
8
8
|
}
|
9
9
|
|
@@ -15,7 +15,10 @@ pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
|
|
15
15
|
((b[0] >= 0xB0 && b[0] <= 0xF7) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // GBK/2
|
16
16
|
((b[0] >= 0x81 && b[0] <= 0xA0) && (b[1] >= 0x40 && b[1] <= 0xFE) && (b[1] != 0x7F)) || // GBK/3
|
17
17
|
((b[0] >= 0xAA && b[0] <= 0xFE) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) || // GBK/4
|
18
|
-
((b[0] >= 0xA8 && b[0] <= 0xA9) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) // GBK/5
|
18
|
+
((b[0] >= 0xA8 && b[0] <= 0xA9) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) || // GBK/5
|
19
|
+
((b[0] >= 0xAA && b[0] <= 0xAF) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // user-defined 1
|
20
|
+
((b[0] >= 0xF8 && b[0] <= 0xFE) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // user-defined 2
|
21
|
+
((b[0] >= 0xA1 && b[0] <= 0xA7) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) // user-defined 3
|
19
22
|
)
|
20
23
|
) {
|
21
24
|
return 2;
|