yarp 0.12.0 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +29 -8
- data/CONTRIBUTING.md +2 -2
- data/Makefile +5 -5
- data/README.md +11 -12
- data/config.yml +6 -2
- data/docs/build_system.md +21 -21
- data/docs/building.md +4 -4
- data/docs/configuration.md +25 -21
- data/docs/design.md +2 -2
- data/docs/encoding.md +17 -17
- data/docs/fuzzing.md +4 -4
- data/docs/heredocs.md +3 -3
- data/docs/mapping.md +94 -94
- data/docs/ripper.md +4 -4
- data/docs/ruby_api.md +11 -11
- data/docs/serialization.md +17 -16
- data/docs/testing.md +6 -6
- data/ext/prism/api_node.c +4725 -0
- data/ext/{yarp → prism}/api_pack.c +82 -82
- data/ext/{yarp → prism}/extconf.rb +13 -13
- data/ext/{yarp → prism}/extension.c +175 -168
- data/ext/prism/extension.h +18 -0
- data/include/prism/ast.h +1932 -0
- data/include/prism/defines.h +45 -0
- data/include/prism/diagnostic.h +231 -0
- data/include/{yarp/enc/yp_encoding.h → prism/enc/pm_encoding.h} +40 -40
- data/include/prism/node.h +41 -0
- data/include/prism/pack.h +141 -0
- data/include/{yarp → prism}/parser.h +143 -142
- data/include/prism/regexp.h +19 -0
- data/include/prism/unescape.h +48 -0
- data/include/prism/util/pm_buffer.h +51 -0
- data/include/{yarp/util/yp_char.h → prism/util/pm_char.h} +20 -20
- data/include/{yarp/util/yp_constant_pool.h → prism/util/pm_constant_pool.h} +26 -22
- data/include/{yarp/util/yp_list.h → prism/util/pm_list.h} +21 -21
- data/include/prism/util/pm_memchr.h +14 -0
- data/include/{yarp/util/yp_newline_list.h → prism/util/pm_newline_list.h} +11 -11
- data/include/prism/util/pm_state_stack.h +24 -0
- data/include/{yarp/util/yp_string.h → prism/util/pm_string.h} +20 -20
- data/include/prism/util/pm_string_list.h +25 -0
- data/include/{yarp/util/yp_strpbrk.h → prism/util/pm_strpbrk.h} +7 -7
- data/include/prism/version.h +4 -0
- data/include/prism.h +82 -0
- data/lib/prism/compiler.rb +465 -0
- data/lib/prism/debug.rb +157 -0
- data/lib/{yarp/desugar_visitor.rb → prism/desugar_compiler.rb} +4 -2
- data/lib/prism/dispatcher.rb +2051 -0
- data/lib/prism/dsl.rb +750 -0
- data/lib/{yarp → prism}/ffi.rb +66 -67
- data/lib/{yarp → prism}/lex_compat.rb +40 -43
- data/lib/{yarp/mutation_visitor.rb → prism/mutation_compiler.rb} +3 -3
- data/lib/{yarp → prism}/node.rb +2012 -2593
- data/lib/prism/node_ext.rb +55 -0
- data/lib/prism/node_inspector.rb +68 -0
- data/lib/{yarp → prism}/pack.rb +1 -1
- data/lib/{yarp → prism}/parse_result/comments.rb +1 -1
- data/lib/{yarp → prism}/parse_result/newlines.rb +1 -1
- data/lib/prism/parse_result.rb +266 -0
- data/lib/{yarp → prism}/pattern.rb +14 -14
- data/lib/{yarp → prism}/ripper_compat.rb +5 -5
- data/lib/{yarp → prism}/serialize.rb +12 -7
- data/lib/prism/visitor.rb +470 -0
- data/lib/prism.rb +64 -0
- data/lib/yarp.rb +2 -614
- data/src/diagnostic.c +213 -208
- data/src/enc/pm_big5.c +52 -0
- data/src/enc/pm_euc_jp.c +58 -0
- data/src/enc/{yp_gbk.c → pm_gbk.c} +16 -16
- data/src/enc/pm_shift_jis.c +56 -0
- data/src/enc/{yp_tables.c → pm_tables.c} +69 -69
- data/src/enc/{yp_unicode.c → pm_unicode.c} +40 -40
- data/src/enc/pm_windows_31j.c +56 -0
- data/src/node.c +1293 -1233
- data/src/pack.c +247 -247
- data/src/prettyprint.c +1479 -1479
- data/src/{yarp.c → prism.c} +5205 -5083
- data/src/regexp.c +132 -132
- data/src/serialize.c +1121 -1121
- data/src/token_type.c +169 -167
- data/src/unescape.c +106 -87
- data/src/util/pm_buffer.c +103 -0
- data/src/util/{yp_char.c → pm_char.c} +72 -72
- data/src/util/{yp_constant_pool.c → pm_constant_pool.c} +85 -64
- data/src/util/{yp_list.c → pm_list.c} +10 -10
- data/src/util/{yp_memchr.c → pm_memchr.c} +6 -4
- data/src/util/{yp_newline_list.c → pm_newline_list.c} +21 -21
- data/src/util/{yp_state_stack.c → pm_state_stack.c} +4 -4
- data/src/util/{yp_string.c → pm_string.c} +38 -38
- data/src/util/pm_string_list.c +29 -0
- data/src/util/{yp_strncasecmp.c → pm_strncasecmp.c} +1 -1
- data/src/util/{yp_strpbrk.c → pm_strpbrk.c} +8 -8
- data/yarp.gemspec +68 -59
- metadata +70 -61
- data/ext/yarp/api_node.c +0 -4728
- data/ext/yarp/extension.h +0 -18
- data/include/yarp/ast.h +0 -1929
- data/include/yarp/defines.h +0 -45
- data/include/yarp/diagnostic.h +0 -226
- data/include/yarp/node.h +0 -42
- data/include/yarp/pack.h +0 -141
- data/include/yarp/regexp.h +0 -19
- data/include/yarp/unescape.h +0 -44
- data/include/yarp/util/yp_buffer.h +0 -51
- data/include/yarp/util/yp_memchr.h +0 -14
- data/include/yarp/util/yp_state_stack.h +0 -24
- data/include/yarp/util/yp_string_list.h +0 -25
- data/include/yarp/version.h +0 -4
- data/include/yarp.h +0 -82
- data/src/enc/yp_big5.c +0 -52
- data/src/enc/yp_euc_jp.c +0 -58
- data/src/enc/yp_shift_jis.c +0 -56
- data/src/enc/yp_windows_31j.c +0 -56
- data/src/util/yp_buffer.c +0 -101
- data/src/util/yp_string_list.c +0 -29
data/src/enc/pm_euc_jp.c
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
#include "prism/enc/pm_encoding.h"
|
2
|
+
|
3
|
+
// Return the number of bytes occupied by the EUC-JP character starting at b,
// or 0 if the bytes at b do not form a valid EUC-JP character. n is the number
// of bytes available starting at b; it is consulted before any byte past b[0]
// is read.
static size_t
pm_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
    // These are the single byte characters.
    if (b[0] < 0x80) {
        return 1;
    }

    // These are the double byte characters: either the 0x8E lead byte or a
    // lead byte in 0xA1-0xFE, followed by a trail byte in 0xA1-0xFE.
    if (
        (n > 1) &&
        ((b[0] == 0x8E) || (b[0] >= 0xA1 && b[0] <= 0xFE)) &&
        (b[1] >= 0xA1 && b[1] <= 0xFE)
    ) {
        return 2;
    }

    // These are the triple byte characters: the 0x8F lead byte followed by two
    // bytes that are each in 0xA1-0xFE.
    if (
        (n > 2) &&
        (b[0] == 0x8F) &&
        (b[1] >= 0xA1 && b[1] <= 0xFE) &&
        (b[2] >= 0xA1 && b[2] <= 0xFE)
    ) {
        return 3;
    }

    return 0;
}
|
23
|
+
|
24
|
+
// Alphabetic check for EUC-JP. Only characters that are exactly one byte wide
// are handed to the ASCII alphabetic check; anything else (multibyte or
// invalid) yields 0.
static size_t
pm_encoding_euc_jp_alpha_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_euc_jp_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alpha_char(b, n);
}
|
32
|
+
|
33
|
+
// Alphanumeric check for EUC-JP. Only characters that are exactly one byte
// wide are handed to the ASCII alphanumeric check; anything else (multibyte or
// invalid) yields 0.
static size_t
pm_encoding_euc_jp_alnum_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_euc_jp_char_width(b, n) != 1) {
        return 0;
    }

    return pm_encoding_ascii_alnum_char(b, n);
}
|
41
|
+
|
42
|
+
// Uppercase check for EUC-JP. Only characters that are exactly one byte wide
// are handed to the ASCII uppercase check; anything else (multibyte or
// invalid) is reported as not uppercase.
static bool
pm_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_euc_jp_char_width(b, n) == 1) {
        return pm_encoding_ascii_isupper_char(b, n);
    } else {
        // The return type is bool, so use the boolean literal rather than 0.
        return false;
    }
}
|
50
|
+
|
51
|
+
pm_encoding_t pm_encoding_euc_jp = {
|
52
|
+
.name = "euc-jp",
|
53
|
+
.char_width = pm_encoding_euc_jp_char_width,
|
54
|
+
.alnum_char = pm_encoding_euc_jp_alnum_char,
|
55
|
+
.alpha_char = pm_encoding_euc_jp_alpha_char,
|
56
|
+
.isupper_char = pm_encoding_euc_jp_isupper_char,
|
57
|
+
.multibyte = true
|
58
|
+
};
|
@@ -1,7 +1,7 @@
|
|
1
|
-
#include "
|
1
|
+
#include "prism/enc/pm_encoding.h"
|
2
2
|
|
3
3
|
static size_t
|
4
|
-
|
4
|
+
pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
|
5
5
|
// These are the single byte characters.
|
6
6
|
if (*b < 0x80) {
|
7
7
|
return 1;
|
@@ -25,37 +25,37 @@ yp_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
|
|
25
25
|
}
|
26
26
|
|
27
27
|
static size_t
|
28
|
-
|
29
|
-
if (
|
30
|
-
return
|
28
|
+
pm_encoding_gbk_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
29
|
+
if (pm_encoding_gbk_char_width(b, n) == 1) {
|
30
|
+
return pm_encoding_ascii_alpha_char(b, n);
|
31
31
|
} else {
|
32
32
|
return 0;
|
33
33
|
}
|
34
34
|
}
|
35
35
|
|
36
36
|
static size_t
|
37
|
-
|
38
|
-
if (
|
39
|
-
return
|
37
|
+
pm_encoding_gbk_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
38
|
+
if (pm_encoding_gbk_char_width(b, n) == 1) {
|
39
|
+
return pm_encoding_ascii_alnum_char(b, n);
|
40
40
|
} else {
|
41
41
|
return 0;
|
42
42
|
}
|
43
43
|
}
|
44
44
|
|
45
45
|
static bool
|
46
|
-
|
47
|
-
if (
|
48
|
-
return
|
46
|
+
pm_encoding_gbk_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
47
|
+
if (pm_encoding_gbk_char_width(b, n) == 1) {
|
48
|
+
return pm_encoding_ascii_isupper_char(b, n);
|
49
49
|
} else {
|
50
50
|
return false;
|
51
51
|
}
|
52
52
|
}
|
53
53
|
|
54
|
-
|
54
|
+
pm_encoding_t pm_encoding_gbk = {
|
55
55
|
.name = "gbk",
|
56
|
-
.char_width =
|
57
|
-
.alnum_char =
|
58
|
-
.alpha_char =
|
59
|
-
.isupper_char =
|
56
|
+
.char_width = pm_encoding_gbk_char_width,
|
57
|
+
.alnum_char = pm_encoding_gbk_alnum_char,
|
58
|
+
.alpha_char = pm_encoding_gbk_alpha_char,
|
59
|
+
.isupper_char = pm_encoding_gbk_isupper_char,
|
60
60
|
.multibyte = true
|
61
61
|
};
|
@@ -0,0 +1,56 @@
|
|
1
|
+
#include "prism/enc/pm_encoding.h"
|
2
|
+
|
3
|
+
// Return the number of bytes occupied by the Shift_JIS character starting at
// b, or 0 if the bytes at b do not form a valid Shift_JIS character. n is the
// number of bytes available starting at b; it is consulted before b[1] is
// read.
static size_t
pm_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
    // These are the single byte characters.
    if (b[0] < 0x80 || (b[0] >= 0xA1 && b[0] <= 0xDF)) {
        return 1;
    }

    // These are the double byte characters: a lead byte in 0x81-0x9F or
    // 0xE0-0xFC followed by a trail byte in 0x40-0xFC. 0x7F is excluded
    // because it is not a valid Shift_JIS trail byte.
    if (
        (n > 1) &&
        ((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) &&
        (b[1] >= 0x40 && b[1] <= 0xFC && b[1] != 0x7F)
    ) {
        return 2;
    }

    return 0;
}
|
21
|
+
|
22
|
+
// Alphabetic check for Shift_JIS. A character that is exactly one byte wide is
// classified by the ASCII alphabetic check; every other case yields 0.
static size_t
pm_encoding_shift_jis_alpha_char(const uint8_t *b, ptrdiff_t n) {
    const size_t width = pm_encoding_shift_jis_char_width(b, n);

    return (width == 1) ? pm_encoding_ascii_alpha_char(b, n) : 0;
}
|
30
|
+
|
31
|
+
// Alphanumeric check for Shift_JIS. A character that is exactly one byte wide
// is classified by the ASCII alphanumeric check; every other case yields 0.
static size_t
pm_encoding_shift_jis_alnum_char(const uint8_t *b, ptrdiff_t n) {
    const size_t width = pm_encoding_shift_jis_char_width(b, n);

    return (width == 1) ? pm_encoding_ascii_alnum_char(b, n) : 0;
}
|
39
|
+
|
40
|
+
// Uppercase check for Shift_JIS. A character that is exactly one byte wide is
// classified by the ASCII uppercase check; every other case (multibyte or
// invalid) is reported as not uppercase.
static bool
pm_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
    if (pm_encoding_shift_jis_char_width(b, n) == 1) {
        return pm_encoding_ascii_isupper_char(b, n);
    } else {
        // The return type is bool, so use the boolean literal rather than 0.
        return false;
    }
}
|
48
|
+
|
49
|
+
pm_encoding_t pm_encoding_shift_jis = {
|
50
|
+
.name = "shift_jis",
|
51
|
+
.char_width = pm_encoding_shift_jis_char_width,
|
52
|
+
.alnum_char = pm_encoding_shift_jis_alnum_char,
|
53
|
+
.alpha_char = pm_encoding_shift_jis_alpha_char,
|
54
|
+
.isupper_char = pm_encoding_shift_jis_isupper_char,
|
55
|
+
.multibyte = true
|
56
|
+
};
|
@@ -1,8 +1,8 @@
|
|
1
|
-
#include "
|
1
|
+
#include "prism/enc/pm_encoding.h"
|
2
2
|
|
3
3
|
// Each element of the following table contains a bitfield that indicates a
|
4
4
|
// piece of information about the corresponding ASCII character.
|
5
|
-
static uint8_t
|
5
|
+
static uint8_t pm_encoding_ascii_table[256] = {
|
6
6
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
7
7
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
8
8
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -24,7 +24,7 @@ static uint8_t yp_encoding_ascii_table[256] = {
|
|
24
24
|
|
25
25
|
// Each element of the following table contains a bitfield that indicates a
|
26
26
|
// piece of information about the corresponding ISO-8859-1 character.
|
27
|
-
static uint8_t
|
27
|
+
static uint8_t pm_encoding_iso_8859_1_table[256] = {
|
28
28
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
29
29
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
30
30
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -46,7 +46,7 @@ static uint8_t yp_encoding_iso_8859_1_table[256] = {
|
|
46
46
|
|
47
47
|
// Each element of the following table contains a bitfield that indicates a
|
48
48
|
// piece of information about the corresponding ISO-8859-2 character.
|
49
|
-
static uint8_t
|
49
|
+
static uint8_t pm_encoding_iso_8859_2_table[256] = {
|
50
50
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
51
51
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
52
52
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -68,7 +68,7 @@ static uint8_t yp_encoding_iso_8859_2_table[256] = {
|
|
68
68
|
|
69
69
|
// Each element of the following table contains a bitfield that indicates a
|
70
70
|
// piece of information about the corresponding ISO-8859-3 character.
|
71
|
-
static uint8_t
|
71
|
+
static uint8_t pm_encoding_iso_8859_3_table[256] = {
|
72
72
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
73
73
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
74
74
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -90,7 +90,7 @@ static uint8_t yp_encoding_iso_8859_3_table[256] = {
|
|
90
90
|
|
91
91
|
// Each element of the following table contains a bitfield that indicates a
|
92
92
|
// piece of information about the corresponding ISO-8859-4 character.
|
93
|
-
static uint8_t
|
93
|
+
static uint8_t pm_encoding_iso_8859_4_table[256] = {
|
94
94
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
95
95
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
96
96
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -112,7 +112,7 @@ static uint8_t yp_encoding_iso_8859_4_table[256] = {
|
|
112
112
|
|
113
113
|
// Each element of the following table contains a bitfield that indicates a
|
114
114
|
// piece of information about the corresponding ISO-8859-5 character.
|
115
|
-
static uint8_t
|
115
|
+
static uint8_t pm_encoding_iso_8859_5_table[256] = {
|
116
116
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
117
117
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
118
118
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -134,7 +134,7 @@ static uint8_t yp_encoding_iso_8859_5_table[256] = {
|
|
134
134
|
|
135
135
|
// Each element of the following table contains a bitfield that indicates a
|
136
136
|
// piece of information about the corresponding ISO-8859-6 character.
|
137
|
-
static uint8_t
|
137
|
+
static uint8_t pm_encoding_iso_8859_6_table[256] = {
|
138
138
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
139
139
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
140
140
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -156,7 +156,7 @@ static uint8_t yp_encoding_iso_8859_6_table[256] = {
|
|
156
156
|
|
157
157
|
// Each element of the following table contains a bitfield that indicates a
|
158
158
|
// piece of information about the corresponding ISO-8859-7 character.
|
159
|
-
static uint8_t
|
159
|
+
static uint8_t pm_encoding_iso_8859_7_table[256] = {
|
160
160
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
161
161
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
162
162
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -178,7 +178,7 @@ static uint8_t yp_encoding_iso_8859_7_table[256] = {
|
|
178
178
|
|
179
179
|
// Each element of the following table contains a bitfield that indicates a
|
180
180
|
// piece of information about the corresponding ISO-8859-8 character.
|
181
|
-
static uint8_t
|
181
|
+
static uint8_t pm_encoding_iso_8859_8_table[256] = {
|
182
182
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
183
183
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
184
184
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -200,7 +200,7 @@ static uint8_t yp_encoding_iso_8859_8_table[256] = {
|
|
200
200
|
|
201
201
|
// Each element of the following table contains a bitfield that indicates a
|
202
202
|
// piece of information about the corresponding ISO-8859-9 character.
|
203
|
-
static uint8_t
|
203
|
+
static uint8_t pm_encoding_iso_8859_9_table[256] = {
|
204
204
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
205
205
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
206
206
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -222,7 +222,7 @@ static uint8_t yp_encoding_iso_8859_9_table[256] = {
|
|
222
222
|
|
223
223
|
// Each element of the following table contains a bitfield that indicates a
|
224
224
|
// piece of information about the corresponding ISO-8859-10 character.
|
225
|
-
static uint8_t
|
225
|
+
static uint8_t pm_encoding_iso_8859_10_table[256] = {
|
226
226
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
227
227
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
228
228
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -244,7 +244,7 @@ static uint8_t yp_encoding_iso_8859_10_table[256] = {
|
|
244
244
|
|
245
245
|
// Each element of the following table contains a bitfield that indicates a
|
246
246
|
// piece of information about the corresponding ISO-8859-11 character.
|
247
|
-
static uint8_t
|
247
|
+
static uint8_t pm_encoding_iso_8859_11_table[256] = {
|
248
248
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
249
249
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
250
250
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -266,7 +266,7 @@ static uint8_t yp_encoding_iso_8859_11_table[256] = {
|
|
266
266
|
|
267
267
|
// Each element of the following table contains a bitfield that indicates a
|
268
268
|
// piece of information about the corresponding ISO-8859-13 character.
|
269
|
-
static uint8_t
|
269
|
+
static uint8_t pm_encoding_iso_8859_13_table[256] = {
|
270
270
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
271
271
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
272
272
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -288,7 +288,7 @@ static uint8_t yp_encoding_iso_8859_13_table[256] = {
|
|
288
288
|
|
289
289
|
// Each element of the following table contains a bitfield that indicates a
|
290
290
|
// piece of information about the corresponding ISO-8859-14 character.
|
291
|
-
static uint8_t
|
291
|
+
static uint8_t pm_encoding_iso_8859_14_table[256] = {
|
292
292
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
293
293
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
294
294
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -310,7 +310,7 @@ static uint8_t yp_encoding_iso_8859_14_table[256] = {
|
|
310
310
|
|
311
311
|
// Each element of the following table contains a bitfield that indicates a
|
312
312
|
// piece of information about the corresponding ISO-8859-15 character.
|
313
|
-
static uint8_t
|
313
|
+
static uint8_t pm_encoding_iso_8859_15_table[256] = {
|
314
314
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
315
315
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
316
316
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -332,7 +332,7 @@ static uint8_t yp_encoding_iso_8859_15_table[256] = {
|
|
332
332
|
|
333
333
|
// Each element of the following table contains a bitfield that indicates a
|
334
334
|
// piece of information about the corresponding ISO-8859-16 character.
|
335
|
-
static uint8_t
|
335
|
+
static uint8_t pm_encoding_iso_8859_16_table[256] = {
|
336
336
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
337
337
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
338
338
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -354,7 +354,7 @@ static uint8_t yp_encoding_iso_8859_16_table[256] = {
|
|
354
354
|
|
355
355
|
// Each element of the following table contains a bitfield that indicates a
|
356
356
|
// piece of information about the corresponding KOI8-R character.
|
357
|
-
static uint8_t
|
357
|
+
static uint8_t pm_encoding_koi8_r_table[256] = {
|
358
358
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
359
359
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
360
360
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -376,7 +376,7 @@ static uint8_t yp_encoding_koi8_r_table[256] = {
|
|
376
376
|
|
377
377
|
// Each element of the following table contains a bitfield that indicates a
|
378
378
|
// piece of information about the corresponding windows-1251 character.
|
379
|
-
static uint8_t
|
379
|
+
static uint8_t pm_encoding_windows_1251_table[256] = {
|
380
380
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
381
381
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
382
382
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -398,7 +398,7 @@ static uint8_t yp_encoding_windows_1251_table[256] = {
|
|
398
398
|
|
399
399
|
// Each element of the following table contains a bitfield that indicates a
|
400
400
|
// piece of information about the corresponding windows-1252 character.
|
401
|
-
static uint8_t
|
401
|
+
static uint8_t pm_encoding_windows_1252_table[256] = {
|
402
402
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
403
403
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
404
404
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -419,89 +419,89 @@ static uint8_t yp_encoding_windows_1252_table[256] = {
|
|
419
419
|
};
|
420
420
|
|
421
421
|
static size_t
|
422
|
-
|
422
|
+
pm_encoding_ascii_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
423
423
|
return *b < 0x80 ? 1 : 0;
|
424
424
|
}
|
425
425
|
|
426
426
|
size_t
|
427
|
-
|
428
|
-
return (
|
427
|
+
pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
428
|
+
return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT);
|
429
429
|
}
|
430
430
|
|
431
431
|
size_t
|
432
|
-
|
433
|
-
return (
|
432
|
+
pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
433
|
+
return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
|
434
434
|
}
|
435
435
|
|
436
436
|
bool
|
437
|
-
|
438
|
-
return (
|
437
|
+
pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
438
|
+
return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_UPPERCASE_BIT);
|
439
439
|
}
|
440
440
|
|
441
441
|
static size_t
|
442
|
-
|
442
|
+
pm_encoding_koi8_r_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
443
443
|
return ((*b >= 0x20 && *b <= 0x7E) || (*b >= 0x80)) ? 1 : 0;
|
444
444
|
}
|
445
445
|
|
446
446
|
static size_t
|
447
|
-
|
447
|
+
pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
448
448
|
return 1;
|
449
449
|
}
|
450
450
|
|
451
|
-
|
451
|
+
pm_encoding_t pm_encoding_ascii = {
|
452
452
|
.name = "ascii",
|
453
|
-
.char_width =
|
454
|
-
.alnum_char =
|
455
|
-
.alpha_char =
|
456
|
-
.isupper_char =
|
453
|
+
.char_width = pm_encoding_ascii_char_width,
|
454
|
+
.alnum_char = pm_encoding_ascii_alnum_char,
|
455
|
+
.alpha_char = pm_encoding_ascii_alpha_char,
|
456
|
+
.isupper_char = pm_encoding_ascii_isupper_char,
|
457
457
|
.multibyte = false
|
458
458
|
};
|
459
459
|
|
460
|
-
|
460
|
+
pm_encoding_t pm_encoding_ascii_8bit = {
|
461
461
|
.name = "ascii-8bit",
|
462
|
-
.char_width =
|
463
|
-
.alnum_char =
|
464
|
-
.alpha_char =
|
465
|
-
.isupper_char =
|
462
|
+
.char_width = pm_encoding_single_char_width,
|
463
|
+
.alnum_char = pm_encoding_ascii_alnum_char,
|
464
|
+
.alpha_char = pm_encoding_ascii_alpha_char,
|
465
|
+
.isupper_char = pm_encoding_ascii_isupper_char,
|
466
466
|
.multibyte = false
|
467
467
|
};
|
468
468
|
|
469
|
-
#define
|
470
|
-
static size_t
|
471
|
-
return (
|
469
|
+
#define PRISM_ENCODING_TABLE(s, i, w) \
|
470
|
+
static size_t pm_encoding_ ##i ## _alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
|
471
|
+
return (pm_encoding_ ##i ## _table[*b] & PRISM_ENCODING_ALPHABETIC_BIT); \
|
472
472
|
} \
|
473
|
-
static size_t
|
474
|
-
return (
|
473
|
+
static size_t pm_encoding_ ##i ## _alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
|
474
|
+
return (pm_encoding_ ##i ## _table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
|
475
475
|
} \
|
476
|
-
static bool
|
477
|
-
return (
|
476
|
+
static bool pm_encoding_ ##i ## _isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
|
477
|
+
return (pm_encoding_ ##i ## _table[*b] & PRISM_ENCODING_UPPERCASE_BIT); \
|
478
478
|
} \
|
479
|
-
|
479
|
+
pm_encoding_t pm_encoding_ ##i = { \
|
480
480
|
.name = s, \
|
481
481
|
.char_width = w, \
|
482
|
-
.alnum_char =
|
483
|
-
.alpha_char =
|
484
|
-
.isupper_char =
|
482
|
+
.alnum_char = pm_encoding_ ##i ## _alnum_char, \
|
483
|
+
.alpha_char = pm_encoding_ ##i ## _alpha_char, \
|
484
|
+
.isupper_char = pm_encoding_ ##i ## _isupper_char, \
|
485
485
|
.multibyte = false, \
|
486
486
|
};
|
487
487
|
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
488
|
+
PRISM_ENCODING_TABLE("iso-8859-1", iso_8859_1, pm_encoding_single_char_width)
|
489
|
+
PRISM_ENCODING_TABLE("iso-8859-2", iso_8859_2, pm_encoding_single_char_width)
|
490
|
+
PRISM_ENCODING_TABLE("iso-8859-3", iso_8859_3, pm_encoding_single_char_width)
|
491
|
+
PRISM_ENCODING_TABLE("iso-8859-4", iso_8859_4, pm_encoding_single_char_width)
|
492
|
+
PRISM_ENCODING_TABLE("iso-8859-5", iso_8859_5, pm_encoding_single_char_width)
|
493
|
+
PRISM_ENCODING_TABLE("iso-8859-6", iso_8859_6, pm_encoding_single_char_width)
|
494
|
+
PRISM_ENCODING_TABLE("iso-8859-7", iso_8859_7, pm_encoding_single_char_width)
|
495
|
+
PRISM_ENCODING_TABLE("iso-8859-8", iso_8859_8, pm_encoding_single_char_width)
|
496
|
+
PRISM_ENCODING_TABLE("iso-8859-9", iso_8859_9, pm_encoding_single_char_width)
|
497
|
+
PRISM_ENCODING_TABLE("iso-8859-10", iso_8859_10, pm_encoding_single_char_width)
|
498
|
+
PRISM_ENCODING_TABLE("iso-8859-11", iso_8859_11, pm_encoding_single_char_width)
|
499
|
+
PRISM_ENCODING_TABLE("iso-8859-13", iso_8859_13, pm_encoding_single_char_width)
|
500
|
+
PRISM_ENCODING_TABLE("iso-8859-14", iso_8859_14, pm_encoding_single_char_width)
|
501
|
+
PRISM_ENCODING_TABLE("iso-8859-15", iso_8859_15, pm_encoding_single_char_width)
|
502
|
+
PRISM_ENCODING_TABLE("iso-8859-16", iso_8859_16, pm_encoding_single_char_width)
|
503
|
+
PRISM_ENCODING_TABLE("koi8-r", koi8_r, pm_encoding_koi8_r_char_width)
|
504
|
+
PRISM_ENCODING_TABLE("windows-1251", windows_1251, pm_encoding_single_char_width)
|
505
|
+
PRISM_ENCODING_TABLE("windows-1252", windows_1252, pm_encoding_single_char_width)
|
506
506
|
|
507
|
-
#undef
|
507
|
+
#undef PRISM_ENCODING_TABLE
|