yarp 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +48 -1
- data/Makefile +5 -1
- data/README.md +4 -3
- data/config.yml +461 -150
- data/docs/configuration.md +1 -0
- data/docs/encoding.md +5 -5
- data/docs/ruby_api.md +2 -0
- data/docs/serialization.md +3 -3
- data/docs/testing.md +2 -2
- data/ext/yarp/api_node.c +810 -199
- data/ext/yarp/extension.c +94 -31
- data/ext/yarp/extension.h +2 -2
- data/include/yarp/ast.h +653 -150
- data/include/yarp/defines.h +2 -1
- data/include/yarp/diagnostic.h +3 -3
- data/include/yarp/enc/yp_encoding.h +10 -10
- data/include/yarp/node.h +10 -0
- data/include/yarp/parser.h +19 -19
- data/include/yarp/regexp.h +1 -1
- data/include/yarp/unescape.h +7 -5
- data/include/yarp/util/yp_buffer.h +3 -0
- data/include/yarp/util/yp_char.h +16 -16
- data/include/yarp/util/yp_constant_pool.h +2 -2
- data/include/yarp/util/yp_newline_list.h +7 -4
- data/include/yarp/util/yp_string.h +4 -4
- data/include/yarp/util/yp_string_list.h +0 -3
- data/include/yarp/util/yp_strpbrk.h +1 -1
- data/include/yarp/version.h +2 -2
- data/include/yarp.h +14 -3
- data/lib/yarp/desugar_visitor.rb +204 -0
- data/lib/yarp/ffi.rb +27 -1
- data/lib/yarp/lex_compat.rb +93 -25
- data/lib/yarp/mutation_visitor.rb +683 -0
- data/lib/yarp/node.rb +3121 -597
- data/lib/yarp/serialize.rb +198 -126
- data/lib/yarp.rb +53 -7
- data/src/diagnostic.c +1 -1
- data/src/enc/yp_big5.c +15 -42
- data/src/enc/yp_euc_jp.c +16 -43
- data/src/enc/yp_gbk.c +19 -46
- data/src/enc/yp_shift_jis.c +16 -43
- data/src/enc/yp_tables.c +36 -38
- data/src/enc/yp_unicode.c +20 -25
- data/src/enc/yp_windows_31j.c +16 -43
- data/src/node.c +1444 -836
- data/src/prettyprint.c +324 -103
- data/src/regexp.c +21 -21
- data/src/serialize.c +429 -276
- data/src/token_type.c +2 -2
- data/src/unescape.c +184 -136
- data/src/util/yp_buffer.c +7 -2
- data/src/util/yp_char.c +34 -34
- data/src/util/yp_constant_pool.c +4 -4
- data/src/util/yp_memchr.c +1 -1
- data/src/util/yp_newline_list.c +14 -3
- data/src/util/yp_string.c +22 -20
- data/src/util/yp_string_list.c +0 -6
- data/src/util/yp_strncasecmp.c +3 -6
- data/src/util/yp_strpbrk.c +8 -8
- data/src/yarp.c +1504 -615
- data/yarp.gemspec +3 -1
- metadata +4 -2
data/src/enc/yp_big5.c
CHANGED
@@ -1,69 +1,42 @@
|
|
1
1
|
#include "yarp/enc/yp_encoding.h"
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
static yp_big5_codepoint_t
|
6
|
-
yp_big5_codepoint(const char *c, ptrdiff_t n, size_t *width) {
|
7
|
-
const unsigned char *uc = (const unsigned char *) c;
|
8
|
-
|
3
|
+
static size_t
|
4
|
+
yp_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
|
9
5
|
// These are the single byte characters.
|
10
|
-
if (*
|
11
|
-
|
12
|
-
return *uc;
|
6
|
+
if (*b < 0x80) {
|
7
|
+
return 1;
|
13
8
|
}
|
14
9
|
|
15
10
|
// These are the double byte characters.
|
16
|
-
if ((n > 1) && (
|
17
|
-
|
18
|
-
return (yp_big5_codepoint_t) (uc[0] << 8 | uc[1]);
|
11
|
+
if ((n > 1) && (b[0] >= 0xA1 && b[0] <= 0xFE) && (b[1] >= 0x40 && b[1] <= 0xFE)) {
|
12
|
+
return 2;
|
19
13
|
}
|
20
14
|
|
21
|
-
*width = 0;
|
22
15
|
return 0;
|
23
16
|
}
|
24
17
|
|
25
18
|
static size_t
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
return width;
|
31
|
-
}
|
32
|
-
|
33
|
-
static size_t
|
34
|
-
yp_encoding_big5_alpha_char(const char *c, ptrdiff_t n) {
|
35
|
-
size_t width;
|
36
|
-
yp_big5_codepoint_t codepoint = yp_big5_codepoint(c, n, &width);
|
37
|
-
|
38
|
-
if (width == 1) {
|
39
|
-
const char value = (const char) codepoint;
|
40
|
-
return yp_encoding_ascii_alpha_char(&value, n);
|
19
|
+
yp_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
20
|
+
if (yp_encoding_big5_char_width(b, n) == 1) {
|
21
|
+
return yp_encoding_ascii_alpha_char(b, n);
|
41
22
|
} else {
|
42
23
|
return 0;
|
43
24
|
}
|
44
25
|
}
|
45
26
|
|
46
27
|
static size_t
|
47
|
-
yp_encoding_big5_alnum_char(const
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
if (width == 1) {
|
52
|
-
const char value = (const char) codepoint;
|
53
|
-
return yp_encoding_ascii_alnum_char(&value, n);
|
28
|
+
yp_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
29
|
+
if (yp_encoding_big5_char_width(b, n) == 1) {
|
30
|
+
return yp_encoding_ascii_alnum_char(b, n);
|
54
31
|
} else {
|
55
32
|
return 0;
|
56
33
|
}
|
57
34
|
}
|
58
35
|
|
59
36
|
static bool
|
60
|
-
yp_encoding_big5_isupper_char(const
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
if (width == 1) {
|
65
|
-
const char value = (const char) codepoint;
|
66
|
-
return yp_encoding_ascii_isupper_char(&value, n);
|
37
|
+
yp_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
38
|
+
if (yp_encoding_big5_char_width(b, n) == 1) {
|
39
|
+
return yp_encoding_ascii_isupper_char(b, n);
|
67
40
|
} else {
|
68
41
|
return false;
|
69
42
|
}
|
data/src/enc/yp_euc_jp.c
CHANGED
@@ -1,75 +1,48 @@
|
|
1
1
|
#include "yarp/enc/yp_encoding.h"
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
static yp_euc_jp_codepoint_t
|
6
|
-
yp_euc_jp_codepoint(const char *c, ptrdiff_t n, size_t *width) {
|
7
|
-
const unsigned char *uc = (const unsigned char *) c;
|
8
|
-
|
3
|
+
static size_t
|
4
|
+
yp_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
|
9
5
|
// These are the single byte characters.
|
10
|
-
if (*
|
11
|
-
|
12
|
-
return *uc;
|
6
|
+
if (*b < 0x80) {
|
7
|
+
return 1;
|
13
8
|
}
|
14
9
|
|
15
10
|
// These are the double byte characters.
|
16
11
|
if (
|
17
12
|
(n > 1) &&
|
18
13
|
(
|
19
|
-
((
|
20
|
-
((
|
14
|
+
((b[0] == 0x8E) && (b[1] >= 0xA1 && b[1] <= 0xFE)) ||
|
15
|
+
((b[0] >= 0xA1 && b[0] <= 0xFE) && (b[1] >= 0xA1 && b[1] <= 0xFE))
|
21
16
|
)
|
22
17
|
) {
|
23
|
-
|
24
|
-
return (yp_euc_jp_codepoint_t) (uc[0] << 8 | uc[1]);
|
18
|
+
return 2;
|
25
19
|
}
|
26
20
|
|
27
|
-
*width = 0;
|
28
21
|
return 0;
|
29
22
|
}
|
30
23
|
|
31
24
|
static size_t
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
return width;
|
37
|
-
}
|
38
|
-
|
39
|
-
static size_t
|
40
|
-
yp_encoding_euc_jp_alpha_char(const char *c, ptrdiff_t n) {
|
41
|
-
size_t width;
|
42
|
-
yp_euc_jp_codepoint_t codepoint = yp_euc_jp_codepoint(c, n, &width);
|
43
|
-
|
44
|
-
if (width == 1) {
|
45
|
-
const char value = (const char) codepoint;
|
46
|
-
return yp_encoding_ascii_alpha_char(&value, n);
|
25
|
+
yp_encoding_euc_jp_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
26
|
+
if (yp_encoding_euc_jp_char_width(b, n) == 1) {
|
27
|
+
return yp_encoding_ascii_alpha_char(b, n);
|
47
28
|
} else {
|
48
29
|
return 0;
|
49
30
|
}
|
50
31
|
}
|
51
32
|
|
52
33
|
static size_t
|
53
|
-
yp_encoding_euc_jp_alnum_char(const
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
if (width == 1) {
|
58
|
-
const char value = (const char) codepoint;
|
59
|
-
return yp_encoding_ascii_alnum_char(&value, n);
|
34
|
+
yp_encoding_euc_jp_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
35
|
+
if (yp_encoding_euc_jp_char_width(b, n) == 1) {
|
36
|
+
return yp_encoding_ascii_alnum_char(b, n);
|
60
37
|
} else {
|
61
38
|
return 0;
|
62
39
|
}
|
63
40
|
}
|
64
41
|
|
65
42
|
static bool
|
66
|
-
yp_encoding_euc_jp_isupper_char(const
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
if (width == 1) {
|
71
|
-
const char value = (const char) codepoint;
|
72
|
-
return yp_encoding_ascii_isupper_char(&value, n);
|
43
|
+
yp_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
44
|
+
if (yp_encoding_euc_jp_char_width(b, n) == 1) {
|
45
|
+
return yp_encoding_ascii_isupper_char(b, n);
|
73
46
|
} else {
|
74
47
|
return 0;
|
75
48
|
}
|
data/src/enc/yp_gbk.c
CHANGED
@@ -1,78 +1,51 @@
|
|
1
1
|
#include "yarp/enc/yp_encoding.h"
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
static yp_gbk_codepoint_t
|
6
|
-
yp_gbk_codepoint(const char *c, ptrdiff_t n, size_t *width) {
|
7
|
-
const unsigned char *uc = (const unsigned char *) c;
|
8
|
-
|
3
|
+
static size_t
|
4
|
+
yp_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
|
9
5
|
// These are the single byte characters.
|
10
|
-
if (*
|
11
|
-
|
12
|
-
return *uc;
|
6
|
+
if (*b < 0x80) {
|
7
|
+
return 1;
|
13
8
|
}
|
14
9
|
|
15
10
|
// These are the double byte characters.
|
16
11
|
if (
|
17
12
|
(n > 1) &&
|
18
13
|
(
|
19
|
-
((
|
20
|
-
((
|
21
|
-
((
|
22
|
-
((
|
23
|
-
((
|
14
|
+
((b[0] >= 0xA1 && b[0] <= 0xA9) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // GBK/1
|
15
|
+
((b[0] >= 0xB0 && b[0] <= 0xF7) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // GBK/2
|
16
|
+
((b[0] >= 0x81 && b[0] <= 0xA0) && (b[1] >= 0x40 && b[1] <= 0xFE) && (b[1] != 0x7F)) || // GBK/3
|
17
|
+
((b[0] >= 0xAA && b[0] <= 0xFE) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) || // GBK/4
|
18
|
+
((b[0] >= 0xA8 && b[0] <= 0xA9) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) // GBK/5
|
24
19
|
)
|
25
20
|
) {
|
26
|
-
|
27
|
-
return (yp_gbk_codepoint_t) (uc[0] << 8 | uc[1]);
|
21
|
+
return 2;
|
28
22
|
}
|
29
23
|
|
30
|
-
*width = 0;
|
31
24
|
return 0;
|
32
25
|
}
|
33
26
|
|
34
27
|
static size_t
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
return width;
|
40
|
-
}
|
41
|
-
|
42
|
-
static size_t
|
43
|
-
yp_encoding_gbk_alpha_char(const char *c, ptrdiff_t n) {
|
44
|
-
size_t width;
|
45
|
-
yp_gbk_codepoint_t codepoint = yp_gbk_codepoint(c, n, &width);
|
46
|
-
|
47
|
-
if (width == 1) {
|
48
|
-
const char value = (const char) codepoint;
|
49
|
-
return yp_encoding_ascii_alpha_char(&value, n);
|
28
|
+
yp_encoding_gbk_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
29
|
+
if (yp_encoding_gbk_char_width(b, n) == 1) {
|
30
|
+
return yp_encoding_ascii_alpha_char(b, n);
|
50
31
|
} else {
|
51
32
|
return 0;
|
52
33
|
}
|
53
34
|
}
|
54
35
|
|
55
36
|
static size_t
|
56
|
-
yp_encoding_gbk_alnum_char(const
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
if (width == 1) {
|
61
|
-
const char value = (const char) codepoint;
|
62
|
-
return yp_encoding_ascii_alnum_char(&value, n);
|
37
|
+
yp_encoding_gbk_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
38
|
+
if (yp_encoding_gbk_char_width(b, n) == 1) {
|
39
|
+
return yp_encoding_ascii_alnum_char(b, n);
|
63
40
|
} else {
|
64
41
|
return 0;
|
65
42
|
}
|
66
43
|
}
|
67
44
|
|
68
45
|
static bool
|
69
|
-
yp_encoding_gbk_isupper_char(const
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
if (width == 1) {
|
74
|
-
const char value = (const char) codepoint;
|
75
|
-
return yp_encoding_ascii_isupper_char(&value, n);
|
46
|
+
yp_encoding_gbk_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
47
|
+
if (yp_encoding_gbk_char_width(b, n) == 1) {
|
48
|
+
return yp_encoding_ascii_isupper_char(b, n);
|
76
49
|
} else {
|
77
50
|
return false;
|
78
51
|
}
|
data/src/enc/yp_shift_jis.c
CHANGED
@@ -1,73 +1,46 @@
|
|
1
1
|
#include "yarp/enc/yp_encoding.h"
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
static yp_shift_jis_codepoint_t
|
6
|
-
yp_shift_jis_codepoint(const char *c, ptrdiff_t n, size_t *width) {
|
7
|
-
const unsigned char *uc = (const unsigned char *) c;
|
8
|
-
|
3
|
+
static size_t
|
4
|
+
yp_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
|
9
5
|
// These are the single byte characters.
|
10
|
-
if (*
|
11
|
-
|
12
|
-
return *uc;
|
6
|
+
if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
|
7
|
+
return 1;
|
13
8
|
}
|
14
9
|
|
15
10
|
// These are the double byte characters.
|
16
11
|
if (
|
17
12
|
(n > 1) &&
|
18
|
-
((
|
19
|
-
(
|
13
|
+
((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) &&
|
14
|
+
(b[1] >= 0x40 && b[1] <= 0xFC)
|
20
15
|
) {
|
21
|
-
|
22
|
-
return (yp_shift_jis_codepoint_t) (uc[0] << 8 | uc[1]);
|
16
|
+
return 2;
|
23
17
|
}
|
24
18
|
|
25
|
-
*width = 0;
|
26
19
|
return 0;
|
27
20
|
}
|
28
21
|
|
29
22
|
static size_t
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
return width;
|
35
|
-
}
|
36
|
-
|
37
|
-
static size_t
|
38
|
-
yp_encoding_shift_jis_alpha_char(const char *c, ptrdiff_t n) {
|
39
|
-
size_t width;
|
40
|
-
yp_shift_jis_codepoint_t codepoint = yp_shift_jis_codepoint(c, n, &width);
|
41
|
-
|
42
|
-
if (width == 1) {
|
43
|
-
const char value = (const char) codepoint;
|
44
|
-
return yp_encoding_ascii_alpha_char(&value, n);
|
23
|
+
yp_encoding_shift_jis_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
24
|
+
if (yp_encoding_shift_jis_char_width(b, n) == 1) {
|
25
|
+
return yp_encoding_ascii_alpha_char(b, n);
|
45
26
|
} else {
|
46
27
|
return 0;
|
47
28
|
}
|
48
29
|
}
|
49
30
|
|
50
31
|
static size_t
|
51
|
-
yp_encoding_shift_jis_alnum_char(const
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
if (width == 1) {
|
56
|
-
const char value = (const char) codepoint;
|
57
|
-
return yp_encoding_ascii_alnum_char(&value, n);
|
32
|
+
yp_encoding_shift_jis_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
33
|
+
if (yp_encoding_shift_jis_char_width(b, n) == 1) {
|
34
|
+
return yp_encoding_ascii_alnum_char(b, n);
|
58
35
|
} else {
|
59
36
|
return 0;
|
60
37
|
}
|
61
38
|
}
|
62
39
|
|
63
40
|
static bool
|
64
|
-
yp_encoding_shift_jis_isupper_char(const
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
if (width == 1) {
|
69
|
-
const char value = (const char) codepoint;
|
70
|
-
return yp_encoding_ascii_isupper_char(&value, n);
|
41
|
+
yp_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
42
|
+
if (yp_encoding_shift_jis_char_width(b, n) == 1) {
|
43
|
+
return yp_encoding_ascii_isupper_char(b, n);
|
71
44
|
} else {
|
72
45
|
return 0;
|
73
46
|
}
|
data/src/enc/yp_tables.c
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
// Each element of the following table contains a bitfield that indicates a
|
4
4
|
// piece of information about the corresponding ASCII character.
|
5
|
-
static
|
5
|
+
static uint8_t yp_encoding_ascii_table[256] = {
|
6
6
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
7
7
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
8
8
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -24,7 +24,7 @@ static unsigned char yp_encoding_ascii_table[256] = {
|
|
24
24
|
|
25
25
|
// Each element of the following table contains a bitfield that indicates a
|
26
26
|
// piece of information about the corresponding ISO-8859-1 character.
|
27
|
-
static
|
27
|
+
static uint8_t yp_encoding_iso_8859_1_table[256] = {
|
28
28
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
29
29
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
30
30
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -46,7 +46,7 @@ static unsigned char yp_encoding_iso_8859_1_table[256] = {
|
|
46
46
|
|
47
47
|
// Each element of the following table contains a bitfield that indicates a
|
48
48
|
// piece of information about the corresponding ISO-8859-2 character.
|
49
|
-
static
|
49
|
+
static uint8_t yp_encoding_iso_8859_2_table[256] = {
|
50
50
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
51
51
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
52
52
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -68,7 +68,7 @@ static unsigned char yp_encoding_iso_8859_2_table[256] = {
|
|
68
68
|
|
69
69
|
// Each element of the following table contains a bitfield that indicates a
|
70
70
|
// piece of information about the corresponding ISO-8859-3 character.
|
71
|
-
static
|
71
|
+
static uint8_t yp_encoding_iso_8859_3_table[256] = {
|
72
72
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
73
73
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
74
74
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -90,7 +90,7 @@ static unsigned char yp_encoding_iso_8859_3_table[256] = {
|
|
90
90
|
|
91
91
|
// Each element of the following table contains a bitfield that indicates a
|
92
92
|
// piece of information about the corresponding ISO-8859-4 character.
|
93
|
-
static
|
93
|
+
static uint8_t yp_encoding_iso_8859_4_table[256] = {
|
94
94
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
95
95
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
96
96
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -112,7 +112,7 @@ static unsigned char yp_encoding_iso_8859_4_table[256] = {
|
|
112
112
|
|
113
113
|
// Each element of the following table contains a bitfield that indicates a
|
114
114
|
// piece of information about the corresponding ISO-8859-5 character.
|
115
|
-
static
|
115
|
+
static uint8_t yp_encoding_iso_8859_5_table[256] = {
|
116
116
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
117
117
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
118
118
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -134,7 +134,7 @@ static unsigned char yp_encoding_iso_8859_5_table[256] = {
|
|
134
134
|
|
135
135
|
// Each element of the following table contains a bitfield that indicates a
|
136
136
|
// piece of information about the corresponding ISO-8859-6 character.
|
137
|
-
static
|
137
|
+
static uint8_t yp_encoding_iso_8859_6_table[256] = {
|
138
138
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
139
139
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
140
140
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -156,7 +156,7 @@ static unsigned char yp_encoding_iso_8859_6_table[256] = {
|
|
156
156
|
|
157
157
|
// Each element of the following table contains a bitfield that indicates a
|
158
158
|
// piece of information about the corresponding ISO-8859-7 character.
|
159
|
-
static
|
159
|
+
static uint8_t yp_encoding_iso_8859_7_table[256] = {
|
160
160
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
161
161
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
162
162
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -178,7 +178,7 @@ static unsigned char yp_encoding_iso_8859_7_table[256] = {
|
|
178
178
|
|
179
179
|
// Each element of the following table contains a bitfield that indicates a
|
180
180
|
// piece of information about the corresponding ISO-8859-8 character.
|
181
|
-
static
|
181
|
+
static uint8_t yp_encoding_iso_8859_8_table[256] = {
|
182
182
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
183
183
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
184
184
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -200,7 +200,7 @@ static unsigned char yp_encoding_iso_8859_8_table[256] = {
|
|
200
200
|
|
201
201
|
// Each element of the following table contains a bitfield that indicates a
|
202
202
|
// piece of information about the corresponding ISO-8859-9 character.
|
203
|
-
static
|
203
|
+
static uint8_t yp_encoding_iso_8859_9_table[256] = {
|
204
204
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
205
205
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
206
206
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -222,7 +222,7 @@ static unsigned char yp_encoding_iso_8859_9_table[256] = {
|
|
222
222
|
|
223
223
|
// Each element of the following table contains a bitfield that indicates a
|
224
224
|
// piece of information about the corresponding ISO-8859-10 character.
|
225
|
-
static
|
225
|
+
static uint8_t yp_encoding_iso_8859_10_table[256] = {
|
226
226
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
227
227
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
228
228
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -244,7 +244,7 @@ static unsigned char yp_encoding_iso_8859_10_table[256] = {
|
|
244
244
|
|
245
245
|
// Each element of the following table contains a bitfield that indicates a
|
246
246
|
// piece of information about the corresponding ISO-8859-11 character.
|
247
|
-
static
|
247
|
+
static uint8_t yp_encoding_iso_8859_11_table[256] = {
|
248
248
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
249
249
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
250
250
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -266,7 +266,7 @@ static unsigned char yp_encoding_iso_8859_11_table[256] = {
|
|
266
266
|
|
267
267
|
// Each element of the following table contains a bitfield that indicates a
|
268
268
|
// piece of information about the corresponding ISO-8859-13 character.
|
269
|
-
static
|
269
|
+
static uint8_t yp_encoding_iso_8859_13_table[256] = {
|
270
270
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
271
271
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
272
272
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -288,7 +288,7 @@ static unsigned char yp_encoding_iso_8859_13_table[256] = {
|
|
288
288
|
|
289
289
|
// Each element of the following table contains a bitfield that indicates a
|
290
290
|
// piece of information about the corresponding ISO-8859-14 character.
|
291
|
-
static
|
291
|
+
static uint8_t yp_encoding_iso_8859_14_table[256] = {
|
292
292
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
293
293
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
294
294
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -310,7 +310,7 @@ static unsigned char yp_encoding_iso_8859_14_table[256] = {
|
|
310
310
|
|
311
311
|
// Each element of the following table contains a bitfield that indicates a
|
312
312
|
// piece of information about the corresponding ISO-8859-15 character.
|
313
|
-
static
|
313
|
+
static uint8_t yp_encoding_iso_8859_15_table[256] = {
|
314
314
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
315
315
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
316
316
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -332,7 +332,7 @@ static unsigned char yp_encoding_iso_8859_15_table[256] = {
|
|
332
332
|
|
333
333
|
// Each element of the following table contains a bitfield that indicates a
|
334
334
|
// piece of information about the corresponding ISO-8859-16 character.
|
335
|
-
static
|
335
|
+
static uint8_t yp_encoding_iso_8859_16_table[256] = {
|
336
336
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
337
337
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
338
338
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -354,7 +354,7 @@ static unsigned char yp_encoding_iso_8859_16_table[256] = {
|
|
354
354
|
|
355
355
|
// Each element of the following table contains a bitfield that indicates a
|
356
356
|
// piece of information about the corresponding KOI8-R character.
|
357
|
-
static
|
357
|
+
static uint8_t yp_encoding_koi8_r_table[256] = {
|
358
358
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
359
359
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
360
360
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -376,7 +376,7 @@ static unsigned char yp_encoding_koi8_r_table[256] = {
|
|
376
376
|
|
377
377
|
// Each element of the following table contains a bitfield that indicates a
|
378
378
|
// piece of information about the corresponding windows-1251 character.
|
379
|
-
static
|
379
|
+
static uint8_t yp_encoding_windows_1251_table[256] = {
|
380
380
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
381
381
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
382
382
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -398,7 +398,7 @@ static unsigned char yp_encoding_windows_1251_table[256] = {
|
|
398
398
|
|
399
399
|
// Each element of the following table contains a bitfield that indicates a
|
400
400
|
// piece of information about the corresponding windows-1252 character.
|
401
|
-
static
|
401
|
+
static uint8_t yp_encoding_windows_1252_table[256] = {
|
402
402
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
403
403
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
404
404
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -419,34 +419,32 @@ static unsigned char yp_encoding_windows_1252_table[256] = {
|
|
419
419
|
};
|
420
420
|
|
421
421
|
static size_t
|
422
|
-
yp_encoding_ascii_char_width(const
|
423
|
-
|
424
|
-
return v < 0x80 ? 1 : 0;
|
422
|
+
yp_encoding_ascii_char_width(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
423
|
+
return *b < 0x80 ? 1 : 0;
|
425
424
|
}
|
426
425
|
|
427
426
|
size_t
|
428
|
-
yp_encoding_ascii_alpha_char(const
|
429
|
-
return (yp_encoding_ascii_table[
|
427
|
+
yp_encoding_ascii_alpha_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
428
|
+
return (yp_encoding_ascii_table[*b] & YP_ENCODING_ALPHABETIC_BIT);
|
430
429
|
}
|
431
430
|
|
432
431
|
size_t
|
433
|
-
yp_encoding_ascii_alnum_char(const
|
434
|
-
return (yp_encoding_ascii_table[
|
432
|
+
yp_encoding_ascii_alnum_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
433
|
+
return (yp_encoding_ascii_table[*b] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
|
435
434
|
}
|
436
435
|
|
437
436
|
bool
|
438
|
-
yp_encoding_ascii_isupper_char(const
|
439
|
-
return (yp_encoding_ascii_table[
|
437
|
+
yp_encoding_ascii_isupper_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
438
|
+
return (yp_encoding_ascii_table[*b] & YP_ENCODING_UPPERCASE_BIT);
|
440
439
|
}
|
441
440
|
|
442
441
|
static size_t
|
443
|
-
yp_encoding_koi8_r_char_width(const
|
444
|
-
|
445
|
-
return ((v >= 0x20 && v <= 0x7E) || (v >= 0x80)) ? 1 : 0;
|
442
|
+
yp_encoding_koi8_r_char_width(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
443
|
+
return ((*b >= 0x20 && *b <= 0x7E) || (*b >= 0x80)) ? 1 : 0;
|
446
444
|
}
|
447
445
|
|
448
446
|
static size_t
|
449
|
-
yp_encoding_single_char_width(YP_ATTRIBUTE_UNUSED const
|
447
|
+
yp_encoding_single_char_width(YP_ATTRIBUTE_UNUSED const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
450
448
|
return 1;
|
451
449
|
}
|
452
450
|
|
@@ -469,14 +467,14 @@ yp_encoding_t yp_encoding_ascii_8bit = {
|
|
469
467
|
};
|
470
468
|
|
471
469
|
#define YP_ENCODING_TABLE(s, i, w) \
|
472
|
-
static size_t yp_encoding_ ##i ## _alpha_char(const
|
473
|
-
return (yp_encoding_ ##i ## _table[
|
470
|
+
static size_t yp_encoding_ ##i ## _alpha_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
|
471
|
+
return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_ALPHABETIC_BIT); \
|
474
472
|
} \
|
475
|
-
static size_t yp_encoding_ ##i ## _alnum_char(const
|
476
|
-
return (yp_encoding_ ##i ## _table[
|
473
|
+
static size_t yp_encoding_ ##i ## _alnum_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
|
474
|
+
return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
|
477
475
|
} \
|
478
|
-
static bool yp_encoding_ ##i ## _isupper_char(const
|
479
|
-
return (yp_encoding_ ##i ## _table[
|
476
|
+
static bool yp_encoding_ ##i ## _isupper_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
|
477
|
+
return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_UPPERCASE_BIT); \
|
480
478
|
} \
|
481
479
|
yp_encoding_t yp_encoding_ ##i = { \
|
482
480
|
.name = s, \
|