yarp 0.8.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +48 -1
- data/Makefile +5 -1
- data/README.md +4 -3
- data/config.yml +461 -150
- data/docs/configuration.md +1 -0
- data/docs/encoding.md +5 -5
- data/docs/ruby_api.md +2 -0
- data/docs/serialization.md +3 -3
- data/docs/testing.md +2 -2
- data/ext/yarp/api_node.c +810 -199
- data/ext/yarp/extension.c +94 -31
- data/ext/yarp/extension.h +2 -2
- data/include/yarp/ast.h +653 -150
- data/include/yarp/defines.h +2 -1
- data/include/yarp/diagnostic.h +3 -3
- data/include/yarp/enc/yp_encoding.h +10 -10
- data/include/yarp/node.h +10 -0
- data/include/yarp/parser.h +19 -19
- data/include/yarp/regexp.h +1 -1
- data/include/yarp/unescape.h +7 -5
- data/include/yarp/util/yp_buffer.h +3 -0
- data/include/yarp/util/yp_char.h +16 -16
- data/include/yarp/util/yp_constant_pool.h +2 -2
- data/include/yarp/util/yp_newline_list.h +7 -4
- data/include/yarp/util/yp_string.h +4 -4
- data/include/yarp/util/yp_string_list.h +0 -3
- data/include/yarp/util/yp_strpbrk.h +1 -1
- data/include/yarp/version.h +2 -2
- data/include/yarp.h +14 -3
- data/lib/yarp/desugar_visitor.rb +204 -0
- data/lib/yarp/ffi.rb +27 -1
- data/lib/yarp/lex_compat.rb +93 -25
- data/lib/yarp/mutation_visitor.rb +683 -0
- data/lib/yarp/node.rb +3121 -597
- data/lib/yarp/serialize.rb +198 -126
- data/lib/yarp.rb +53 -7
- data/src/diagnostic.c +1 -1
- data/src/enc/yp_big5.c +15 -42
- data/src/enc/yp_euc_jp.c +16 -43
- data/src/enc/yp_gbk.c +19 -46
- data/src/enc/yp_shift_jis.c +16 -43
- data/src/enc/yp_tables.c +36 -38
- data/src/enc/yp_unicode.c +20 -25
- data/src/enc/yp_windows_31j.c +16 -43
- data/src/node.c +1444 -836
- data/src/prettyprint.c +324 -103
- data/src/regexp.c +21 -21
- data/src/serialize.c +429 -276
- data/src/token_type.c +2 -2
- data/src/unescape.c +184 -136
- data/src/util/yp_buffer.c +7 -2
- data/src/util/yp_char.c +34 -34
- data/src/util/yp_constant_pool.c +4 -4
- data/src/util/yp_memchr.c +1 -1
- data/src/util/yp_newline_list.c +14 -3
- data/src/util/yp_string.c +22 -20
- data/src/util/yp_string_list.c +0 -6
- data/src/util/yp_strncasecmp.c +3 -6
- data/src/util/yp_strpbrk.c +8 -8
- data/src/yarp.c +1504 -615
- data/yarp.gemspec +3 -1
- metadata +4 -2
data/src/enc/yp_big5.c
CHANGED
@@ -1,69 +1,42 @@
|
|
1
1
|
#include "yarp/enc/yp_encoding.h"
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
static yp_big5_codepoint_t
|
6
|
-
yp_big5_codepoint(const char *c, ptrdiff_t n, size_t *width) {
|
7
|
-
const unsigned char *uc = (const unsigned char *) c;
|
8
|
-
|
3
|
+
static size_t
|
4
|
+
yp_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
|
9
5
|
// These are the single byte characters.
|
10
|
-
if (*
|
11
|
-
|
12
|
-
return *uc;
|
6
|
+
if (*b < 0x80) {
|
7
|
+
return 1;
|
13
8
|
}
|
14
9
|
|
15
10
|
// These are the double byte characters.
|
16
|
-
if ((n > 1) && (
|
17
|
-
|
18
|
-
return (yp_big5_codepoint_t) (uc[0] << 8 | uc[1]);
|
11
|
+
if ((n > 1) && (b[0] >= 0xA1 && b[0] <= 0xFE) && (b[1] >= 0x40 && b[1] <= 0xFE)) {
|
12
|
+
return 2;
|
19
13
|
}
|
20
14
|
|
21
|
-
*width = 0;
|
22
15
|
return 0;
|
23
16
|
}
|
24
17
|
|
25
18
|
static size_t
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
return width;
|
31
|
-
}
|
32
|
-
|
33
|
-
static size_t
|
34
|
-
yp_encoding_big5_alpha_char(const char *c, ptrdiff_t n) {
|
35
|
-
size_t width;
|
36
|
-
yp_big5_codepoint_t codepoint = yp_big5_codepoint(c, n, &width);
|
37
|
-
|
38
|
-
if (width == 1) {
|
39
|
-
const char value = (const char) codepoint;
|
40
|
-
return yp_encoding_ascii_alpha_char(&value, n);
|
19
|
+
yp_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
20
|
+
if (yp_encoding_big5_char_width(b, n) == 1) {
|
21
|
+
return yp_encoding_ascii_alpha_char(b, n);
|
41
22
|
} else {
|
42
23
|
return 0;
|
43
24
|
}
|
44
25
|
}
|
45
26
|
|
46
27
|
static size_t
|
47
|
-
yp_encoding_big5_alnum_char(const
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
if (width == 1) {
|
52
|
-
const char value = (const char) codepoint;
|
53
|
-
return yp_encoding_ascii_alnum_char(&value, n);
|
28
|
+
yp_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
29
|
+
if (yp_encoding_big5_char_width(b, n) == 1) {
|
30
|
+
return yp_encoding_ascii_alnum_char(b, n);
|
54
31
|
} else {
|
55
32
|
return 0;
|
56
33
|
}
|
57
34
|
}
|
58
35
|
|
59
36
|
static bool
|
60
|
-
yp_encoding_big5_isupper_char(const
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
if (width == 1) {
|
65
|
-
const char value = (const char) codepoint;
|
66
|
-
return yp_encoding_ascii_isupper_char(&value, n);
|
37
|
+
yp_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
38
|
+
if (yp_encoding_big5_char_width(b, n) == 1) {
|
39
|
+
return yp_encoding_ascii_isupper_char(b, n);
|
67
40
|
} else {
|
68
41
|
return false;
|
69
42
|
}
|
data/src/enc/yp_euc_jp.c
CHANGED
@@ -1,75 +1,48 @@
|
|
1
1
|
#include "yarp/enc/yp_encoding.h"
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
static yp_euc_jp_codepoint_t
|
6
|
-
yp_euc_jp_codepoint(const char *c, ptrdiff_t n, size_t *width) {
|
7
|
-
const unsigned char *uc = (const unsigned char *) c;
|
8
|
-
|
3
|
+
static size_t
|
4
|
+
yp_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
|
9
5
|
// These are the single byte characters.
|
10
|
-
if (*
|
11
|
-
|
12
|
-
return *uc;
|
6
|
+
if (*b < 0x80) {
|
7
|
+
return 1;
|
13
8
|
}
|
14
9
|
|
15
10
|
// These are the double byte characters.
|
16
11
|
if (
|
17
12
|
(n > 1) &&
|
18
13
|
(
|
19
|
-
((
|
20
|
-
((
|
14
|
+
((b[0] == 0x8E) && (b[1] >= 0xA1 && b[1] <= 0xFE)) ||
|
15
|
+
((b[0] >= 0xA1 && b[0] <= 0xFE) && (b[1] >= 0xA1 && b[1] <= 0xFE))
|
21
16
|
)
|
22
17
|
) {
|
23
|
-
|
24
|
-
return (yp_euc_jp_codepoint_t) (uc[0] << 8 | uc[1]);
|
18
|
+
return 2;
|
25
19
|
}
|
26
20
|
|
27
|
-
*width = 0;
|
28
21
|
return 0;
|
29
22
|
}
|
30
23
|
|
31
24
|
static size_t
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
return width;
|
37
|
-
}
|
38
|
-
|
39
|
-
static size_t
|
40
|
-
yp_encoding_euc_jp_alpha_char(const char *c, ptrdiff_t n) {
|
41
|
-
size_t width;
|
42
|
-
yp_euc_jp_codepoint_t codepoint = yp_euc_jp_codepoint(c, n, &width);
|
43
|
-
|
44
|
-
if (width == 1) {
|
45
|
-
const char value = (const char) codepoint;
|
46
|
-
return yp_encoding_ascii_alpha_char(&value, n);
|
25
|
+
yp_encoding_euc_jp_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
26
|
+
if (yp_encoding_euc_jp_char_width(b, n) == 1) {
|
27
|
+
return yp_encoding_ascii_alpha_char(b, n);
|
47
28
|
} else {
|
48
29
|
return 0;
|
49
30
|
}
|
50
31
|
}
|
51
32
|
|
52
33
|
static size_t
|
53
|
-
yp_encoding_euc_jp_alnum_char(const
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
if (width == 1) {
|
58
|
-
const char value = (const char) codepoint;
|
59
|
-
return yp_encoding_ascii_alnum_char(&value, n);
|
34
|
+
yp_encoding_euc_jp_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
35
|
+
if (yp_encoding_euc_jp_char_width(b, n) == 1) {
|
36
|
+
return yp_encoding_ascii_alnum_char(b, n);
|
60
37
|
} else {
|
61
38
|
return 0;
|
62
39
|
}
|
63
40
|
}
|
64
41
|
|
65
42
|
static bool
|
66
|
-
yp_encoding_euc_jp_isupper_char(const
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
if (width == 1) {
|
71
|
-
const char value = (const char) codepoint;
|
72
|
-
return yp_encoding_ascii_isupper_char(&value, n);
|
43
|
+
yp_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
44
|
+
if (yp_encoding_euc_jp_char_width(b, n) == 1) {
|
45
|
+
return yp_encoding_ascii_isupper_char(b, n);
|
73
46
|
} else {
|
74
47
|
return 0;
|
75
48
|
}
|
data/src/enc/yp_gbk.c
CHANGED
@@ -1,78 +1,51 @@
|
|
1
1
|
#include "yarp/enc/yp_encoding.h"
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
static yp_gbk_codepoint_t
|
6
|
-
yp_gbk_codepoint(const char *c, ptrdiff_t n, size_t *width) {
|
7
|
-
const unsigned char *uc = (const unsigned char *) c;
|
8
|
-
|
3
|
+
static size_t
|
4
|
+
yp_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
|
9
5
|
// These are the single byte characters.
|
10
|
-
if (*
|
11
|
-
|
12
|
-
return *uc;
|
6
|
+
if (*b < 0x80) {
|
7
|
+
return 1;
|
13
8
|
}
|
14
9
|
|
15
10
|
// These are the double byte characters.
|
16
11
|
if (
|
17
12
|
(n > 1) &&
|
18
13
|
(
|
19
|
-
((
|
20
|
-
((
|
21
|
-
((
|
22
|
-
((
|
23
|
-
((
|
14
|
+
((b[0] >= 0xA1 && b[0] <= 0xA9) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // GBK/1
|
15
|
+
((b[0] >= 0xB0 && b[0] <= 0xF7) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // GBK/2
|
16
|
+
((b[0] >= 0x81 && b[0] <= 0xA0) && (b[1] >= 0x40 && b[1] <= 0xFE) && (b[1] != 0x7F)) || // GBK/3
|
17
|
+
((b[0] >= 0xAA && b[0] <= 0xFE) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) || // GBK/4
|
18
|
+
((b[0] >= 0xA8 && b[0] <= 0xA9) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) // GBK/5
|
24
19
|
)
|
25
20
|
) {
|
26
|
-
|
27
|
-
return (yp_gbk_codepoint_t) (uc[0] << 8 | uc[1]);
|
21
|
+
return 2;
|
28
22
|
}
|
29
23
|
|
30
|
-
*width = 0;
|
31
24
|
return 0;
|
32
25
|
}
|
33
26
|
|
34
27
|
static size_t
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
return width;
|
40
|
-
}
|
41
|
-
|
42
|
-
static size_t
|
43
|
-
yp_encoding_gbk_alpha_char(const char *c, ptrdiff_t n) {
|
44
|
-
size_t width;
|
45
|
-
yp_gbk_codepoint_t codepoint = yp_gbk_codepoint(c, n, &width);
|
46
|
-
|
47
|
-
if (width == 1) {
|
48
|
-
const char value = (const char) codepoint;
|
49
|
-
return yp_encoding_ascii_alpha_char(&value, n);
|
28
|
+
yp_encoding_gbk_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
29
|
+
if (yp_encoding_gbk_char_width(b, n) == 1) {
|
30
|
+
return yp_encoding_ascii_alpha_char(b, n);
|
50
31
|
} else {
|
51
32
|
return 0;
|
52
33
|
}
|
53
34
|
}
|
54
35
|
|
55
36
|
static size_t
|
56
|
-
yp_encoding_gbk_alnum_char(const
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
if (width == 1) {
|
61
|
-
const char value = (const char) codepoint;
|
62
|
-
return yp_encoding_ascii_alnum_char(&value, n);
|
37
|
+
yp_encoding_gbk_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
38
|
+
if (yp_encoding_gbk_char_width(b, n) == 1) {
|
39
|
+
return yp_encoding_ascii_alnum_char(b, n);
|
63
40
|
} else {
|
64
41
|
return 0;
|
65
42
|
}
|
66
43
|
}
|
67
44
|
|
68
45
|
static bool
|
69
|
-
yp_encoding_gbk_isupper_char(const
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
if (width == 1) {
|
74
|
-
const char value = (const char) codepoint;
|
75
|
-
return yp_encoding_ascii_isupper_char(&value, n);
|
46
|
+
yp_encoding_gbk_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
47
|
+
if (yp_encoding_gbk_char_width(b, n) == 1) {
|
48
|
+
return yp_encoding_ascii_isupper_char(b, n);
|
76
49
|
} else {
|
77
50
|
return false;
|
78
51
|
}
|
data/src/enc/yp_shift_jis.c
CHANGED
@@ -1,73 +1,46 @@
|
|
1
1
|
#include "yarp/enc/yp_encoding.h"
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
static yp_shift_jis_codepoint_t
|
6
|
-
yp_shift_jis_codepoint(const char *c, ptrdiff_t n, size_t *width) {
|
7
|
-
const unsigned char *uc = (const unsigned char *) c;
|
8
|
-
|
3
|
+
static size_t
|
4
|
+
yp_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
|
9
5
|
// These are the single byte characters.
|
10
|
-
if (*
|
11
|
-
|
12
|
-
return *uc;
|
6
|
+
if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
|
7
|
+
return 1;
|
13
8
|
}
|
14
9
|
|
15
10
|
// These are the double byte characters.
|
16
11
|
if (
|
17
12
|
(n > 1) &&
|
18
|
-
((
|
19
|
-
(
|
13
|
+
((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) &&
|
14
|
+
(b[1] >= 0x40 && b[1] <= 0xFC)
|
20
15
|
) {
|
21
|
-
|
22
|
-
return (yp_shift_jis_codepoint_t) (uc[0] << 8 | uc[1]);
|
16
|
+
return 2;
|
23
17
|
}
|
24
18
|
|
25
|
-
*width = 0;
|
26
19
|
return 0;
|
27
20
|
}
|
28
21
|
|
29
22
|
static size_t
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
return width;
|
35
|
-
}
|
36
|
-
|
37
|
-
static size_t
|
38
|
-
yp_encoding_shift_jis_alpha_char(const char *c, ptrdiff_t n) {
|
39
|
-
size_t width;
|
40
|
-
yp_shift_jis_codepoint_t codepoint = yp_shift_jis_codepoint(c, n, &width);
|
41
|
-
|
42
|
-
if (width == 1) {
|
43
|
-
const char value = (const char) codepoint;
|
44
|
-
return yp_encoding_ascii_alpha_char(&value, n);
|
23
|
+
yp_encoding_shift_jis_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
24
|
+
if (yp_encoding_shift_jis_char_width(b, n) == 1) {
|
25
|
+
return yp_encoding_ascii_alpha_char(b, n);
|
45
26
|
} else {
|
46
27
|
return 0;
|
47
28
|
}
|
48
29
|
}
|
49
30
|
|
50
31
|
static size_t
|
51
|
-
yp_encoding_shift_jis_alnum_char(const
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
if (width == 1) {
|
56
|
-
const char value = (const char) codepoint;
|
57
|
-
return yp_encoding_ascii_alnum_char(&value, n);
|
32
|
+
yp_encoding_shift_jis_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
33
|
+
if (yp_encoding_shift_jis_char_width(b, n) == 1) {
|
34
|
+
return yp_encoding_ascii_alnum_char(b, n);
|
58
35
|
} else {
|
59
36
|
return 0;
|
60
37
|
}
|
61
38
|
}
|
62
39
|
|
63
40
|
static bool
|
64
|
-
yp_encoding_shift_jis_isupper_char(const
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
if (width == 1) {
|
69
|
-
const char value = (const char) codepoint;
|
70
|
-
return yp_encoding_ascii_isupper_char(&value, n);
|
41
|
+
yp_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
42
|
+
if (yp_encoding_shift_jis_char_width(b, n) == 1) {
|
43
|
+
return yp_encoding_ascii_isupper_char(b, n);
|
71
44
|
} else {
|
72
45
|
return 0;
|
73
46
|
}
|
data/src/enc/yp_tables.c
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
// Each element of the following table contains a bitfield that indicates a
|
4
4
|
// piece of information about the corresponding ASCII character.
|
5
|
-
static
|
5
|
+
static uint8_t yp_encoding_ascii_table[256] = {
|
6
6
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
7
7
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
8
8
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -24,7 +24,7 @@ static unsigned char yp_encoding_ascii_table[256] = {
|
|
24
24
|
|
25
25
|
// Each element of the following table contains a bitfield that indicates a
|
26
26
|
// piece of information about the corresponding ISO-8859-1 character.
|
27
|
-
static
|
27
|
+
static uint8_t yp_encoding_iso_8859_1_table[256] = {
|
28
28
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
29
29
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
30
30
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -46,7 +46,7 @@ static unsigned char yp_encoding_iso_8859_1_table[256] = {
|
|
46
46
|
|
47
47
|
// Each element of the following table contains a bitfield that indicates a
|
48
48
|
// piece of information about the corresponding ISO-8859-2 character.
|
49
|
-
static
|
49
|
+
static uint8_t yp_encoding_iso_8859_2_table[256] = {
|
50
50
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
51
51
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
52
52
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -68,7 +68,7 @@ static unsigned char yp_encoding_iso_8859_2_table[256] = {
|
|
68
68
|
|
69
69
|
// Each element of the following table contains a bitfield that indicates a
|
70
70
|
// piece of information about the corresponding ISO-8859-3 character.
|
71
|
-
static
|
71
|
+
static uint8_t yp_encoding_iso_8859_3_table[256] = {
|
72
72
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
73
73
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
74
74
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -90,7 +90,7 @@ static unsigned char yp_encoding_iso_8859_3_table[256] = {
|
|
90
90
|
|
91
91
|
// Each element of the following table contains a bitfield that indicates a
|
92
92
|
// piece of information about the corresponding ISO-8859-4 character.
|
93
|
-
static
|
93
|
+
static uint8_t yp_encoding_iso_8859_4_table[256] = {
|
94
94
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
95
95
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
96
96
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -112,7 +112,7 @@ static unsigned char yp_encoding_iso_8859_4_table[256] = {
|
|
112
112
|
|
113
113
|
// Each element of the following table contains a bitfield that indicates a
|
114
114
|
// piece of information about the corresponding ISO-8859-5 character.
|
115
|
-
static
|
115
|
+
static uint8_t yp_encoding_iso_8859_5_table[256] = {
|
116
116
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
117
117
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
118
118
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -134,7 +134,7 @@ static unsigned char yp_encoding_iso_8859_5_table[256] = {
|
|
134
134
|
|
135
135
|
// Each element of the following table contains a bitfield that indicates a
|
136
136
|
// piece of information about the corresponding ISO-8859-6 character.
|
137
|
-
static
|
137
|
+
static uint8_t yp_encoding_iso_8859_6_table[256] = {
|
138
138
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
139
139
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
140
140
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -156,7 +156,7 @@ static unsigned char yp_encoding_iso_8859_6_table[256] = {
|
|
156
156
|
|
157
157
|
// Each element of the following table contains a bitfield that indicates a
|
158
158
|
// piece of information about the corresponding ISO-8859-7 character.
|
159
|
-
static
|
159
|
+
static uint8_t yp_encoding_iso_8859_7_table[256] = {
|
160
160
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
161
161
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
162
162
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -178,7 +178,7 @@ static unsigned char yp_encoding_iso_8859_7_table[256] = {
|
|
178
178
|
|
179
179
|
// Each element of the following table contains a bitfield that indicates a
|
180
180
|
// piece of information about the corresponding ISO-8859-8 character.
|
181
|
-
static
|
181
|
+
static uint8_t yp_encoding_iso_8859_8_table[256] = {
|
182
182
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
183
183
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
184
184
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -200,7 +200,7 @@ static unsigned char yp_encoding_iso_8859_8_table[256] = {
|
|
200
200
|
|
201
201
|
// Each element of the following table contains a bitfield that indicates a
|
202
202
|
// piece of information about the corresponding ISO-8859-9 character.
|
203
|
-
static
|
203
|
+
static uint8_t yp_encoding_iso_8859_9_table[256] = {
|
204
204
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
205
205
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
206
206
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -222,7 +222,7 @@ static unsigned char yp_encoding_iso_8859_9_table[256] = {
|
|
222
222
|
|
223
223
|
// Each element of the following table contains a bitfield that indicates a
|
224
224
|
// piece of information about the corresponding ISO-8859-10 character.
|
225
|
-
static
|
225
|
+
static uint8_t yp_encoding_iso_8859_10_table[256] = {
|
226
226
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
227
227
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
228
228
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -244,7 +244,7 @@ static unsigned char yp_encoding_iso_8859_10_table[256] = {
|
|
244
244
|
|
245
245
|
// Each element of the following table contains a bitfield that indicates a
|
246
246
|
// piece of information about the corresponding ISO-8859-11 character.
|
247
|
-
static
|
247
|
+
static uint8_t yp_encoding_iso_8859_11_table[256] = {
|
248
248
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
249
249
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
250
250
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -266,7 +266,7 @@ static unsigned char yp_encoding_iso_8859_11_table[256] = {
|
|
266
266
|
|
267
267
|
// Each element of the following table contains a bitfield that indicates a
|
268
268
|
// piece of information about the corresponding ISO-8859-13 character.
|
269
|
-
static
|
269
|
+
static uint8_t yp_encoding_iso_8859_13_table[256] = {
|
270
270
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
271
271
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
272
272
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -288,7 +288,7 @@ static unsigned char yp_encoding_iso_8859_13_table[256] = {
|
|
288
288
|
|
289
289
|
// Each element of the following table contains a bitfield that indicates a
|
290
290
|
// piece of information about the corresponding ISO-8859-14 character.
|
291
|
-
static
|
291
|
+
static uint8_t yp_encoding_iso_8859_14_table[256] = {
|
292
292
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
293
293
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
294
294
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -310,7 +310,7 @@ static unsigned char yp_encoding_iso_8859_14_table[256] = {
|
|
310
310
|
|
311
311
|
// Each element of the following table contains a bitfield that indicates a
|
312
312
|
// piece of information about the corresponding ISO-8859-15 character.
|
313
|
-
static
|
313
|
+
static uint8_t yp_encoding_iso_8859_15_table[256] = {
|
314
314
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
315
315
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
316
316
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -332,7 +332,7 @@ static unsigned char yp_encoding_iso_8859_15_table[256] = {
|
|
332
332
|
|
333
333
|
// Each element of the following table contains a bitfield that indicates a
|
334
334
|
// piece of information about the corresponding ISO-8859-16 character.
|
335
|
-
static
|
335
|
+
static uint8_t yp_encoding_iso_8859_16_table[256] = {
|
336
336
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
337
337
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
338
338
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -354,7 +354,7 @@ static unsigned char yp_encoding_iso_8859_16_table[256] = {
|
|
354
354
|
|
355
355
|
// Each element of the following table contains a bitfield that indicates a
|
356
356
|
// piece of information about the corresponding KOI8-R character.
|
357
|
-
static
|
357
|
+
static uint8_t yp_encoding_koi8_r_table[256] = {
|
358
358
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
359
359
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
360
360
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -376,7 +376,7 @@ static unsigned char yp_encoding_koi8_r_table[256] = {
|
|
376
376
|
|
377
377
|
// Each element of the following table contains a bitfield that indicates a
|
378
378
|
// piece of information about the corresponding windows-1251 character.
|
379
|
-
static
|
379
|
+
static uint8_t yp_encoding_windows_1251_table[256] = {
|
380
380
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
381
381
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
382
382
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -398,7 +398,7 @@ static unsigned char yp_encoding_windows_1251_table[256] = {
|
|
398
398
|
|
399
399
|
// Each element of the following table contains a bitfield that indicates a
|
400
400
|
// piece of information about the corresponding windows-1252 character.
|
401
|
-
static
|
401
|
+
static uint8_t yp_encoding_windows_1252_table[256] = {
|
402
402
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
403
403
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
404
404
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -419,34 +419,32 @@ static unsigned char yp_encoding_windows_1252_table[256] = {
|
|
419
419
|
};
|
420
420
|
|
421
421
|
static size_t
|
422
|
-
yp_encoding_ascii_char_width(const
|
423
|
-
|
424
|
-
return v < 0x80 ? 1 : 0;
|
422
|
+
yp_encoding_ascii_char_width(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
423
|
+
return *b < 0x80 ? 1 : 0;
|
425
424
|
}
|
426
425
|
|
427
426
|
size_t
|
428
|
-
yp_encoding_ascii_alpha_char(const
|
429
|
-
return (yp_encoding_ascii_table[
|
427
|
+
yp_encoding_ascii_alpha_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
428
|
+
return (yp_encoding_ascii_table[*b] & YP_ENCODING_ALPHABETIC_BIT);
|
430
429
|
}
|
431
430
|
|
432
431
|
size_t
|
433
|
-
yp_encoding_ascii_alnum_char(const
|
434
|
-
return (yp_encoding_ascii_table[
|
432
|
+
yp_encoding_ascii_alnum_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
433
|
+
return (yp_encoding_ascii_table[*b] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
|
435
434
|
}
|
436
435
|
|
437
436
|
bool
|
438
|
-
yp_encoding_ascii_isupper_char(const
|
439
|
-
return (yp_encoding_ascii_table[
|
437
|
+
yp_encoding_ascii_isupper_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
438
|
+
return (yp_encoding_ascii_table[*b] & YP_ENCODING_UPPERCASE_BIT);
|
440
439
|
}
|
441
440
|
|
442
441
|
static size_t
|
443
|
-
yp_encoding_koi8_r_char_width(const
|
444
|
-
|
445
|
-
return ((v >= 0x20 && v <= 0x7E) || (v >= 0x80)) ? 1 : 0;
|
442
|
+
yp_encoding_koi8_r_char_width(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
443
|
+
return ((*b >= 0x20 && *b <= 0x7E) || (*b >= 0x80)) ? 1 : 0;
|
446
444
|
}
|
447
445
|
|
448
446
|
static size_t
|
449
|
-
yp_encoding_single_char_width(YP_ATTRIBUTE_UNUSED const
|
447
|
+
yp_encoding_single_char_width(YP_ATTRIBUTE_UNUSED const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
450
448
|
return 1;
|
451
449
|
}
|
452
450
|
|
@@ -469,14 +467,14 @@ yp_encoding_t yp_encoding_ascii_8bit = {
|
|
469
467
|
};
|
470
468
|
|
471
469
|
#define YP_ENCODING_TABLE(s, i, w) \
|
472
|
-
static size_t yp_encoding_ ##i ## _alpha_char(const
|
473
|
-
return (yp_encoding_ ##i ## _table[
|
470
|
+
static size_t yp_encoding_ ##i ## _alpha_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
|
471
|
+
return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_ALPHABETIC_BIT); \
|
474
472
|
} \
|
475
|
-
static size_t yp_encoding_ ##i ## _alnum_char(const
|
476
|
-
return (yp_encoding_ ##i ## _table[
|
473
|
+
static size_t yp_encoding_ ##i ## _alnum_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
|
474
|
+
return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
|
477
475
|
} \
|
478
|
-
static bool yp_encoding_ ##i ## _isupper_char(const
|
479
|
-
return (yp_encoding_ ##i ## _table[
|
476
|
+
static bool yp_encoding_ ##i ## _isupper_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
|
477
|
+
return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_UPPERCASE_BIT); \
|
480
478
|
} \
|
481
479
|
yp_encoding_t yp_encoding_ ##i = { \
|
482
480
|
.name = s, \
|