yarp 0.8.0 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +48 -1
  3. data/Makefile +5 -1
  4. data/README.md +4 -3
  5. data/config.yml +461 -150
  6. data/docs/configuration.md +1 -0
  7. data/docs/encoding.md +5 -5
  8. data/docs/ruby_api.md +2 -0
  9. data/docs/serialization.md +3 -3
  10. data/docs/testing.md +2 -2
  11. data/ext/yarp/api_node.c +810 -199
  12. data/ext/yarp/extension.c +94 -31
  13. data/ext/yarp/extension.h +2 -2
  14. data/include/yarp/ast.h +653 -150
  15. data/include/yarp/defines.h +2 -1
  16. data/include/yarp/diagnostic.h +3 -3
  17. data/include/yarp/enc/yp_encoding.h +10 -10
  18. data/include/yarp/node.h +10 -0
  19. data/include/yarp/parser.h +19 -19
  20. data/include/yarp/regexp.h +1 -1
  21. data/include/yarp/unescape.h +7 -5
  22. data/include/yarp/util/yp_buffer.h +3 -0
  23. data/include/yarp/util/yp_char.h +16 -16
  24. data/include/yarp/util/yp_constant_pool.h +2 -2
  25. data/include/yarp/util/yp_newline_list.h +7 -4
  26. data/include/yarp/util/yp_string.h +4 -4
  27. data/include/yarp/util/yp_string_list.h +0 -3
  28. data/include/yarp/util/yp_strpbrk.h +1 -1
  29. data/include/yarp/version.h +2 -2
  30. data/include/yarp.h +14 -3
  31. data/lib/yarp/desugar_visitor.rb +204 -0
  32. data/lib/yarp/ffi.rb +27 -1
  33. data/lib/yarp/lex_compat.rb +93 -25
  34. data/lib/yarp/mutation_visitor.rb +683 -0
  35. data/lib/yarp/node.rb +3121 -597
  36. data/lib/yarp/serialize.rb +198 -126
  37. data/lib/yarp.rb +53 -7
  38. data/src/diagnostic.c +1 -1
  39. data/src/enc/yp_big5.c +15 -42
  40. data/src/enc/yp_euc_jp.c +16 -43
  41. data/src/enc/yp_gbk.c +19 -46
  42. data/src/enc/yp_shift_jis.c +16 -43
  43. data/src/enc/yp_tables.c +36 -38
  44. data/src/enc/yp_unicode.c +20 -25
  45. data/src/enc/yp_windows_31j.c +16 -43
  46. data/src/node.c +1444 -836
  47. data/src/prettyprint.c +324 -103
  48. data/src/regexp.c +21 -21
  49. data/src/serialize.c +429 -276
  50. data/src/token_type.c +2 -2
  51. data/src/unescape.c +184 -136
  52. data/src/util/yp_buffer.c +7 -2
  53. data/src/util/yp_char.c +34 -34
  54. data/src/util/yp_constant_pool.c +4 -4
  55. data/src/util/yp_memchr.c +1 -1
  56. data/src/util/yp_newline_list.c +14 -3
  57. data/src/util/yp_string.c +22 -20
  58. data/src/util/yp_string_list.c +0 -6
  59. data/src/util/yp_strncasecmp.c +3 -6
  60. data/src/util/yp_strpbrk.c +8 -8
  61. data/src/yarp.c +1504 -615
  62. data/yarp.gemspec +3 -1
  63. metadata +4 -2
data/src/enc/yp_big5.c CHANGED
@@ -1,69 +1,42 @@
1
1
  #include "yarp/enc/yp_encoding.h"
2
2
 
3
- typedef uint16_t yp_big5_codepoint_t;
4
-
5
- static yp_big5_codepoint_t
6
- yp_big5_codepoint(const char *c, ptrdiff_t n, size_t *width) {
7
- const unsigned char *uc = (const unsigned char *) c;
8
-
3
+ static size_t
4
+ yp_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
9
5
  // These are the single byte characters.
10
- if (*uc < 0x80) {
11
- *width = 1;
12
- return *uc;
6
+ if (*b < 0x80) {
7
+ return 1;
13
8
  }
14
9
 
15
10
  // These are the double byte characters.
16
- if ((n > 1) && (uc[0] >= 0xA1 && uc[0] <= 0xFE) && (uc[1] >= 0x40 && uc[1] <= 0xFE)) {
17
- *width = 2;
18
- return (yp_big5_codepoint_t) (uc[0] << 8 | uc[1]);
11
+ if ((n > 1) && (b[0] >= 0xA1 && b[0] <= 0xFE) && (b[1] >= 0x40 && b[1] <= 0xFE)) {
12
+ return 2;
19
13
  }
20
14
 
21
- *width = 0;
22
15
  return 0;
23
16
  }
24
17
 
25
18
  static size_t
26
- yp_encoding_big5_char_width(const char *c, ptrdiff_t n) {
27
- size_t width;
28
- yp_big5_codepoint(c, n, &width);
29
-
30
- return width;
31
- }
32
-
33
- static size_t
34
- yp_encoding_big5_alpha_char(const char *c, ptrdiff_t n) {
35
- size_t width;
36
- yp_big5_codepoint_t codepoint = yp_big5_codepoint(c, n, &width);
37
-
38
- if (width == 1) {
39
- const char value = (const char) codepoint;
40
- return yp_encoding_ascii_alpha_char(&value, n);
19
+ yp_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
20
+ if (yp_encoding_big5_char_width(b, n) == 1) {
21
+ return yp_encoding_ascii_alpha_char(b, n);
41
22
  } else {
42
23
  return 0;
43
24
  }
44
25
  }
45
26
 
46
27
  static size_t
47
- yp_encoding_big5_alnum_char(const char *c, ptrdiff_t n) {
48
- size_t width;
49
- yp_big5_codepoint_t codepoint = yp_big5_codepoint(c, n, &width);
50
-
51
- if (width == 1) {
52
- const char value = (const char) codepoint;
53
- return yp_encoding_ascii_alnum_char(&value, n);
28
+ yp_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
29
+ if (yp_encoding_big5_char_width(b, n) == 1) {
30
+ return yp_encoding_ascii_alnum_char(b, n);
54
31
  } else {
55
32
  return 0;
56
33
  }
57
34
  }
58
35
 
59
36
  static bool
60
- yp_encoding_big5_isupper_char(const char *c, ptrdiff_t n) {
61
- size_t width;
62
- yp_big5_codepoint_t codepoint = yp_big5_codepoint(c, n, &width);
63
-
64
- if (width == 1) {
65
- const char value = (const char) codepoint;
66
- return yp_encoding_ascii_isupper_char(&value, n);
37
+ yp_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
38
+ if (yp_encoding_big5_char_width(b, n) == 1) {
39
+ return yp_encoding_ascii_isupper_char(b, n);
67
40
  } else {
68
41
  return false;
69
42
  }
data/src/enc/yp_euc_jp.c CHANGED
@@ -1,75 +1,48 @@
1
1
  #include "yarp/enc/yp_encoding.h"
2
2
 
3
- typedef uint16_t yp_euc_jp_codepoint_t;
4
-
5
- static yp_euc_jp_codepoint_t
6
- yp_euc_jp_codepoint(const char *c, ptrdiff_t n, size_t *width) {
7
- const unsigned char *uc = (const unsigned char *) c;
8
-
3
+ static size_t
4
+ yp_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
9
5
  // These are the single byte characters.
10
- if (*uc < 0x80) {
11
- *width = 1;
12
- return *uc;
6
+ if (*b < 0x80) {
7
+ return 1;
13
8
  }
14
9
 
15
10
  // These are the double byte characters.
16
11
  if (
17
12
  (n > 1) &&
18
13
  (
19
- ((uc[0] == 0x8E) && (uc[1] >= 0xA1 && uc[1] <= 0xFE)) ||
20
- ((uc[0] >= 0xA1 && uc[0] <= 0xFE) && (uc[1] >= 0xA1 && uc[1] <= 0xFE))
14
+ ((b[0] == 0x8E) && (b[1] >= 0xA1 && b[1] <= 0xFE)) ||
15
+ ((b[0] >= 0xA1 && b[0] <= 0xFE) && (b[1] >= 0xA1 && b[1] <= 0xFE))
21
16
  )
22
17
  ) {
23
- *width = 2;
24
- return (yp_euc_jp_codepoint_t) (uc[0] << 8 | uc[1]);
18
+ return 2;
25
19
  }
26
20
 
27
- *width = 0;
28
21
  return 0;
29
22
  }
30
23
 
31
24
  static size_t
32
- yp_encoding_euc_jp_char_width(const char *c, ptrdiff_t n) {
33
- size_t width;
34
- yp_euc_jp_codepoint(c, n, &width);
35
-
36
- return width;
37
- }
38
-
39
- static size_t
40
- yp_encoding_euc_jp_alpha_char(const char *c, ptrdiff_t n) {
41
- size_t width;
42
- yp_euc_jp_codepoint_t codepoint = yp_euc_jp_codepoint(c, n, &width);
43
-
44
- if (width == 1) {
45
- const char value = (const char) codepoint;
46
- return yp_encoding_ascii_alpha_char(&value, n);
25
+ yp_encoding_euc_jp_alpha_char(const uint8_t *b, ptrdiff_t n) {
26
+ if (yp_encoding_euc_jp_char_width(b, n) == 1) {
27
+ return yp_encoding_ascii_alpha_char(b, n);
47
28
  } else {
48
29
  return 0;
49
30
  }
50
31
  }
51
32
 
52
33
  static size_t
53
- yp_encoding_euc_jp_alnum_char(const char *c, ptrdiff_t n) {
54
- size_t width;
55
- yp_euc_jp_codepoint_t codepoint = yp_euc_jp_codepoint(c, n, &width);
56
-
57
- if (width == 1) {
58
- const char value = (const char) codepoint;
59
- return yp_encoding_ascii_alnum_char(&value, n);
34
+ yp_encoding_euc_jp_alnum_char(const uint8_t *b, ptrdiff_t n) {
35
+ if (yp_encoding_euc_jp_char_width(b, n) == 1) {
36
+ return yp_encoding_ascii_alnum_char(b, n);
60
37
  } else {
61
38
  return 0;
62
39
  }
63
40
  }
64
41
 
65
42
  static bool
66
- yp_encoding_euc_jp_isupper_char(const char *c, ptrdiff_t n) {
67
- size_t width;
68
- yp_euc_jp_codepoint_t codepoint = yp_euc_jp_codepoint(c, n, &width);
69
-
70
- if (width == 1) {
71
- const char value = (const char) codepoint;
72
- return yp_encoding_ascii_isupper_char(&value, n);
43
+ yp_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
44
+ if (yp_encoding_euc_jp_char_width(b, n) == 1) {
45
+ return yp_encoding_ascii_isupper_char(b, n);
73
46
  } else {
74
47
  return 0;
75
48
  }
data/src/enc/yp_gbk.c CHANGED
@@ -1,78 +1,51 @@
1
1
  #include "yarp/enc/yp_encoding.h"
2
2
 
3
- typedef uint16_t yp_gbk_codepoint_t;
4
-
5
- static yp_gbk_codepoint_t
6
- yp_gbk_codepoint(const char *c, ptrdiff_t n, size_t *width) {
7
- const unsigned char *uc = (const unsigned char *) c;
8
-
3
+ static size_t
4
+ yp_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
9
5
  // These are the single byte characters.
10
- if (*uc < 0x80) {
11
- *width = 1;
12
- return *uc;
6
+ if (*b < 0x80) {
7
+ return 1;
13
8
  }
14
9
 
15
10
  // These are the double byte characters.
16
11
  if (
17
12
  (n > 1) &&
18
13
  (
19
- ((uc[0] >= 0xA1 && uc[0] <= 0xA9) && (uc[1] >= 0xA1 && uc[1] <= 0xFE)) || // GBK/1
20
- ((uc[0] >= 0xB0 && uc[0] <= 0xF7) && (uc[1] >= 0xA1 && uc[1] <= 0xFE)) || // GBK/2
21
- ((uc[0] >= 0x81 && uc[0] <= 0xA0) && (uc[1] >= 0x40 && uc[1] <= 0xFE) && (uc[1] != 0x7F)) || // GBK/3
22
- ((uc[0] >= 0xAA && uc[0] <= 0xFE) && (uc[1] >= 0x40 && uc[1] <= 0xA0) && (uc[1] != 0x7F)) || // GBK/4
23
- ((uc[0] >= 0xA8 && uc[0] <= 0xA9) && (uc[1] >= 0x40 && uc[1] <= 0xA0) && (uc[1] != 0x7F)) // GBK/5
14
+ ((b[0] >= 0xA1 && b[0] <= 0xA9) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // GBK/1
15
+ ((b[0] >= 0xB0 && b[0] <= 0xF7) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // GBK/2
16
+ ((b[0] >= 0x81 && b[0] <= 0xA0) && (b[1] >= 0x40 && b[1] <= 0xFE) && (b[1] != 0x7F)) || // GBK/3
17
+ ((b[0] >= 0xAA && b[0] <= 0xFE) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) || // GBK/4
18
+ ((b[0] >= 0xA8 && b[0] <= 0xA9) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) // GBK/5
24
19
  )
25
20
  ) {
26
- *width = 2;
27
- return (yp_gbk_codepoint_t) (uc[0] << 8 | uc[1]);
21
+ return 2;
28
22
  }
29
23
 
30
- *width = 0;
31
24
  return 0;
32
25
  }
33
26
 
34
27
  static size_t
35
- yp_encoding_gbk_char_width(const char *c, ptrdiff_t n) {
36
- size_t width;
37
- yp_gbk_codepoint(c, n, &width);
38
-
39
- return width;
40
- }
41
-
42
- static size_t
43
- yp_encoding_gbk_alpha_char(const char *c, ptrdiff_t n) {
44
- size_t width;
45
- yp_gbk_codepoint_t codepoint = yp_gbk_codepoint(c, n, &width);
46
-
47
- if (width == 1) {
48
- const char value = (const char) codepoint;
49
- return yp_encoding_ascii_alpha_char(&value, n);
28
+ yp_encoding_gbk_alpha_char(const uint8_t *b, ptrdiff_t n) {
29
+ if (yp_encoding_gbk_char_width(b, n) == 1) {
30
+ return yp_encoding_ascii_alpha_char(b, n);
50
31
  } else {
51
32
  return 0;
52
33
  }
53
34
  }
54
35
 
55
36
  static size_t
56
- yp_encoding_gbk_alnum_char(const char *c, ptrdiff_t n) {
57
- size_t width;
58
- yp_gbk_codepoint_t codepoint = yp_gbk_codepoint(c, n, &width);
59
-
60
- if (width == 1) {
61
- const char value = (const char) codepoint;
62
- return yp_encoding_ascii_alnum_char(&value, n);
37
+ yp_encoding_gbk_alnum_char(const uint8_t *b, ptrdiff_t n) {
38
+ if (yp_encoding_gbk_char_width(b, n) == 1) {
39
+ return yp_encoding_ascii_alnum_char(b, n);
63
40
  } else {
64
41
  return 0;
65
42
  }
66
43
  }
67
44
 
68
45
  static bool
69
- yp_encoding_gbk_isupper_char(const char *c, ptrdiff_t n) {
70
- size_t width;
71
- yp_gbk_codepoint_t codepoint = yp_gbk_codepoint(c, n, &width);
72
-
73
- if (width == 1) {
74
- const char value = (const char) codepoint;
75
- return yp_encoding_ascii_isupper_char(&value, n);
46
+ yp_encoding_gbk_isupper_char(const uint8_t *b, ptrdiff_t n) {
47
+ if (yp_encoding_gbk_char_width(b, n) == 1) {
48
+ return yp_encoding_ascii_isupper_char(b, n);
76
49
  } else {
77
50
  return false;
78
51
  }
data/src/enc/yp_shift_jis.c CHANGED
@@ -1,73 +1,46 @@
1
1
  #include "yarp/enc/yp_encoding.h"
2
2
 
3
- typedef uint16_t yp_shift_jis_codepoint_t;
4
-
5
- static yp_shift_jis_codepoint_t
6
- yp_shift_jis_codepoint(const char *c, ptrdiff_t n, size_t *width) {
7
- const unsigned char *uc = (const unsigned char *) c;
8
-
3
+ static size_t
4
+ yp_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
9
5
  // These are the single byte characters.
10
- if (*uc < 0x80 || (*uc >= 0xA1 && *uc <= 0xDF)) {
11
- *width = 1;
12
- return *uc;
6
+ if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
7
+ return 1;
13
8
  }
14
9
 
15
10
  // These are the double byte characters.
16
11
  if (
17
12
  (n > 1) &&
18
- ((uc[0] >= 0x81 && uc[0] <= 0x9F) || (uc[0] >= 0xE0 && uc[0] <= 0xFC)) &&
19
- (uc[1] >= 0x40 && uc[1] <= 0xFC)
13
+ ((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) &&
14
+ (b[1] >= 0x40 && b[1] <= 0xFC)
20
15
  ) {
21
- *width = 2;
22
- return (yp_shift_jis_codepoint_t) (uc[0] << 8 | uc[1]);
16
+ return 2;
23
17
  }
24
18
 
25
- *width = 0;
26
19
  return 0;
27
20
  }
28
21
 
29
22
  static size_t
30
- yp_encoding_shift_jis_char_width(const char *c, ptrdiff_t n) {
31
- size_t width;
32
- yp_shift_jis_codepoint(c, n, &width);
33
-
34
- return width;
35
- }
36
-
37
- static size_t
38
- yp_encoding_shift_jis_alpha_char(const char *c, ptrdiff_t n) {
39
- size_t width;
40
- yp_shift_jis_codepoint_t codepoint = yp_shift_jis_codepoint(c, n, &width);
41
-
42
- if (width == 1) {
43
- const char value = (const char) codepoint;
44
- return yp_encoding_ascii_alpha_char(&value, n);
23
+ yp_encoding_shift_jis_alpha_char(const uint8_t *b, ptrdiff_t n) {
24
+ if (yp_encoding_shift_jis_char_width(b, n) == 1) {
25
+ return yp_encoding_ascii_alpha_char(b, n);
45
26
  } else {
46
27
  return 0;
47
28
  }
48
29
  }
49
30
 
50
31
  static size_t
51
- yp_encoding_shift_jis_alnum_char(const char *c, ptrdiff_t n) {
52
- size_t width;
53
- yp_shift_jis_codepoint_t codepoint = yp_shift_jis_codepoint(c, n, &width);
54
-
55
- if (width == 1) {
56
- const char value = (const char) codepoint;
57
- return yp_encoding_ascii_alnum_char(&value, n);
32
+ yp_encoding_shift_jis_alnum_char(const uint8_t *b, ptrdiff_t n) {
33
+ if (yp_encoding_shift_jis_char_width(b, n) == 1) {
34
+ return yp_encoding_ascii_alnum_char(b, n);
58
35
  } else {
59
36
  return 0;
60
37
  }
61
38
  }
62
39
 
63
40
  static bool
64
- yp_encoding_shift_jis_isupper_char(const char *c, ptrdiff_t n) {
65
- size_t width;
66
- yp_shift_jis_codepoint_t codepoint = yp_shift_jis_codepoint(c, n, &width);
67
-
68
- if (width == 1) {
69
- const char value = (const char) codepoint;
70
- return yp_encoding_ascii_isupper_char(&value, n);
41
+ yp_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
42
+ if (yp_encoding_shift_jis_char_width(b, n) == 1) {
43
+ return yp_encoding_ascii_isupper_char(b, n);
71
44
  } else {
72
45
  return 0;
73
46
  }
data/src/enc/yp_tables.c CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  // Each element of the following table contains a bitfield that indicates a
4
4
  // piece of information about the corresponding ASCII character.
5
- static unsigned char yp_encoding_ascii_table[256] = {
5
+ static uint8_t yp_encoding_ascii_table[256] = {
6
6
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
7
7
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
8
8
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -24,7 +24,7 @@ static unsigned char yp_encoding_ascii_table[256] = {
24
24
 
25
25
  // Each element of the following table contains a bitfield that indicates a
26
26
  // piece of information about the corresponding ISO-8859-1 character.
27
- static unsigned char yp_encoding_iso_8859_1_table[256] = {
27
+ static uint8_t yp_encoding_iso_8859_1_table[256] = {
28
28
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
29
29
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
30
30
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -46,7 +46,7 @@ static unsigned char yp_encoding_iso_8859_1_table[256] = {
46
46
 
47
47
  // Each element of the following table contains a bitfield that indicates a
48
48
  // piece of information about the corresponding ISO-8859-2 character.
49
- static unsigned char yp_encoding_iso_8859_2_table[256] = {
49
+ static uint8_t yp_encoding_iso_8859_2_table[256] = {
50
50
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
51
51
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
52
52
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -68,7 +68,7 @@ static unsigned char yp_encoding_iso_8859_2_table[256] = {
68
68
 
69
69
  // Each element of the following table contains a bitfield that indicates a
70
70
  // piece of information about the corresponding ISO-8859-3 character.
71
- static unsigned char yp_encoding_iso_8859_3_table[256] = {
71
+ static uint8_t yp_encoding_iso_8859_3_table[256] = {
72
72
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
73
73
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
74
74
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -90,7 +90,7 @@ static unsigned char yp_encoding_iso_8859_3_table[256] = {
90
90
 
91
91
  // Each element of the following table contains a bitfield that indicates a
92
92
  // piece of information about the corresponding ISO-8859-4 character.
93
- static unsigned char yp_encoding_iso_8859_4_table[256] = {
93
+ static uint8_t yp_encoding_iso_8859_4_table[256] = {
94
94
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
95
95
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
96
96
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -112,7 +112,7 @@ static unsigned char yp_encoding_iso_8859_4_table[256] = {
112
112
 
113
113
  // Each element of the following table contains a bitfield that indicates a
114
114
  // piece of information about the corresponding ISO-8859-5 character.
115
- static unsigned char yp_encoding_iso_8859_5_table[256] = {
115
+ static uint8_t yp_encoding_iso_8859_5_table[256] = {
116
116
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
117
117
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
118
118
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -134,7 +134,7 @@ static unsigned char yp_encoding_iso_8859_5_table[256] = {
134
134
 
135
135
  // Each element of the following table contains a bitfield that indicates a
136
136
  // piece of information about the corresponding ISO-8859-6 character.
137
- static unsigned char yp_encoding_iso_8859_6_table[256] = {
137
+ static uint8_t yp_encoding_iso_8859_6_table[256] = {
138
138
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
139
139
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
140
140
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -156,7 +156,7 @@ static unsigned char yp_encoding_iso_8859_6_table[256] = {
156
156
 
157
157
  // Each element of the following table contains a bitfield that indicates a
158
158
  // piece of information about the corresponding ISO-8859-7 character.
159
- static unsigned char yp_encoding_iso_8859_7_table[256] = {
159
+ static uint8_t yp_encoding_iso_8859_7_table[256] = {
160
160
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
161
161
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
162
162
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -178,7 +178,7 @@ static unsigned char yp_encoding_iso_8859_7_table[256] = {
178
178
 
179
179
  // Each element of the following table contains a bitfield that indicates a
180
180
  // piece of information about the corresponding ISO-8859-8 character.
181
- static unsigned char yp_encoding_iso_8859_8_table[256] = {
181
+ static uint8_t yp_encoding_iso_8859_8_table[256] = {
182
182
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
183
183
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
184
184
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -200,7 +200,7 @@ static unsigned char yp_encoding_iso_8859_8_table[256] = {
200
200
 
201
201
  // Each element of the following table contains a bitfield that indicates a
202
202
  // piece of information about the corresponding ISO-8859-9 character.
203
- static unsigned char yp_encoding_iso_8859_9_table[256] = {
203
+ static uint8_t yp_encoding_iso_8859_9_table[256] = {
204
204
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
205
205
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
206
206
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -222,7 +222,7 @@ static unsigned char yp_encoding_iso_8859_9_table[256] = {
222
222
 
223
223
  // Each element of the following table contains a bitfield that indicates a
224
224
  // piece of information about the corresponding ISO-8859-10 character.
225
- static unsigned char yp_encoding_iso_8859_10_table[256] = {
225
+ static uint8_t yp_encoding_iso_8859_10_table[256] = {
226
226
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
227
227
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
228
228
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -244,7 +244,7 @@ static unsigned char yp_encoding_iso_8859_10_table[256] = {
244
244
 
245
245
  // Each element of the following table contains a bitfield that indicates a
246
246
  // piece of information about the corresponding ISO-8859-11 character.
247
- static unsigned char yp_encoding_iso_8859_11_table[256] = {
247
+ static uint8_t yp_encoding_iso_8859_11_table[256] = {
248
248
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
249
249
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
250
250
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -266,7 +266,7 @@ static unsigned char yp_encoding_iso_8859_11_table[256] = {
266
266
 
267
267
  // Each element of the following table contains a bitfield that indicates a
268
268
  // piece of information about the corresponding ISO-8859-13 character.
269
- static unsigned char yp_encoding_iso_8859_13_table[256] = {
269
+ static uint8_t yp_encoding_iso_8859_13_table[256] = {
270
270
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
271
271
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
272
272
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -288,7 +288,7 @@ static unsigned char yp_encoding_iso_8859_13_table[256] = {
288
288
 
289
289
  // Each element of the following table contains a bitfield that indicates a
290
290
  // piece of information about the corresponding ISO-8859-14 character.
291
- static unsigned char yp_encoding_iso_8859_14_table[256] = {
291
+ static uint8_t yp_encoding_iso_8859_14_table[256] = {
292
292
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
293
293
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
294
294
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -310,7 +310,7 @@ static unsigned char yp_encoding_iso_8859_14_table[256] = {
310
310
 
311
311
  // Each element of the following table contains a bitfield that indicates a
312
312
  // piece of information about the corresponding ISO-8859-15 character.
313
- static unsigned char yp_encoding_iso_8859_15_table[256] = {
313
+ static uint8_t yp_encoding_iso_8859_15_table[256] = {
314
314
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
315
315
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
316
316
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -332,7 +332,7 @@ static unsigned char yp_encoding_iso_8859_15_table[256] = {
332
332
 
333
333
  // Each element of the following table contains a bitfield that indicates a
334
334
  // piece of information about the corresponding ISO-8859-16 character.
335
- static unsigned char yp_encoding_iso_8859_16_table[256] = {
335
+ static uint8_t yp_encoding_iso_8859_16_table[256] = {
336
336
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
337
337
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
338
338
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -354,7 +354,7 @@ static unsigned char yp_encoding_iso_8859_16_table[256] = {
354
354
 
355
355
  // Each element of the following table contains a bitfield that indicates a
356
356
  // piece of information about the corresponding KOI8-R character.
357
- static unsigned char yp_encoding_koi8_r_table[256] = {
357
+ static uint8_t yp_encoding_koi8_r_table[256] = {
358
358
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
359
359
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
360
360
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -376,7 +376,7 @@ static unsigned char yp_encoding_koi8_r_table[256] = {
376
376
 
377
377
  // Each element of the following table contains a bitfield that indicates a
378
378
  // piece of information about the corresponding windows-1251 character.
379
- static unsigned char yp_encoding_windows_1251_table[256] = {
379
+ static uint8_t yp_encoding_windows_1251_table[256] = {
380
380
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
381
381
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
382
382
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -398,7 +398,7 @@ static unsigned char yp_encoding_windows_1251_table[256] = {
398
398
 
399
399
  // Each element of the following table contains a bitfield that indicates a
400
400
  // piece of information about the corresponding windows-1252 character.
401
- static unsigned char yp_encoding_windows_1252_table[256] = {
401
+ static uint8_t yp_encoding_windows_1252_table[256] = {
402
402
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
403
403
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
404
404
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -419,34 +419,32 @@ static unsigned char yp_encoding_windows_1252_table[256] = {
419
419
  };
420
420
 
421
421
  static size_t
422
- yp_encoding_ascii_char_width(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
423
- const unsigned char v = (const unsigned char) *c;
424
- return v < 0x80 ? 1 : 0;
422
+ yp_encoding_ascii_char_width(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
423
+ return *b < 0x80 ? 1 : 0;
425
424
  }
426
425
 
427
426
  size_t
428
- yp_encoding_ascii_alpha_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
429
- return (yp_encoding_ascii_table[(const unsigned char) *c] & YP_ENCODING_ALPHABETIC_BIT);
427
+ yp_encoding_ascii_alpha_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
428
+ return (yp_encoding_ascii_table[*b] & YP_ENCODING_ALPHABETIC_BIT);
430
429
  }
431
430
 
432
431
  size_t
433
- yp_encoding_ascii_alnum_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
434
- return (yp_encoding_ascii_table[(const unsigned char) *c] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
432
+ yp_encoding_ascii_alnum_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
433
+ return (yp_encoding_ascii_table[*b] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
435
434
  }
436
435
 
437
436
  bool
438
- yp_encoding_ascii_isupper_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
439
- return (yp_encoding_ascii_table[(const unsigned char) *c] & YP_ENCODING_UPPERCASE_BIT);
437
+ yp_encoding_ascii_isupper_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
438
+ return (yp_encoding_ascii_table[*b] & YP_ENCODING_UPPERCASE_BIT);
440
439
  }
441
440
 
442
441
  static size_t
443
- yp_encoding_koi8_r_char_width(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
444
- const unsigned char v = (const unsigned char) *c;
445
- return ((v >= 0x20 && v <= 0x7E) || (v >= 0x80)) ? 1 : 0;
442
+ yp_encoding_koi8_r_char_width(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
443
+ return ((*b >= 0x20 && *b <= 0x7E) || (*b >= 0x80)) ? 1 : 0;
446
444
  }
447
445
 
448
446
  static size_t
449
- yp_encoding_single_char_width(YP_ATTRIBUTE_UNUSED const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
447
+ yp_encoding_single_char_width(YP_ATTRIBUTE_UNUSED const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
450
448
  return 1;
451
449
  }
452
450
 
@@ -469,14 +467,14 @@ yp_encoding_t yp_encoding_ascii_8bit = {
469
467
  };
470
468
 
471
469
  #define YP_ENCODING_TABLE(s, i, w) \
472
- static size_t yp_encoding_ ##i ## _alpha_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
473
- return (yp_encoding_ ##i ## _table[(const unsigned char) *c] & YP_ENCODING_ALPHABETIC_BIT); \
470
+ static size_t yp_encoding_ ##i ## _alpha_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
471
+ return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_ALPHABETIC_BIT); \
474
472
  } \
475
- static size_t yp_encoding_ ##i ## _alnum_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
476
- return (yp_encoding_ ##i ## _table[(const unsigned char) *c] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
473
+ static size_t yp_encoding_ ##i ## _alnum_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
474
+ return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
477
475
  } \
478
- static bool yp_encoding_ ##i ## _isupper_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
479
- return (yp_encoding_ ##i ## _table[(const unsigned char) *c] & YP_ENCODING_UPPERCASE_BIT); \
476
+ static bool yp_encoding_ ##i ## _isupper_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
477
+ return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_UPPERCASE_BIT); \
480
478
  } \
481
479
  yp_encoding_t yp_encoding_ ##i = { \
482
480
  .name = s, \