yarp 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +48 -1
  3. data/Makefile +5 -1
  4. data/README.md +4 -3
  5. data/config.yml +461 -150
  6. data/docs/configuration.md +1 -0
  7. data/docs/encoding.md +5 -5
  8. data/docs/ruby_api.md +2 -0
  9. data/docs/serialization.md +3 -3
  10. data/docs/testing.md +2 -2
  11. data/ext/yarp/api_node.c +810 -199
  12. data/ext/yarp/extension.c +94 -31
  13. data/ext/yarp/extension.h +2 -2
  14. data/include/yarp/ast.h +653 -150
  15. data/include/yarp/defines.h +2 -1
  16. data/include/yarp/diagnostic.h +3 -3
  17. data/include/yarp/enc/yp_encoding.h +10 -10
  18. data/include/yarp/node.h +10 -0
  19. data/include/yarp/parser.h +19 -19
  20. data/include/yarp/regexp.h +1 -1
  21. data/include/yarp/unescape.h +7 -5
  22. data/include/yarp/util/yp_buffer.h +3 -0
  23. data/include/yarp/util/yp_char.h +16 -16
  24. data/include/yarp/util/yp_constant_pool.h +2 -2
  25. data/include/yarp/util/yp_newline_list.h +7 -4
  26. data/include/yarp/util/yp_string.h +4 -4
  27. data/include/yarp/util/yp_string_list.h +0 -3
  28. data/include/yarp/util/yp_strpbrk.h +1 -1
  29. data/include/yarp/version.h +2 -2
  30. data/include/yarp.h +14 -3
  31. data/lib/yarp/desugar_visitor.rb +204 -0
  32. data/lib/yarp/ffi.rb +27 -1
  33. data/lib/yarp/lex_compat.rb +93 -25
  34. data/lib/yarp/mutation_visitor.rb +683 -0
  35. data/lib/yarp/node.rb +3121 -597
  36. data/lib/yarp/serialize.rb +198 -126
  37. data/lib/yarp.rb +53 -7
  38. data/src/diagnostic.c +1 -1
  39. data/src/enc/yp_big5.c +15 -42
  40. data/src/enc/yp_euc_jp.c +16 -43
  41. data/src/enc/yp_gbk.c +19 -46
  42. data/src/enc/yp_shift_jis.c +16 -43
  43. data/src/enc/yp_tables.c +36 -38
  44. data/src/enc/yp_unicode.c +20 -25
  45. data/src/enc/yp_windows_31j.c +16 -43
  46. data/src/node.c +1444 -836
  47. data/src/prettyprint.c +324 -103
  48. data/src/regexp.c +21 -21
  49. data/src/serialize.c +429 -276
  50. data/src/token_type.c +2 -2
  51. data/src/unescape.c +184 -136
  52. data/src/util/yp_buffer.c +7 -2
  53. data/src/util/yp_char.c +34 -34
  54. data/src/util/yp_constant_pool.c +4 -4
  55. data/src/util/yp_memchr.c +1 -1
  56. data/src/util/yp_newline_list.c +14 -3
  57. data/src/util/yp_string.c +22 -20
  58. data/src/util/yp_string_list.c +0 -6
  59. data/src/util/yp_strncasecmp.c +3 -6
  60. data/src/util/yp_strpbrk.c +8 -8
  61. data/src/yarp.c +1504 -615
  62. data/yarp.gemspec +3 -1
  63. metadata +4 -2
data/src/enc/yp_big5.c CHANGED
@@ -1,69 +1,42 @@
1
1
  #include "yarp/enc/yp_encoding.h"
2
2
 
3
- typedef uint16_t yp_big5_codepoint_t;
4
-
5
- static yp_big5_codepoint_t
6
- yp_big5_codepoint(const char *c, ptrdiff_t n, size_t *width) {
7
- const unsigned char *uc = (const unsigned char *) c;
8
-
3
+ static size_t
4
+ yp_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
9
5
  // These are the single byte characters.
10
- if (*uc < 0x80) {
11
- *width = 1;
12
- return *uc;
6
+ if (*b < 0x80) {
7
+ return 1;
13
8
  }
14
9
 
15
10
  // These are the double byte characters.
16
- if ((n > 1) && (uc[0] >= 0xA1 && uc[0] <= 0xFE) && (uc[1] >= 0x40 && uc[1] <= 0xFE)) {
17
- *width = 2;
18
- return (yp_big5_codepoint_t) (uc[0] << 8 | uc[1]);
11
+ if ((n > 1) && (b[0] >= 0xA1 && b[0] <= 0xFE) && (b[1] >= 0x40 && b[1] <= 0xFE)) {
12
+ return 2;
19
13
  }
20
14
 
21
- *width = 0;
22
15
  return 0;
23
16
  }
24
17
 
25
18
  static size_t
26
- yp_encoding_big5_char_width(const char *c, ptrdiff_t n) {
27
- size_t width;
28
- yp_big5_codepoint(c, n, &width);
29
-
30
- return width;
31
- }
32
-
33
- static size_t
34
- yp_encoding_big5_alpha_char(const char *c, ptrdiff_t n) {
35
- size_t width;
36
- yp_big5_codepoint_t codepoint = yp_big5_codepoint(c, n, &width);
37
-
38
- if (width == 1) {
39
- const char value = (const char) codepoint;
40
- return yp_encoding_ascii_alpha_char(&value, n);
19
+ yp_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
20
+ if (yp_encoding_big5_char_width(b, n) == 1) {
21
+ return yp_encoding_ascii_alpha_char(b, n);
41
22
  } else {
42
23
  return 0;
43
24
  }
44
25
  }
45
26
 
46
27
  static size_t
47
- yp_encoding_big5_alnum_char(const char *c, ptrdiff_t n) {
48
- size_t width;
49
- yp_big5_codepoint_t codepoint = yp_big5_codepoint(c, n, &width);
50
-
51
- if (width == 1) {
52
- const char value = (const char) codepoint;
53
- return yp_encoding_ascii_alnum_char(&value, n);
28
+ yp_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
29
+ if (yp_encoding_big5_char_width(b, n) == 1) {
30
+ return yp_encoding_ascii_alnum_char(b, n);
54
31
  } else {
55
32
  return 0;
56
33
  }
57
34
  }
58
35
 
59
36
  static bool
60
- yp_encoding_big5_isupper_char(const char *c, ptrdiff_t n) {
61
- size_t width;
62
- yp_big5_codepoint_t codepoint = yp_big5_codepoint(c, n, &width);
63
-
64
- if (width == 1) {
65
- const char value = (const char) codepoint;
66
- return yp_encoding_ascii_isupper_char(&value, n);
37
+ yp_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
38
+ if (yp_encoding_big5_char_width(b, n) == 1) {
39
+ return yp_encoding_ascii_isupper_char(b, n);
67
40
  } else {
68
41
  return false;
69
42
  }
data/src/enc/yp_euc_jp.c CHANGED
@@ -1,75 +1,48 @@
1
1
  #include "yarp/enc/yp_encoding.h"
2
2
 
3
- typedef uint16_t yp_euc_jp_codepoint_t;
4
-
5
- static yp_euc_jp_codepoint_t
6
- yp_euc_jp_codepoint(const char *c, ptrdiff_t n, size_t *width) {
7
- const unsigned char *uc = (const unsigned char *) c;
8
-
3
+ static size_t
4
+ yp_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
9
5
  // These are the single byte characters.
10
- if (*uc < 0x80) {
11
- *width = 1;
12
- return *uc;
6
+ if (*b < 0x80) {
7
+ return 1;
13
8
  }
14
9
 
15
10
  // These are the double byte characters.
16
11
  if (
17
12
  (n > 1) &&
18
13
  (
19
- ((uc[0] == 0x8E) && (uc[1] >= 0xA1 && uc[1] <= 0xFE)) ||
20
- ((uc[0] >= 0xA1 && uc[0] <= 0xFE) && (uc[1] >= 0xA1 && uc[1] <= 0xFE))
14
+ ((b[0] == 0x8E) && (b[1] >= 0xA1 && b[1] <= 0xFE)) ||
15
+ ((b[0] >= 0xA1 && b[0] <= 0xFE) && (b[1] >= 0xA1 && b[1] <= 0xFE))
21
16
  )
22
17
  ) {
23
- *width = 2;
24
- return (yp_euc_jp_codepoint_t) (uc[0] << 8 | uc[1]);
18
+ return 2;
25
19
  }
26
20
 
27
- *width = 0;
28
21
  return 0;
29
22
  }
30
23
 
31
24
  static size_t
32
- yp_encoding_euc_jp_char_width(const char *c, ptrdiff_t n) {
33
- size_t width;
34
- yp_euc_jp_codepoint(c, n, &width);
35
-
36
- return width;
37
- }
38
-
39
- static size_t
40
- yp_encoding_euc_jp_alpha_char(const char *c, ptrdiff_t n) {
41
- size_t width;
42
- yp_euc_jp_codepoint_t codepoint = yp_euc_jp_codepoint(c, n, &width);
43
-
44
- if (width == 1) {
45
- const char value = (const char) codepoint;
46
- return yp_encoding_ascii_alpha_char(&value, n);
25
+ yp_encoding_euc_jp_alpha_char(const uint8_t *b, ptrdiff_t n) {
26
+ if (yp_encoding_euc_jp_char_width(b, n) == 1) {
27
+ return yp_encoding_ascii_alpha_char(b, n);
47
28
  } else {
48
29
  return 0;
49
30
  }
50
31
  }
51
32
 
52
33
  static size_t
53
- yp_encoding_euc_jp_alnum_char(const char *c, ptrdiff_t n) {
54
- size_t width;
55
- yp_euc_jp_codepoint_t codepoint = yp_euc_jp_codepoint(c, n, &width);
56
-
57
- if (width == 1) {
58
- const char value = (const char) codepoint;
59
- return yp_encoding_ascii_alnum_char(&value, n);
34
+ yp_encoding_euc_jp_alnum_char(const uint8_t *b, ptrdiff_t n) {
35
+ if (yp_encoding_euc_jp_char_width(b, n) == 1) {
36
+ return yp_encoding_ascii_alnum_char(b, n);
60
37
  } else {
61
38
  return 0;
62
39
  }
63
40
  }
64
41
 
65
42
  static bool
66
- yp_encoding_euc_jp_isupper_char(const char *c, ptrdiff_t n) {
67
- size_t width;
68
- yp_euc_jp_codepoint_t codepoint = yp_euc_jp_codepoint(c, n, &width);
69
-
70
- if (width == 1) {
71
- const char value = (const char) codepoint;
72
- return yp_encoding_ascii_isupper_char(&value, n);
43
+ yp_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
44
+ if (yp_encoding_euc_jp_char_width(b, n) == 1) {
45
+ return yp_encoding_ascii_isupper_char(b, n);
73
46
  } else {
74
47
  return 0;
75
48
  }
data/src/enc/yp_gbk.c CHANGED
@@ -1,78 +1,51 @@
1
1
  #include "yarp/enc/yp_encoding.h"
2
2
 
3
- typedef uint16_t yp_gbk_codepoint_t;
4
-
5
- static yp_gbk_codepoint_t
6
- yp_gbk_codepoint(const char *c, ptrdiff_t n, size_t *width) {
7
- const unsigned char *uc = (const unsigned char *) c;
8
-
3
+ static size_t
4
+ yp_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
9
5
  // These are the single byte characters.
10
- if (*uc < 0x80) {
11
- *width = 1;
12
- return *uc;
6
+ if (*b < 0x80) {
7
+ return 1;
13
8
  }
14
9
 
15
10
  // These are the double byte characters.
16
11
  if (
17
12
  (n > 1) &&
18
13
  (
19
- ((uc[0] >= 0xA1 && uc[0] <= 0xA9) && (uc[1] >= 0xA1 && uc[1] <= 0xFE)) || // GBK/1
20
- ((uc[0] >= 0xB0 && uc[0] <= 0xF7) && (uc[1] >= 0xA1 && uc[1] <= 0xFE)) || // GBK/2
21
- ((uc[0] >= 0x81 && uc[0] <= 0xA0) && (uc[1] >= 0x40 && uc[1] <= 0xFE) && (uc[1] != 0x7F)) || // GBK/3
22
- ((uc[0] >= 0xAA && uc[0] <= 0xFE) && (uc[1] >= 0x40 && uc[1] <= 0xA0) && (uc[1] != 0x7F)) || // GBK/4
23
- ((uc[0] >= 0xA8 && uc[0] <= 0xA9) && (uc[1] >= 0x40 && uc[1] <= 0xA0) && (uc[1] != 0x7F)) // GBK/5
14
+ ((b[0] >= 0xA1 && b[0] <= 0xA9) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // GBK/1
15
+ ((b[0] >= 0xB0 && b[0] <= 0xF7) && (b[1] >= 0xA1 && b[1] <= 0xFE)) || // GBK/2
16
+ ((b[0] >= 0x81 && b[0] <= 0xA0) && (b[1] >= 0x40 && b[1] <= 0xFE) && (b[1] != 0x7F)) || // GBK/3
17
+ ((b[0] >= 0xAA && b[0] <= 0xFE) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) || // GBK/4
18
+ ((b[0] >= 0xA8 && b[0] <= 0xA9) && (b[1] >= 0x40 && b[1] <= 0xA0) && (b[1] != 0x7F)) // GBK/5
24
19
  )
25
20
  ) {
26
- *width = 2;
27
- return (yp_gbk_codepoint_t) (uc[0] << 8 | uc[1]);
21
+ return 2;
28
22
  }
29
23
 
30
- *width = 0;
31
24
  return 0;
32
25
  }
33
26
 
34
27
  static size_t
35
- yp_encoding_gbk_char_width(const char *c, ptrdiff_t n) {
36
- size_t width;
37
- yp_gbk_codepoint(c, n, &width);
38
-
39
- return width;
40
- }
41
-
42
- static size_t
43
- yp_encoding_gbk_alpha_char(const char *c, ptrdiff_t n) {
44
- size_t width;
45
- yp_gbk_codepoint_t codepoint = yp_gbk_codepoint(c, n, &width);
46
-
47
- if (width == 1) {
48
- const char value = (const char) codepoint;
49
- return yp_encoding_ascii_alpha_char(&value, n);
28
+ yp_encoding_gbk_alpha_char(const uint8_t *b, ptrdiff_t n) {
29
+ if (yp_encoding_gbk_char_width(b, n) == 1) {
30
+ return yp_encoding_ascii_alpha_char(b, n);
50
31
  } else {
51
32
  return 0;
52
33
  }
53
34
  }
54
35
 
55
36
  static size_t
56
- yp_encoding_gbk_alnum_char(const char *c, ptrdiff_t n) {
57
- size_t width;
58
- yp_gbk_codepoint_t codepoint = yp_gbk_codepoint(c, n, &width);
59
-
60
- if (width == 1) {
61
- const char value = (const char) codepoint;
62
- return yp_encoding_ascii_alnum_char(&value, n);
37
+ yp_encoding_gbk_alnum_char(const uint8_t *b, ptrdiff_t n) {
38
+ if (yp_encoding_gbk_char_width(b, n) == 1) {
39
+ return yp_encoding_ascii_alnum_char(b, n);
63
40
  } else {
64
41
  return 0;
65
42
  }
66
43
  }
67
44
 
68
45
  static bool
69
- yp_encoding_gbk_isupper_char(const char *c, ptrdiff_t n) {
70
- size_t width;
71
- yp_gbk_codepoint_t codepoint = yp_gbk_codepoint(c, n, &width);
72
-
73
- if (width == 1) {
74
- const char value = (const char) codepoint;
75
- return yp_encoding_ascii_isupper_char(&value, n);
46
+ yp_encoding_gbk_isupper_char(const uint8_t *b, ptrdiff_t n) {
47
+ if (yp_encoding_gbk_char_width(b, n) == 1) {
48
+ return yp_encoding_ascii_isupper_char(b, n);
76
49
  } else {
77
50
  return false;
78
51
  }
data/src/enc/yp_shift_jis.c CHANGED
@@ -1,73 +1,46 @@
1
1
  #include "yarp/enc/yp_encoding.h"
2
2
 
3
- typedef uint16_t yp_shift_jis_codepoint_t;
4
-
5
- static yp_shift_jis_codepoint_t
6
- yp_shift_jis_codepoint(const char *c, ptrdiff_t n, size_t *width) {
7
- const unsigned char *uc = (const unsigned char *) c;
8
-
3
+ static size_t
4
+ yp_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
9
5
  // These are the single byte characters.
10
- if (*uc < 0x80 || (*uc >= 0xA1 && *uc <= 0xDF)) {
11
- *width = 1;
12
- return *uc;
6
+ if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
7
+ return 1;
13
8
  }
14
9
 
15
10
  // These are the double byte characters.
16
11
  if (
17
12
  (n > 1) &&
18
- ((uc[0] >= 0x81 && uc[0] <= 0x9F) || (uc[0] >= 0xE0 && uc[0] <= 0xFC)) &&
19
- (uc[1] >= 0x40 && uc[1] <= 0xFC)
13
+ ((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) &&
14
+ (b[1] >= 0x40 && b[1] <= 0xFC)
20
15
  ) {
21
- *width = 2;
22
- return (yp_shift_jis_codepoint_t) (uc[0] << 8 | uc[1]);
16
+ return 2;
23
17
  }
24
18
 
25
- *width = 0;
26
19
  return 0;
27
20
  }
28
21
 
29
22
  static size_t
30
- yp_encoding_shift_jis_char_width(const char *c, ptrdiff_t n) {
31
- size_t width;
32
- yp_shift_jis_codepoint(c, n, &width);
33
-
34
- return width;
35
- }
36
-
37
- static size_t
38
- yp_encoding_shift_jis_alpha_char(const char *c, ptrdiff_t n) {
39
- size_t width;
40
- yp_shift_jis_codepoint_t codepoint = yp_shift_jis_codepoint(c, n, &width);
41
-
42
- if (width == 1) {
43
- const char value = (const char) codepoint;
44
- return yp_encoding_ascii_alpha_char(&value, n);
23
+ yp_encoding_shift_jis_alpha_char(const uint8_t *b, ptrdiff_t n) {
24
+ if (yp_encoding_shift_jis_char_width(b, n) == 1) {
25
+ return yp_encoding_ascii_alpha_char(b, n);
45
26
  } else {
46
27
  return 0;
47
28
  }
48
29
  }
49
30
 
50
31
  static size_t
51
- yp_encoding_shift_jis_alnum_char(const char *c, ptrdiff_t n) {
52
- size_t width;
53
- yp_shift_jis_codepoint_t codepoint = yp_shift_jis_codepoint(c, n, &width);
54
-
55
- if (width == 1) {
56
- const char value = (const char) codepoint;
57
- return yp_encoding_ascii_alnum_char(&value, n);
32
+ yp_encoding_shift_jis_alnum_char(const uint8_t *b, ptrdiff_t n) {
33
+ if (yp_encoding_shift_jis_char_width(b, n) == 1) {
34
+ return yp_encoding_ascii_alnum_char(b, n);
58
35
  } else {
59
36
  return 0;
60
37
  }
61
38
  }
62
39
 
63
40
  static bool
64
- yp_encoding_shift_jis_isupper_char(const char *c, ptrdiff_t n) {
65
- size_t width;
66
- yp_shift_jis_codepoint_t codepoint = yp_shift_jis_codepoint(c, n, &width);
67
-
68
- if (width == 1) {
69
- const char value = (const char) codepoint;
70
- return yp_encoding_ascii_isupper_char(&value, n);
41
+ yp_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
42
+ if (yp_encoding_shift_jis_char_width(b, n) == 1) {
43
+ return yp_encoding_ascii_isupper_char(b, n);
71
44
  } else {
72
45
  return 0;
73
46
  }
data/src/enc/yp_tables.c CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  // Each element of the following table contains a bitfield that indicates a
4
4
  // piece of information about the corresponding ASCII character.
5
- static unsigned char yp_encoding_ascii_table[256] = {
5
+ static uint8_t yp_encoding_ascii_table[256] = {
6
6
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
7
7
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
8
8
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -24,7 +24,7 @@ static unsigned char yp_encoding_ascii_table[256] = {
24
24
 
25
25
  // Each element of the following table contains a bitfield that indicates a
26
26
  // piece of information about the corresponding ISO-8859-1 character.
27
- static unsigned char yp_encoding_iso_8859_1_table[256] = {
27
+ static uint8_t yp_encoding_iso_8859_1_table[256] = {
28
28
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
29
29
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
30
30
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -46,7 +46,7 @@ static unsigned char yp_encoding_iso_8859_1_table[256] = {
46
46
 
47
47
  // Each element of the following table contains a bitfield that indicates a
48
48
  // piece of information about the corresponding ISO-8859-2 character.
49
- static unsigned char yp_encoding_iso_8859_2_table[256] = {
49
+ static uint8_t yp_encoding_iso_8859_2_table[256] = {
50
50
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
51
51
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
52
52
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -68,7 +68,7 @@ static unsigned char yp_encoding_iso_8859_2_table[256] = {
68
68
 
69
69
  // Each element of the following table contains a bitfield that indicates a
70
70
  // piece of information about the corresponding ISO-8859-3 character.
71
- static unsigned char yp_encoding_iso_8859_3_table[256] = {
71
+ static uint8_t yp_encoding_iso_8859_3_table[256] = {
72
72
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
73
73
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
74
74
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -90,7 +90,7 @@ static unsigned char yp_encoding_iso_8859_3_table[256] = {
90
90
 
91
91
  // Each element of the following table contains a bitfield that indicates a
92
92
  // piece of information about the corresponding ISO-8859-4 character.
93
- static unsigned char yp_encoding_iso_8859_4_table[256] = {
93
+ static uint8_t yp_encoding_iso_8859_4_table[256] = {
94
94
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
95
95
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
96
96
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -112,7 +112,7 @@ static unsigned char yp_encoding_iso_8859_4_table[256] = {
112
112
 
113
113
  // Each element of the following table contains a bitfield that indicates a
114
114
  // piece of information about the corresponding ISO-8859-5 character.
115
- static unsigned char yp_encoding_iso_8859_5_table[256] = {
115
+ static uint8_t yp_encoding_iso_8859_5_table[256] = {
116
116
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
117
117
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
118
118
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -134,7 +134,7 @@ static unsigned char yp_encoding_iso_8859_5_table[256] = {
134
134
 
135
135
  // Each element of the following table contains a bitfield that indicates a
136
136
  // piece of information about the corresponding ISO-8859-6 character.
137
- static unsigned char yp_encoding_iso_8859_6_table[256] = {
137
+ static uint8_t yp_encoding_iso_8859_6_table[256] = {
138
138
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
139
139
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
140
140
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -156,7 +156,7 @@ static unsigned char yp_encoding_iso_8859_6_table[256] = {
156
156
 
157
157
  // Each element of the following table contains a bitfield that indicates a
158
158
  // piece of information about the corresponding ISO-8859-7 character.
159
- static unsigned char yp_encoding_iso_8859_7_table[256] = {
159
+ static uint8_t yp_encoding_iso_8859_7_table[256] = {
160
160
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
161
161
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
162
162
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -178,7 +178,7 @@ static unsigned char yp_encoding_iso_8859_7_table[256] = {
178
178
 
179
179
  // Each element of the following table contains a bitfield that indicates a
180
180
  // piece of information about the corresponding ISO-8859-8 character.
181
- static unsigned char yp_encoding_iso_8859_8_table[256] = {
181
+ static uint8_t yp_encoding_iso_8859_8_table[256] = {
182
182
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
183
183
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
184
184
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -200,7 +200,7 @@ static unsigned char yp_encoding_iso_8859_8_table[256] = {
200
200
 
201
201
  // Each element of the following table contains a bitfield that indicates a
202
202
  // piece of information about the corresponding ISO-8859-9 character.
203
- static unsigned char yp_encoding_iso_8859_9_table[256] = {
203
+ static uint8_t yp_encoding_iso_8859_9_table[256] = {
204
204
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
205
205
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
206
206
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -222,7 +222,7 @@ static unsigned char yp_encoding_iso_8859_9_table[256] = {
222
222
 
223
223
  // Each element of the following table contains a bitfield that indicates a
224
224
  // piece of information about the corresponding ISO-8859-10 character.
225
- static unsigned char yp_encoding_iso_8859_10_table[256] = {
225
+ static uint8_t yp_encoding_iso_8859_10_table[256] = {
226
226
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
227
227
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
228
228
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -244,7 +244,7 @@ static unsigned char yp_encoding_iso_8859_10_table[256] = {
244
244
 
245
245
  // Each element of the following table contains a bitfield that indicates a
246
246
  // piece of information about the corresponding ISO-8859-11 character.
247
- static unsigned char yp_encoding_iso_8859_11_table[256] = {
247
+ static uint8_t yp_encoding_iso_8859_11_table[256] = {
248
248
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
249
249
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
250
250
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -266,7 +266,7 @@ static unsigned char yp_encoding_iso_8859_11_table[256] = {
266
266
 
267
267
  // Each element of the following table contains a bitfield that indicates a
268
268
  // piece of information about the corresponding ISO-8859-13 character.
269
- static unsigned char yp_encoding_iso_8859_13_table[256] = {
269
+ static uint8_t yp_encoding_iso_8859_13_table[256] = {
270
270
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
271
271
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
272
272
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -288,7 +288,7 @@ static unsigned char yp_encoding_iso_8859_13_table[256] = {
288
288
 
289
289
  // Each element of the following table contains a bitfield that indicates a
290
290
  // piece of information about the corresponding ISO-8859-14 character.
291
- static unsigned char yp_encoding_iso_8859_14_table[256] = {
291
+ static uint8_t yp_encoding_iso_8859_14_table[256] = {
292
292
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
293
293
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
294
294
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -310,7 +310,7 @@ static unsigned char yp_encoding_iso_8859_14_table[256] = {
310
310
 
311
311
  // Each element of the following table contains a bitfield that indicates a
312
312
  // piece of information about the corresponding ISO-8859-15 character.
313
- static unsigned char yp_encoding_iso_8859_15_table[256] = {
313
+ static uint8_t yp_encoding_iso_8859_15_table[256] = {
314
314
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
315
315
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
316
316
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -332,7 +332,7 @@ static unsigned char yp_encoding_iso_8859_15_table[256] = {
332
332
 
333
333
  // Each element of the following table contains a bitfield that indicates a
334
334
  // piece of information about the corresponding ISO-8859-16 character.
335
- static unsigned char yp_encoding_iso_8859_16_table[256] = {
335
+ static uint8_t yp_encoding_iso_8859_16_table[256] = {
336
336
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
337
337
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
338
338
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -354,7 +354,7 @@ static unsigned char yp_encoding_iso_8859_16_table[256] = {
354
354
 
355
355
  // Each element of the following table contains a bitfield that indicates a
356
356
  // piece of information about the corresponding KOI8-R character.
357
- static unsigned char yp_encoding_koi8_r_table[256] = {
357
+ static uint8_t yp_encoding_koi8_r_table[256] = {
358
358
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
359
359
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
360
360
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -376,7 +376,7 @@ static unsigned char yp_encoding_koi8_r_table[256] = {
376
376
 
377
377
  // Each element of the following table contains a bitfield that indicates a
378
378
  // piece of information about the corresponding windows-1251 character.
379
- static unsigned char yp_encoding_windows_1251_table[256] = {
379
+ static uint8_t yp_encoding_windows_1251_table[256] = {
380
380
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
381
381
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
382
382
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -398,7 +398,7 @@ static unsigned char yp_encoding_windows_1251_table[256] = {
398
398
 
399
399
  // Each element of the following table contains a bitfield that indicates a
400
400
  // piece of information about the corresponding windows-1252 character.
401
- static unsigned char yp_encoding_windows_1252_table[256] = {
401
+ static uint8_t yp_encoding_windows_1252_table[256] = {
402
402
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
403
403
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
404
404
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -419,34 +419,32 @@ static unsigned char yp_encoding_windows_1252_table[256] = {
419
419
  };
420
420
 
421
421
  static size_t
422
- yp_encoding_ascii_char_width(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
423
- const unsigned char v = (const unsigned char) *c;
424
- return v < 0x80 ? 1 : 0;
422
+ yp_encoding_ascii_char_width(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
423
+ return *b < 0x80 ? 1 : 0;
425
424
  }
426
425
 
427
426
  size_t
428
- yp_encoding_ascii_alpha_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
429
- return (yp_encoding_ascii_table[(const unsigned char) *c] & YP_ENCODING_ALPHABETIC_BIT);
427
+ yp_encoding_ascii_alpha_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
428
+ return (yp_encoding_ascii_table[*b] & YP_ENCODING_ALPHABETIC_BIT);
430
429
  }
431
430
 
432
431
  size_t
433
- yp_encoding_ascii_alnum_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
434
- return (yp_encoding_ascii_table[(const unsigned char) *c] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
432
+ yp_encoding_ascii_alnum_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
433
+ return (yp_encoding_ascii_table[*b] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
435
434
  }
436
435
 
437
436
  bool
438
- yp_encoding_ascii_isupper_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
439
- return (yp_encoding_ascii_table[(const unsigned char) *c] & YP_ENCODING_UPPERCASE_BIT);
437
+ yp_encoding_ascii_isupper_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
438
+ return (yp_encoding_ascii_table[*b] & YP_ENCODING_UPPERCASE_BIT);
440
439
  }
441
440
 
442
441
  static size_t
443
- yp_encoding_koi8_r_char_width(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
444
- const unsigned char v = (const unsigned char) *c;
445
- return ((v >= 0x20 && v <= 0x7E) || (v >= 0x80)) ? 1 : 0;
442
+ yp_encoding_koi8_r_char_width(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
443
+ return ((*b >= 0x20 && *b <= 0x7E) || (*b >= 0x80)) ? 1 : 0;
446
444
  }
447
445
 
448
446
  static size_t
449
- yp_encoding_single_char_width(YP_ATTRIBUTE_UNUSED const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
447
+ yp_encoding_single_char_width(YP_ATTRIBUTE_UNUSED const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
450
448
  return 1;
451
449
  }
452
450
 
@@ -469,14 +467,14 @@ yp_encoding_t yp_encoding_ascii_8bit = {
469
467
  };
470
468
 
471
469
  #define YP_ENCODING_TABLE(s, i, w) \
472
- static size_t yp_encoding_ ##i ## _alpha_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
473
- return (yp_encoding_ ##i ## _table[(const unsigned char) *c] & YP_ENCODING_ALPHABETIC_BIT); \
470
+ static size_t yp_encoding_ ##i ## _alpha_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
471
+ return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_ALPHABETIC_BIT); \
474
472
  } \
475
- static size_t yp_encoding_ ##i ## _alnum_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
476
- return (yp_encoding_ ##i ## _table[(const unsigned char) *c] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
473
+ static size_t yp_encoding_ ##i ## _alnum_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
474
+ return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
477
475
  } \
478
- static bool yp_encoding_ ##i ## _isupper_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
479
- return (yp_encoding_ ##i ## _table[(const unsigned char) *c] & YP_ENCODING_UPPERCASE_BIT); \
476
+ static bool yp_encoding_ ##i ## _isupper_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
477
+ return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_UPPERCASE_BIT); \
480
478
  } \
481
479
  yp_encoding_t yp_encoding_ ##i = { \
482
480
  .name = s, \