yarp 0.9.0 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +15 -1
  3. data/Makefile +5 -1
  4. data/config.yml +156 -125
  5. data/docs/encoding.md +5 -5
  6. data/docs/serialization.md +2 -2
  7. data/ext/yarp/api_node.c +142 -98
  8. data/ext/yarp/extension.c +21 -7
  9. data/ext/yarp/extension.h +1 -1
  10. data/include/yarp/ast.h +327 -18
  11. data/include/yarp/defines.h +2 -1
  12. data/include/yarp/diagnostic.h +3 -3
  13. data/include/yarp/enc/yp_encoding.h +10 -10
  14. data/include/yarp/parser.h +19 -19
  15. data/include/yarp/regexp.h +1 -1
  16. data/include/yarp/unescape.h +4 -4
  17. data/include/yarp/util/yp_buffer.h +3 -0
  18. data/include/yarp/util/yp_char.h +16 -16
  19. data/include/yarp/util/yp_constant_pool.h +2 -2
  20. data/include/yarp/util/yp_newline_list.h +5 -5
  21. data/include/yarp/util/yp_string.h +4 -4
  22. data/include/yarp/util/yp_string_list.h +0 -3
  23. data/include/yarp/util/yp_strpbrk.h +1 -1
  24. data/include/yarp/version.h +2 -2
  25. data/include/yarp.h +5 -4
  26. data/lib/yarp/desugar_visitor.rb +59 -122
  27. data/lib/yarp/node.rb +230 -240
  28. data/lib/yarp/serialize.rb +16 -16
  29. data/lib/yarp.rb +5 -5
  30. data/src/diagnostic.c +1 -1
  31. data/src/enc/yp_big5.c +15 -42
  32. data/src/enc/yp_euc_jp.c +16 -43
  33. data/src/enc/yp_gbk.c +19 -46
  34. data/src/enc/yp_shift_jis.c +16 -43
  35. data/src/enc/yp_tables.c +36 -38
  36. data/src/enc/yp_unicode.c +20 -25
  37. data/src/enc/yp_windows_31j.c +16 -43
  38. data/src/node.c +1271 -899
  39. data/src/prettyprint.c +87 -48
  40. data/src/regexp.c +21 -21
  41. data/src/serialize.c +28 -15
  42. data/src/unescape.c +151 -121
  43. data/src/util/yp_buffer.c +7 -2
  44. data/src/util/yp_char.c +34 -34
  45. data/src/util/yp_constant_pool.c +4 -4
  46. data/src/util/yp_memchr.c +1 -1
  47. data/src/util/yp_newline_list.c +5 -4
  48. data/src/util/yp_string.c +22 -20
  49. data/src/util/yp_string_list.c +0 -6
  50. data/src/util/yp_strncasecmp.c +3 -6
  51. data/src/util/yp_strpbrk.c +8 -8
  52. data/src/yarp.c +355 -216
  53. data/yarp.gemspec +1 -1
  54. metadata +2 -2
data/src/enc/yp_tables.c CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  // Each element of the following table contains a bitfield that indicates a
4
4
  // piece of information about the corresponding ASCII character.
5
- static unsigned char yp_encoding_ascii_table[256] = {
5
+ static uint8_t yp_encoding_ascii_table[256] = {
6
6
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
7
7
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
8
8
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -24,7 +24,7 @@ static unsigned char yp_encoding_ascii_table[256] = {
24
24
 
25
25
  // Each element of the following table contains a bitfield that indicates a
26
26
  // piece of information about the corresponding ISO-8859-1 character.
27
- static unsigned char yp_encoding_iso_8859_1_table[256] = {
27
+ static uint8_t yp_encoding_iso_8859_1_table[256] = {
28
28
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
29
29
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
30
30
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -46,7 +46,7 @@ static unsigned char yp_encoding_iso_8859_1_table[256] = {
46
46
 
47
47
  // Each element of the following table contains a bitfield that indicates a
48
48
  // piece of information about the corresponding ISO-8859-2 character.
49
- static unsigned char yp_encoding_iso_8859_2_table[256] = {
49
+ static uint8_t yp_encoding_iso_8859_2_table[256] = {
50
50
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
51
51
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
52
52
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -68,7 +68,7 @@ static unsigned char yp_encoding_iso_8859_2_table[256] = {
68
68
 
69
69
  // Each element of the following table contains a bitfield that indicates a
70
70
  // piece of information about the corresponding ISO-8859-3 character.
71
- static unsigned char yp_encoding_iso_8859_3_table[256] = {
71
+ static uint8_t yp_encoding_iso_8859_3_table[256] = {
72
72
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
73
73
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
74
74
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -90,7 +90,7 @@ static unsigned char yp_encoding_iso_8859_3_table[256] = {
90
90
 
91
91
  // Each element of the following table contains a bitfield that indicates a
92
92
  // piece of information about the corresponding ISO-8859-4 character.
93
- static unsigned char yp_encoding_iso_8859_4_table[256] = {
93
+ static uint8_t yp_encoding_iso_8859_4_table[256] = {
94
94
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
95
95
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
96
96
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -112,7 +112,7 @@ static unsigned char yp_encoding_iso_8859_4_table[256] = {
112
112
 
113
113
  // Each element of the following table contains a bitfield that indicates a
114
114
  // piece of information about the corresponding ISO-8859-5 character.
115
- static unsigned char yp_encoding_iso_8859_5_table[256] = {
115
+ static uint8_t yp_encoding_iso_8859_5_table[256] = {
116
116
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
117
117
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
118
118
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -134,7 +134,7 @@ static unsigned char yp_encoding_iso_8859_5_table[256] = {
134
134
 
135
135
  // Each element of the following table contains a bitfield that indicates a
136
136
  // piece of information about the corresponding ISO-8859-6 character.
137
- static unsigned char yp_encoding_iso_8859_6_table[256] = {
137
+ static uint8_t yp_encoding_iso_8859_6_table[256] = {
138
138
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
139
139
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
140
140
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -156,7 +156,7 @@ static unsigned char yp_encoding_iso_8859_6_table[256] = {
156
156
 
157
157
  // Each element of the following table contains a bitfield that indicates a
158
158
  // piece of information about the corresponding ISO-8859-7 character.
159
- static unsigned char yp_encoding_iso_8859_7_table[256] = {
159
+ static uint8_t yp_encoding_iso_8859_7_table[256] = {
160
160
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
161
161
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
162
162
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -178,7 +178,7 @@ static unsigned char yp_encoding_iso_8859_7_table[256] = {
178
178
 
179
179
  // Each element of the following table contains a bitfield that indicates a
180
180
  // piece of information about the corresponding ISO-8859-8 character.
181
- static unsigned char yp_encoding_iso_8859_8_table[256] = {
181
+ static uint8_t yp_encoding_iso_8859_8_table[256] = {
182
182
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
183
183
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
184
184
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -200,7 +200,7 @@ static unsigned char yp_encoding_iso_8859_8_table[256] = {
200
200
 
201
201
  // Each element of the following table contains a bitfield that indicates a
202
202
  // piece of information about the corresponding ISO-8859-9 character.
203
- static unsigned char yp_encoding_iso_8859_9_table[256] = {
203
+ static uint8_t yp_encoding_iso_8859_9_table[256] = {
204
204
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
205
205
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
206
206
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -222,7 +222,7 @@ static unsigned char yp_encoding_iso_8859_9_table[256] = {
222
222
 
223
223
  // Each element of the following table contains a bitfield that indicates a
224
224
  // piece of information about the corresponding ISO-8859-10 character.
225
- static unsigned char yp_encoding_iso_8859_10_table[256] = {
225
+ static uint8_t yp_encoding_iso_8859_10_table[256] = {
226
226
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
227
227
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
228
228
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -244,7 +244,7 @@ static unsigned char yp_encoding_iso_8859_10_table[256] = {
244
244
 
245
245
  // Each element of the following table contains a bitfield that indicates a
246
246
  // piece of information about the corresponding ISO-8859-11 character.
247
- static unsigned char yp_encoding_iso_8859_11_table[256] = {
247
+ static uint8_t yp_encoding_iso_8859_11_table[256] = {
248
248
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
249
249
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
250
250
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -266,7 +266,7 @@ static unsigned char yp_encoding_iso_8859_11_table[256] = {
266
266
 
267
267
  // Each element of the following table contains a bitfield that indicates a
268
268
  // piece of information about the corresponding ISO-8859-13 character.
269
- static unsigned char yp_encoding_iso_8859_13_table[256] = {
269
+ static uint8_t yp_encoding_iso_8859_13_table[256] = {
270
270
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
271
271
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
272
272
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -288,7 +288,7 @@ static unsigned char yp_encoding_iso_8859_13_table[256] = {
288
288
 
289
289
  // Each element of the following table contains a bitfield that indicates a
290
290
  // piece of information about the corresponding ISO-8859-14 character.
291
- static unsigned char yp_encoding_iso_8859_14_table[256] = {
291
+ static uint8_t yp_encoding_iso_8859_14_table[256] = {
292
292
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
293
293
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
294
294
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -310,7 +310,7 @@ static unsigned char yp_encoding_iso_8859_14_table[256] = {
310
310
 
311
311
  // Each element of the following table contains a bitfield that indicates a
312
312
  // piece of information about the corresponding ISO-8859-15 character.
313
- static unsigned char yp_encoding_iso_8859_15_table[256] = {
313
+ static uint8_t yp_encoding_iso_8859_15_table[256] = {
314
314
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
315
315
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
316
316
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -332,7 +332,7 @@ static unsigned char yp_encoding_iso_8859_15_table[256] = {
332
332
 
333
333
  // Each element of the following table contains a bitfield that indicates a
334
334
  // piece of information about the corresponding ISO-8859-16 character.
335
- static unsigned char yp_encoding_iso_8859_16_table[256] = {
335
+ static uint8_t yp_encoding_iso_8859_16_table[256] = {
336
336
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
337
337
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
338
338
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -354,7 +354,7 @@ static unsigned char yp_encoding_iso_8859_16_table[256] = {
354
354
 
355
355
  // Each element of the following table contains a bitfield that indicates a
356
356
  // piece of information about the corresponding KOI8-R character.
357
- static unsigned char yp_encoding_koi8_r_table[256] = {
357
+ static uint8_t yp_encoding_koi8_r_table[256] = {
358
358
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
359
359
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
360
360
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -376,7 +376,7 @@ static unsigned char yp_encoding_koi8_r_table[256] = {
376
376
 
377
377
  // Each element of the following table contains a bitfield that indicates a
378
378
  // piece of information about the corresponding windows-1251 character.
379
- static unsigned char yp_encoding_windows_1251_table[256] = {
379
+ static uint8_t yp_encoding_windows_1251_table[256] = {
380
380
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
381
381
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
382
382
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -398,7 +398,7 @@ static unsigned char yp_encoding_windows_1251_table[256] = {
398
398
 
399
399
  // Each element of the following table contains a bitfield that indicates a
400
400
  // piece of information about the corresponding windows-1252 character.
401
- static unsigned char yp_encoding_windows_1252_table[256] = {
401
+ static uint8_t yp_encoding_windows_1252_table[256] = {
402
402
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
403
403
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
404
404
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -419,34 +419,32 @@ static unsigned char yp_encoding_windows_1252_table[256] = {
419
419
  };
420
420
 
421
421
  static size_t
422
- yp_encoding_ascii_char_width(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
423
- const unsigned char v = (const unsigned char) *c;
424
- return v < 0x80 ? 1 : 0;
422
+ yp_encoding_ascii_char_width(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
423
+ return *b < 0x80 ? 1 : 0;
425
424
  }
426
425
 
427
426
  size_t
428
- yp_encoding_ascii_alpha_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
429
- return (yp_encoding_ascii_table[(const unsigned char) *c] & YP_ENCODING_ALPHABETIC_BIT);
427
+ yp_encoding_ascii_alpha_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
428
+ return (yp_encoding_ascii_table[*b] & YP_ENCODING_ALPHABETIC_BIT);
430
429
  }
431
430
 
432
431
  size_t
433
- yp_encoding_ascii_alnum_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
434
- return (yp_encoding_ascii_table[(const unsigned char) *c] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
432
+ yp_encoding_ascii_alnum_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
433
+ return (yp_encoding_ascii_table[*b] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
435
434
  }
436
435
 
437
436
  bool
438
- yp_encoding_ascii_isupper_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
439
- return (yp_encoding_ascii_table[(const unsigned char) *c] & YP_ENCODING_UPPERCASE_BIT);
437
+ yp_encoding_ascii_isupper_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
438
+ return (yp_encoding_ascii_table[*b] & YP_ENCODING_UPPERCASE_BIT);
440
439
  }
441
440
 
442
441
  static size_t
443
- yp_encoding_koi8_r_char_width(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
444
- const unsigned char v = (const unsigned char) *c;
445
- return ((v >= 0x20 && v <= 0x7E) || (v >= 0x80)) ? 1 : 0;
442
+ yp_encoding_koi8_r_char_width(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
443
+ return ((*b >= 0x20 && *b <= 0x7E) || (*b >= 0x80)) ? 1 : 0;
446
444
  }
447
445
 
448
446
  static size_t
449
- yp_encoding_single_char_width(YP_ATTRIBUTE_UNUSED const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
447
+ yp_encoding_single_char_width(YP_ATTRIBUTE_UNUSED const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
450
448
  return 1;
451
449
  }
452
450
 
@@ -469,14 +467,14 @@ yp_encoding_t yp_encoding_ascii_8bit = {
469
467
  };
470
468
 
471
469
  #define YP_ENCODING_TABLE(s, i, w) \
472
- static size_t yp_encoding_ ##i ## _alpha_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
473
- return (yp_encoding_ ##i ## _table[(const unsigned char) *c] & YP_ENCODING_ALPHABETIC_BIT); \
470
+ static size_t yp_encoding_ ##i ## _alpha_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
471
+ return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_ALPHABETIC_BIT); \
474
472
  } \
475
- static size_t yp_encoding_ ##i ## _alnum_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
476
- return (yp_encoding_ ##i ## _table[(const unsigned char) *c] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
473
+ static size_t yp_encoding_ ##i ## _alnum_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
474
+ return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
477
475
  } \
478
- static bool yp_encoding_ ##i ## _isupper_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
479
- return (yp_encoding_ ##i ## _table[(const unsigned char) *c] & YP_ENCODING_UPPERCASE_BIT); \
476
+ static bool yp_encoding_ ##i ## _isupper_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
477
+ return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_UPPERCASE_BIT); \
480
478
  } \
481
479
  yp_encoding_t yp_encoding_ ##i = { \
482
480
  .name = s, \
data/src/enc/yp_unicode.c CHANGED
@@ -10,7 +10,7 @@ typedef uint32_t yp_unicode_codepoint_t;
10
10
  // this table is different from other encodings where we used a lookup table
11
11
  // because the indices of those tables are the byte representations, not the
12
12
  // codepoints themselves.
13
- unsigned char yp_encoding_unicode_table[256] = {
13
+ uint8_t yp_encoding_unicode_table[256] = {
14
14
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
15
15
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
16
16
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -2220,7 +2220,7 @@ static const uint8_t yp_utf_8_dfa[] = {
2220
2220
  };
2221
2221
 
2222
2222
  static yp_unicode_codepoint_t
2223
- yp_utf_8_codepoint(const unsigned char *c, ptrdiff_t n, size_t *width) {
2223
+ yp_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
2224
2224
  assert(n >= 1);
2225
2225
  size_t maximum = (size_t) n;
2226
2226
 
@@ -2228,7 +2228,7 @@ yp_utf_8_codepoint(const unsigned char *c, ptrdiff_t n, size_t *width) {
2228
2228
  uint32_t state = 0;
2229
2229
 
2230
2230
  for (size_t index = 0; index < 4 && index < maximum; index++) {
2231
- uint32_t byte = c[index];
2231
+ uint32_t byte = b[index];
2232
2232
  uint32_t type = yp_utf_8_dfa[byte];
2233
2233
 
2234
2234
  codepoint = (state != 0) ?
@@ -2247,60 +2247,55 @@ yp_utf_8_codepoint(const unsigned char *c, ptrdiff_t n, size_t *width) {
2247
2247
  }
2248
2248
 
2249
2249
  static size_t
2250
- yp_encoding_utf_8_char_width(const char *c, ptrdiff_t n) {
2250
+ yp_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
2251
2251
  size_t width;
2252
- const unsigned char *v = (const unsigned char *) c;
2253
-
2254
- yp_utf_8_codepoint(v, n, &width);
2252
+ yp_utf_8_codepoint(b, n, &width);
2255
2253
  return width;
2256
2254
  }
2257
2255
 
2258
2256
  size_t
2259
- yp_encoding_utf_8_alpha_char(const char *c, ptrdiff_t n) {
2260
- const unsigned char *v = (const unsigned char *) c;
2261
- if (*v < 0x80) {
2262
- return (yp_encoding_unicode_table[*v] & YP_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
2257
+ yp_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
2258
+ if (*b < 0x80) {
2259
+ return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
2263
2260
  }
2264
2261
 
2265
2262
  size_t width;
2266
- yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(v, n, &width);
2263
+ yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
2267
2264
 
2268
2265
  if (codepoint <= 0xFF) {
2269
- return (yp_encoding_unicode_table[(unsigned char) codepoint] & YP_ENCODING_ALPHABETIC_BIT) ? width : 0;
2266
+ return (yp_encoding_unicode_table[(uint8_t) codepoint] & YP_ENCODING_ALPHABETIC_BIT) ? width : 0;
2270
2267
  } else {
2271
2268
  return yp_unicode_codepoint_match(codepoint, unicode_alpha_codepoints, UNICODE_ALPHA_CODEPOINTS_LENGTH) ? width : 0;
2272
2269
  }
2273
2270
  }
2274
2271
 
2275
2272
  size_t
2276
- yp_encoding_utf_8_alnum_char(const char *c, ptrdiff_t n) {
2277
- const unsigned char *v = (const unsigned char *) c;
2278
- if (*v < 0x80) {
2279
- return (yp_encoding_unicode_table[*v] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
2273
+ yp_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
2274
+ if (*b < 0x80) {
2275
+ return (yp_encoding_unicode_table[*b] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
2280
2276
  }
2281
2277
 
2282
2278
  size_t width;
2283
- yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(v, n, &width);
2279
+ yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
2284
2280
 
2285
2281
  if (codepoint <= 0xFF) {
2286
- return (yp_encoding_unicode_table[(unsigned char) codepoint] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? width : 0;
2282
+ return (yp_encoding_unicode_table[(uint8_t) codepoint] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? width : 0;
2287
2283
  } else {
2288
2284
  return yp_unicode_codepoint_match(codepoint, unicode_alnum_codepoints, UNICODE_ALNUM_CODEPOINTS_LENGTH) ? width : 0;
2289
2285
  }
2290
2286
  }
2291
2287
 
2292
2288
  static bool
2293
- yp_encoding_utf_8_isupper_char(const char *c, ptrdiff_t n) {
2294
- const unsigned char *v = (const unsigned char *) c;
2295
- if (*v < 0x80) {
2296
- return (yp_encoding_unicode_table[*v] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
2289
+ yp_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
2290
+ if (*b < 0x80) {
2291
+ return (yp_encoding_unicode_table[*b] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
2297
2292
  }
2298
2293
 
2299
2294
  size_t width;
2300
- yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(v, n, &width);
2295
+ yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
2301
2296
 
2302
2297
  if (codepoint <= 0xFF) {
2303
- return (yp_encoding_unicode_table[(unsigned char) codepoint] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
2298
+ return (yp_encoding_unicode_table[(uint8_t) codepoint] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
2304
2299
  } else {
2305
2300
  return yp_unicode_codepoint_match(codepoint, unicode_isupper_codepoints, UNICODE_ISUPPER_CODEPOINTS_LENGTH) ? true : false;
2306
2301
  }
@@ -1,73 +1,46 @@
1
1
  #include "yarp/enc/yp_encoding.h"
2
2
 
3
- typedef uint16_t yp_windows_31j_codepoint_t;
4
-
5
- static yp_windows_31j_codepoint_t
6
- yp_windows_31j_codepoint(const char *c, ptrdiff_t n, size_t *width) {
7
- const unsigned char *uc = (const unsigned char *) c;
8
-
3
+ static size_t
4
+ yp_encoding_windows_31j_char_width(const uint8_t *b, ptrdiff_t n) {
9
5
  // These are the single byte characters.
10
- if (*uc < 0x80 || (*uc >= 0xA1 && *uc <= 0xDF)) {
11
- *width = 1;
12
- return *uc;
6
+ if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
7
+ return 1;
13
8
  }
14
9
 
15
10
  // These are the double byte characters.
16
11
  if (
17
12
  (n > 1) &&
18
- ((uc[0] >= 0x81 && uc[0] <= 0x9F) || (uc[0] >= 0xE0 && uc[0] <= 0xFC)) &&
19
- (uc[1] >= 0x40 && uc[1] <= 0xFC)
13
+ ((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) &&
14
+ (b[1] >= 0x40 && b[1] <= 0xFC)
20
15
  ) {
21
- *width = 2;
22
- return (yp_windows_31j_codepoint_t) (uc[0] << 8 | uc[1]);
16
+ return 2;
23
17
  }
24
18
 
25
- *width = 0;
26
19
  return 0;
27
20
  }
28
21
 
29
22
  static size_t
30
- yp_encoding_windows_31j_char_width(const char *c, ptrdiff_t n) {
31
- size_t width;
32
- yp_windows_31j_codepoint(c, n, &width);
33
-
34
- return width;
35
- }
36
-
37
- static size_t
38
- yp_encoding_windows_31j_alpha_char(const char *c, ptrdiff_t n) {
39
- size_t width;
40
- yp_windows_31j_codepoint_t codepoint = yp_windows_31j_codepoint(c, n, &width);
41
-
42
- if (width == 1) {
43
- const char value = (const char) codepoint;
44
- return yp_encoding_ascii_alpha_char(&value, n);
23
+ yp_encoding_windows_31j_alpha_char(const uint8_t *b, ptrdiff_t n) {
24
+ if (yp_encoding_windows_31j_char_width(b, n) == 1) {
25
+ return yp_encoding_ascii_alpha_char(b, n);
45
26
  } else {
46
27
  return 0;
47
28
  }
48
29
  }
49
30
 
50
31
  static size_t
51
- yp_encoding_windows_31j_alnum_char(const char *c, ptrdiff_t n) {
52
- size_t width;
53
- yp_windows_31j_codepoint_t codepoint = yp_windows_31j_codepoint(c, n, &width);
54
-
55
- if (width == 1) {
56
- const char value = (const char) codepoint;
57
- return yp_encoding_ascii_alnum_char(&value, n);
32
+ yp_encoding_windows_31j_alnum_char(const uint8_t *b, ptrdiff_t n) {
33
+ if (yp_encoding_windows_31j_char_width(b, n) == 1) {
34
+ return yp_encoding_ascii_alnum_char(b, n);
58
35
  } else {
59
36
  return 0;
60
37
  }
61
38
  }
62
39
 
63
40
  static bool
64
- yp_encoding_windows_31j_isupper_char(const char *c, ptrdiff_t n) {
65
- size_t width;
66
- yp_windows_31j_codepoint_t codepoint = yp_windows_31j_codepoint(c, n, &width);
67
-
68
- if (width == 1) {
69
- const char value = (const char) codepoint;
70
- return yp_encoding_ascii_isupper_char(&value, n);
41
+ yp_encoding_windows_31j_isupper_char(const uint8_t *b, ptrdiff_t n) {
42
+ if (yp_encoding_windows_31j_char_width(b, n) == 1) {
43
+ return yp_encoding_ascii_isupper_char(b, n);
71
44
  } else {
72
45
  return false;
73
46
  }