yarp 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +15 -1
  3. data/Makefile +5 -1
  4. data/config.yml +156 -125
  5. data/docs/encoding.md +5 -5
  6. data/docs/serialization.md +2 -2
  7. data/ext/yarp/api_node.c +142 -98
  8. data/ext/yarp/extension.c +21 -7
  9. data/ext/yarp/extension.h +1 -1
  10. data/include/yarp/ast.h +327 -18
  11. data/include/yarp/defines.h +2 -1
  12. data/include/yarp/diagnostic.h +3 -3
  13. data/include/yarp/enc/yp_encoding.h +10 -10
  14. data/include/yarp/parser.h +19 -19
  15. data/include/yarp/regexp.h +1 -1
  16. data/include/yarp/unescape.h +4 -4
  17. data/include/yarp/util/yp_buffer.h +3 -0
  18. data/include/yarp/util/yp_char.h +16 -16
  19. data/include/yarp/util/yp_constant_pool.h +2 -2
  20. data/include/yarp/util/yp_newline_list.h +5 -5
  21. data/include/yarp/util/yp_string.h +4 -4
  22. data/include/yarp/util/yp_string_list.h +0 -3
  23. data/include/yarp/util/yp_strpbrk.h +1 -1
  24. data/include/yarp/version.h +2 -2
  25. data/include/yarp.h +5 -4
  26. data/lib/yarp/desugar_visitor.rb +59 -122
  27. data/lib/yarp/node.rb +230 -240
  28. data/lib/yarp/serialize.rb +16 -16
  29. data/lib/yarp.rb +5 -5
  30. data/src/diagnostic.c +1 -1
  31. data/src/enc/yp_big5.c +15 -42
  32. data/src/enc/yp_euc_jp.c +16 -43
  33. data/src/enc/yp_gbk.c +19 -46
  34. data/src/enc/yp_shift_jis.c +16 -43
  35. data/src/enc/yp_tables.c +36 -38
  36. data/src/enc/yp_unicode.c +20 -25
  37. data/src/enc/yp_windows_31j.c +16 -43
  38. data/src/node.c +1271 -899
  39. data/src/prettyprint.c +87 -48
  40. data/src/regexp.c +21 -21
  41. data/src/serialize.c +28 -15
  42. data/src/unescape.c +151 -121
  43. data/src/util/yp_buffer.c +7 -2
  44. data/src/util/yp_char.c +34 -34
  45. data/src/util/yp_constant_pool.c +4 -4
  46. data/src/util/yp_memchr.c +1 -1
  47. data/src/util/yp_newline_list.c +5 -4
  48. data/src/util/yp_string.c +22 -20
  49. data/src/util/yp_string_list.c +0 -6
  50. data/src/util/yp_strncasecmp.c +3 -6
  51. data/src/util/yp_strpbrk.c +8 -8
  52. data/src/yarp.c +355 -216
  53. data/yarp.gemspec +1 -1
  54. metadata +2 -2
data/src/enc/yp_tables.c CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  // Each element of the following table contains a bitfield that indicates a
4
4
  // piece of information about the corresponding ASCII character.
5
- static unsigned char yp_encoding_ascii_table[256] = {
5
+ static uint8_t yp_encoding_ascii_table[256] = {
6
6
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
7
7
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
8
8
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -24,7 +24,7 @@ static unsigned char yp_encoding_ascii_table[256] = {
24
24
 
25
25
  // Each element of the following table contains a bitfield that indicates a
26
26
  // piece of information about the corresponding ISO-8859-1 character.
27
- static unsigned char yp_encoding_iso_8859_1_table[256] = {
27
+ static uint8_t yp_encoding_iso_8859_1_table[256] = {
28
28
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
29
29
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
30
30
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -46,7 +46,7 @@ static unsigned char yp_encoding_iso_8859_1_table[256] = {
46
46
 
47
47
  // Each element of the following table contains a bitfield that indicates a
48
48
  // piece of information about the corresponding ISO-8859-2 character.
49
- static unsigned char yp_encoding_iso_8859_2_table[256] = {
49
+ static uint8_t yp_encoding_iso_8859_2_table[256] = {
50
50
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
51
51
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
52
52
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -68,7 +68,7 @@ static unsigned char yp_encoding_iso_8859_2_table[256] = {
68
68
 
69
69
  // Each element of the following table contains a bitfield that indicates a
70
70
  // piece of information about the corresponding ISO-8859-3 character.
71
- static unsigned char yp_encoding_iso_8859_3_table[256] = {
71
+ static uint8_t yp_encoding_iso_8859_3_table[256] = {
72
72
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
73
73
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
74
74
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -90,7 +90,7 @@ static unsigned char yp_encoding_iso_8859_3_table[256] = {
90
90
 
91
91
  // Each element of the following table contains a bitfield that indicates a
92
92
  // piece of information about the corresponding ISO-8859-4 character.
93
- static unsigned char yp_encoding_iso_8859_4_table[256] = {
93
+ static uint8_t yp_encoding_iso_8859_4_table[256] = {
94
94
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
95
95
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
96
96
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -112,7 +112,7 @@ static unsigned char yp_encoding_iso_8859_4_table[256] = {
112
112
 
113
113
  // Each element of the following table contains a bitfield that indicates a
114
114
  // piece of information about the corresponding ISO-8859-5 character.
115
- static unsigned char yp_encoding_iso_8859_5_table[256] = {
115
+ static uint8_t yp_encoding_iso_8859_5_table[256] = {
116
116
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
117
117
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
118
118
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -134,7 +134,7 @@ static unsigned char yp_encoding_iso_8859_5_table[256] = {
134
134
 
135
135
  // Each element of the following table contains a bitfield that indicates a
136
136
  // piece of information about the corresponding ISO-8859-6 character.
137
- static unsigned char yp_encoding_iso_8859_6_table[256] = {
137
+ static uint8_t yp_encoding_iso_8859_6_table[256] = {
138
138
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
139
139
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
140
140
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -156,7 +156,7 @@ static unsigned char yp_encoding_iso_8859_6_table[256] = {
156
156
 
157
157
  // Each element of the following table contains a bitfield that indicates a
158
158
  // piece of information about the corresponding ISO-8859-7 character.
159
- static unsigned char yp_encoding_iso_8859_7_table[256] = {
159
+ static uint8_t yp_encoding_iso_8859_7_table[256] = {
160
160
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
161
161
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
162
162
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -178,7 +178,7 @@ static unsigned char yp_encoding_iso_8859_7_table[256] = {
178
178
 
179
179
  // Each element of the following table contains a bitfield that indicates a
180
180
  // piece of information about the corresponding ISO-8859-8 character.
181
- static unsigned char yp_encoding_iso_8859_8_table[256] = {
181
+ static uint8_t yp_encoding_iso_8859_8_table[256] = {
182
182
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
183
183
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
184
184
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -200,7 +200,7 @@ static unsigned char yp_encoding_iso_8859_8_table[256] = {
200
200
 
201
201
  // Each element of the following table contains a bitfield that indicates a
202
202
  // piece of information about the corresponding ISO-8859-9 character.
203
- static unsigned char yp_encoding_iso_8859_9_table[256] = {
203
+ static uint8_t yp_encoding_iso_8859_9_table[256] = {
204
204
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
205
205
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
206
206
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -222,7 +222,7 @@ static unsigned char yp_encoding_iso_8859_9_table[256] = {
222
222
 
223
223
  // Each element of the following table contains a bitfield that indicates a
224
224
  // piece of information about the corresponding ISO-8859-10 character.
225
- static unsigned char yp_encoding_iso_8859_10_table[256] = {
225
+ static uint8_t yp_encoding_iso_8859_10_table[256] = {
226
226
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
227
227
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
228
228
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -244,7 +244,7 @@ static unsigned char yp_encoding_iso_8859_10_table[256] = {
244
244
 
245
245
  // Each element of the following table contains a bitfield that indicates a
246
246
  // piece of information about the corresponding ISO-8859-11 character.
247
- static unsigned char yp_encoding_iso_8859_11_table[256] = {
247
+ static uint8_t yp_encoding_iso_8859_11_table[256] = {
248
248
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
249
249
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
250
250
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -266,7 +266,7 @@ static unsigned char yp_encoding_iso_8859_11_table[256] = {
266
266
 
267
267
  // Each element of the following table contains a bitfield that indicates a
268
268
  // piece of information about the corresponding ISO-8859-13 character.
269
- static unsigned char yp_encoding_iso_8859_13_table[256] = {
269
+ static uint8_t yp_encoding_iso_8859_13_table[256] = {
270
270
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
271
271
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
272
272
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -288,7 +288,7 @@ static unsigned char yp_encoding_iso_8859_13_table[256] = {
288
288
 
289
289
  // Each element of the following table contains a bitfield that indicates a
290
290
  // piece of information about the corresponding ISO-8859-14 character.
291
- static unsigned char yp_encoding_iso_8859_14_table[256] = {
291
+ static uint8_t yp_encoding_iso_8859_14_table[256] = {
292
292
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
293
293
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
294
294
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -310,7 +310,7 @@ static unsigned char yp_encoding_iso_8859_14_table[256] = {
310
310
 
311
311
  // Each element of the following table contains a bitfield that indicates a
312
312
  // piece of information about the corresponding ISO-8859-15 character.
313
- static unsigned char yp_encoding_iso_8859_15_table[256] = {
313
+ static uint8_t yp_encoding_iso_8859_15_table[256] = {
314
314
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
315
315
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
316
316
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -332,7 +332,7 @@ static unsigned char yp_encoding_iso_8859_15_table[256] = {
332
332
 
333
333
  // Each element of the following table contains a bitfield that indicates a
334
334
  // piece of information about the corresponding ISO-8859-16 character.
335
- static unsigned char yp_encoding_iso_8859_16_table[256] = {
335
+ static uint8_t yp_encoding_iso_8859_16_table[256] = {
336
336
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
337
337
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
338
338
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -354,7 +354,7 @@ static unsigned char yp_encoding_iso_8859_16_table[256] = {
354
354
 
355
355
  // Each element of the following table contains a bitfield that indicates a
356
356
  // piece of information about the corresponding KOI8-R character.
357
- static unsigned char yp_encoding_koi8_r_table[256] = {
357
+ static uint8_t yp_encoding_koi8_r_table[256] = {
358
358
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
359
359
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
360
360
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -376,7 +376,7 @@ static unsigned char yp_encoding_koi8_r_table[256] = {
376
376
 
377
377
  // Each element of the following table contains a bitfield that indicates a
378
378
  // piece of information about the corresponding windows-1251 character.
379
- static unsigned char yp_encoding_windows_1251_table[256] = {
379
+ static uint8_t yp_encoding_windows_1251_table[256] = {
380
380
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
381
381
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
382
382
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -398,7 +398,7 @@ static unsigned char yp_encoding_windows_1251_table[256] = {
398
398
 
399
399
  // Each element of the following table contains a bitfield that indicates a
400
400
  // piece of information about the corresponding windows-1252 character.
401
- static unsigned char yp_encoding_windows_1252_table[256] = {
401
+ static uint8_t yp_encoding_windows_1252_table[256] = {
402
402
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
403
403
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
404
404
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -419,34 +419,32 @@ static unsigned char yp_encoding_windows_1252_table[256] = {
419
419
  };
420
420
 
421
421
  static size_t
422
- yp_encoding_ascii_char_width(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
423
- const unsigned char v = (const unsigned char) *c;
424
- return v < 0x80 ? 1 : 0;
422
+ yp_encoding_ascii_char_width(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
423
+ return *b < 0x80 ? 1 : 0;
425
424
  }
426
425
 
427
426
  size_t
428
- yp_encoding_ascii_alpha_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
429
- return (yp_encoding_ascii_table[(const unsigned char) *c] & YP_ENCODING_ALPHABETIC_BIT);
427
+ yp_encoding_ascii_alpha_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
428
+ return (yp_encoding_ascii_table[*b] & YP_ENCODING_ALPHABETIC_BIT);
430
429
  }
431
430
 
432
431
  size_t
433
- yp_encoding_ascii_alnum_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
434
- return (yp_encoding_ascii_table[(const unsigned char) *c] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
432
+ yp_encoding_ascii_alnum_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
433
+ return (yp_encoding_ascii_table[*b] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
435
434
  }
436
435
 
437
436
  bool
438
- yp_encoding_ascii_isupper_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
439
- return (yp_encoding_ascii_table[(const unsigned char) *c] & YP_ENCODING_UPPERCASE_BIT);
437
+ yp_encoding_ascii_isupper_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
438
+ return (yp_encoding_ascii_table[*b] & YP_ENCODING_UPPERCASE_BIT);
440
439
  }
441
440
 
442
441
  static size_t
443
- yp_encoding_koi8_r_char_width(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
444
- const unsigned char v = (const unsigned char) *c;
445
- return ((v >= 0x20 && v <= 0x7E) || (v >= 0x80)) ? 1 : 0;
442
+ yp_encoding_koi8_r_char_width(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
443
+ return ((*b >= 0x20 && *b <= 0x7E) || (*b >= 0x80)) ? 1 : 0;
446
444
  }
447
445
 
448
446
  static size_t
449
- yp_encoding_single_char_width(YP_ATTRIBUTE_UNUSED const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
447
+ yp_encoding_single_char_width(YP_ATTRIBUTE_UNUSED const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
450
448
  return 1;
451
449
  }
452
450
 
@@ -469,14 +467,14 @@ yp_encoding_t yp_encoding_ascii_8bit = {
469
467
  };
470
468
 
471
469
  #define YP_ENCODING_TABLE(s, i, w) \
472
- static size_t yp_encoding_ ##i ## _alpha_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
473
- return (yp_encoding_ ##i ## _table[(const unsigned char) *c] & YP_ENCODING_ALPHABETIC_BIT); \
470
+ static size_t yp_encoding_ ##i ## _alpha_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
471
+ return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_ALPHABETIC_BIT); \
474
472
  } \
475
- static size_t yp_encoding_ ##i ## _alnum_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
476
- return (yp_encoding_ ##i ## _table[(const unsigned char) *c] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
473
+ static size_t yp_encoding_ ##i ## _alnum_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
474
+ return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
477
475
  } \
478
- static bool yp_encoding_ ##i ## _isupper_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
479
- return (yp_encoding_ ##i ## _table[(const unsigned char) *c] & YP_ENCODING_UPPERCASE_BIT); \
476
+ static bool yp_encoding_ ##i ## _isupper_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
477
+ return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_UPPERCASE_BIT); \
480
478
  } \
481
479
  yp_encoding_t yp_encoding_ ##i = { \
482
480
  .name = s, \
data/src/enc/yp_unicode.c CHANGED
@@ -10,7 +10,7 @@ typedef uint32_t yp_unicode_codepoint_t;
10
10
  // this table is different from other encodings where we used a lookup table
11
11
  // because the indices of those tables are the byte representations, not the
12
12
  // codepoints themselves.
13
- unsigned char yp_encoding_unicode_table[256] = {
13
+ uint8_t yp_encoding_unicode_table[256] = {
14
14
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
15
15
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
16
16
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -2220,7 +2220,7 @@ static const uint8_t yp_utf_8_dfa[] = {
2220
2220
  };
2221
2221
 
2222
2222
  static yp_unicode_codepoint_t
2223
- yp_utf_8_codepoint(const unsigned char *c, ptrdiff_t n, size_t *width) {
2223
+ yp_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
2224
2224
  assert(n >= 1);
2225
2225
  size_t maximum = (size_t) n;
2226
2226
 
@@ -2228,7 +2228,7 @@ yp_utf_8_codepoint(const unsigned char *c, ptrdiff_t n, size_t *width) {
2228
2228
  uint32_t state = 0;
2229
2229
 
2230
2230
  for (size_t index = 0; index < 4 && index < maximum; index++) {
2231
- uint32_t byte = c[index];
2231
+ uint32_t byte = b[index];
2232
2232
  uint32_t type = yp_utf_8_dfa[byte];
2233
2233
 
2234
2234
  codepoint = (state != 0) ?
@@ -2247,60 +2247,55 @@ yp_utf_8_codepoint(const unsigned char *c, ptrdiff_t n, size_t *width) {
2247
2247
  }
2248
2248
 
2249
2249
  static size_t
2250
- yp_encoding_utf_8_char_width(const char *c, ptrdiff_t n) {
2250
+ yp_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
2251
2251
  size_t width;
2252
- const unsigned char *v = (const unsigned char *) c;
2253
-
2254
- yp_utf_8_codepoint(v, n, &width);
2252
+ yp_utf_8_codepoint(b, n, &width);
2255
2253
  return width;
2256
2254
  }
2257
2255
 
2258
2256
  size_t
2259
- yp_encoding_utf_8_alpha_char(const char *c, ptrdiff_t n) {
2260
- const unsigned char *v = (const unsigned char *) c;
2261
- if (*v < 0x80) {
2262
- return (yp_encoding_unicode_table[*v] & YP_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
2257
+ yp_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
2258
+ if (*b < 0x80) {
2259
+ return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
2263
2260
  }
2264
2261
 
2265
2262
  size_t width;
2266
- yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(v, n, &width);
2263
+ yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
2267
2264
 
2268
2265
  if (codepoint <= 0xFF) {
2269
- return (yp_encoding_unicode_table[(unsigned char) codepoint] & YP_ENCODING_ALPHABETIC_BIT) ? width : 0;
2266
+ return (yp_encoding_unicode_table[(uint8_t) codepoint] & YP_ENCODING_ALPHABETIC_BIT) ? width : 0;
2270
2267
  } else {
2271
2268
  return yp_unicode_codepoint_match(codepoint, unicode_alpha_codepoints, UNICODE_ALPHA_CODEPOINTS_LENGTH) ? width : 0;
2272
2269
  }
2273
2270
  }
2274
2271
 
2275
2272
  size_t
2276
- yp_encoding_utf_8_alnum_char(const char *c, ptrdiff_t n) {
2277
- const unsigned char *v = (const unsigned char *) c;
2278
- if (*v < 0x80) {
2279
- return (yp_encoding_unicode_table[*v] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
2273
+ yp_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
2274
+ if (*b < 0x80) {
2275
+ return (yp_encoding_unicode_table[*b] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
2280
2276
  }
2281
2277
 
2282
2278
  size_t width;
2283
- yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(v, n, &width);
2279
+ yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
2284
2280
 
2285
2281
  if (codepoint <= 0xFF) {
2286
- return (yp_encoding_unicode_table[(unsigned char) codepoint] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? width : 0;
2282
+ return (yp_encoding_unicode_table[(uint8_t) codepoint] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? width : 0;
2287
2283
  } else {
2288
2284
  return yp_unicode_codepoint_match(codepoint, unicode_alnum_codepoints, UNICODE_ALNUM_CODEPOINTS_LENGTH) ? width : 0;
2289
2285
  }
2290
2286
  }
2291
2287
 
2292
2288
  static bool
2293
- yp_encoding_utf_8_isupper_char(const char *c, ptrdiff_t n) {
2294
- const unsigned char *v = (const unsigned char *) c;
2295
- if (*v < 0x80) {
2296
- return (yp_encoding_unicode_table[*v] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
2289
+ yp_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
2290
+ if (*b < 0x80) {
2291
+ return (yp_encoding_unicode_table[*b] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
2297
2292
  }
2298
2293
 
2299
2294
  size_t width;
2300
- yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(v, n, &width);
2295
+ yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
2301
2296
 
2302
2297
  if (codepoint <= 0xFF) {
2303
- return (yp_encoding_unicode_table[(unsigned char) codepoint] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
2298
+ return (yp_encoding_unicode_table[(uint8_t) codepoint] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
2304
2299
  } else {
2305
2300
  return yp_unicode_codepoint_match(codepoint, unicode_isupper_codepoints, UNICODE_ISUPPER_CODEPOINTS_LENGTH) ? true : false;
2306
2301
  }
data/src/enc/yp_windows_31j.c CHANGED
@@ -1,73 +1,46 @@
1
1
  #include "yarp/enc/yp_encoding.h"
2
2
 
3
- typedef uint16_t yp_windows_31j_codepoint_t;
4
-
5
- static yp_windows_31j_codepoint_t
6
- yp_windows_31j_codepoint(const char *c, ptrdiff_t n, size_t *width) {
7
- const unsigned char *uc = (const unsigned char *) c;
8
-
3
+ static size_t
4
+ yp_encoding_windows_31j_char_width(const uint8_t *b, ptrdiff_t n) {
9
5
  // These are the single byte characters.
10
- if (*uc < 0x80 || (*uc >= 0xA1 && *uc <= 0xDF)) {
11
- *width = 1;
12
- return *uc;
6
+ if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
7
+ return 1;
13
8
  }
14
9
 
15
10
  // These are the double byte characters.
16
11
  if (
17
12
  (n > 1) &&
18
- ((uc[0] >= 0x81 && uc[0] <= 0x9F) || (uc[0] >= 0xE0 && uc[0] <= 0xFC)) &&
19
- (uc[1] >= 0x40 && uc[1] <= 0xFC)
13
+ ((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) &&
14
+ (b[1] >= 0x40 && b[1] <= 0xFC)
20
15
  ) {
21
- *width = 2;
22
- return (yp_windows_31j_codepoint_t) (uc[0] << 8 | uc[1]);
16
+ return 2;
23
17
  }
24
18
 
25
- *width = 0;
26
19
  return 0;
27
20
  }
28
21
 
29
22
  static size_t
30
- yp_encoding_windows_31j_char_width(const char *c, ptrdiff_t n) {
31
- size_t width;
32
- yp_windows_31j_codepoint(c, n, &width);
33
-
34
- return width;
35
- }
36
-
37
- static size_t
38
- yp_encoding_windows_31j_alpha_char(const char *c, ptrdiff_t n) {
39
- size_t width;
40
- yp_windows_31j_codepoint_t codepoint = yp_windows_31j_codepoint(c, n, &width);
41
-
42
- if (width == 1) {
43
- const char value = (const char) codepoint;
44
- return yp_encoding_ascii_alpha_char(&value, n);
23
+ yp_encoding_windows_31j_alpha_char(const uint8_t *b, ptrdiff_t n) {
24
+ if (yp_encoding_windows_31j_char_width(b, n) == 1) {
25
+ return yp_encoding_ascii_alpha_char(b, n);
45
26
  } else {
46
27
  return 0;
47
28
  }
48
29
  }
49
30
 
50
31
  static size_t
51
- yp_encoding_windows_31j_alnum_char(const char *c, ptrdiff_t n) {
52
- size_t width;
53
- yp_windows_31j_codepoint_t codepoint = yp_windows_31j_codepoint(c, n, &width);
54
-
55
- if (width == 1) {
56
- const char value = (const char) codepoint;
57
- return yp_encoding_ascii_alnum_char(&value, n);
32
+ yp_encoding_windows_31j_alnum_char(const uint8_t *b, ptrdiff_t n) {
33
+ if (yp_encoding_windows_31j_char_width(b, n) == 1) {
34
+ return yp_encoding_ascii_alnum_char(b, n);
58
35
  } else {
59
36
  return 0;
60
37
  }
61
38
  }
62
39
 
63
40
  static bool
64
- yp_encoding_windows_31j_isupper_char(const char *c, ptrdiff_t n) {
65
- size_t width;
66
- yp_windows_31j_codepoint_t codepoint = yp_windows_31j_codepoint(c, n, &width);
67
-
68
- if (width == 1) {
69
- const char value = (const char) codepoint;
70
- return yp_encoding_ascii_isupper_char(&value, n);
41
+ yp_encoding_windows_31j_isupper_char(const uint8_t *b, ptrdiff_t n) {
42
+ if (yp_encoding_windows_31j_char_width(b, n) == 1) {
43
+ return yp_encoding_ascii_isupper_char(b, n);
71
44
  } else {
72
45
  return false;
73
46
  }