yarp 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -1
- data/Makefile +5 -1
- data/config.yml +156 -125
- data/docs/encoding.md +5 -5
- data/docs/serialization.md +2 -2
- data/ext/yarp/api_node.c +142 -98
- data/ext/yarp/extension.c +21 -7
- data/ext/yarp/extension.h +1 -1
- data/include/yarp/ast.h +327 -18
- data/include/yarp/defines.h +2 -1
- data/include/yarp/diagnostic.h +3 -3
- data/include/yarp/enc/yp_encoding.h +10 -10
- data/include/yarp/parser.h +19 -19
- data/include/yarp/regexp.h +1 -1
- data/include/yarp/unescape.h +4 -4
- data/include/yarp/util/yp_buffer.h +3 -0
- data/include/yarp/util/yp_char.h +16 -16
- data/include/yarp/util/yp_constant_pool.h +2 -2
- data/include/yarp/util/yp_newline_list.h +5 -5
- data/include/yarp/util/yp_string.h +4 -4
- data/include/yarp/util/yp_string_list.h +0 -3
- data/include/yarp/util/yp_strpbrk.h +1 -1
- data/include/yarp/version.h +2 -2
- data/include/yarp.h +5 -4
- data/lib/yarp/desugar_visitor.rb +59 -122
- data/lib/yarp/node.rb +230 -240
- data/lib/yarp/serialize.rb +16 -16
- data/lib/yarp.rb +5 -5
- data/src/diagnostic.c +1 -1
- data/src/enc/yp_big5.c +15 -42
- data/src/enc/yp_euc_jp.c +16 -43
- data/src/enc/yp_gbk.c +19 -46
- data/src/enc/yp_shift_jis.c +16 -43
- data/src/enc/yp_tables.c +36 -38
- data/src/enc/yp_unicode.c +20 -25
- data/src/enc/yp_windows_31j.c +16 -43
- data/src/node.c +1271 -899
- data/src/prettyprint.c +87 -48
- data/src/regexp.c +21 -21
- data/src/serialize.c +28 -15
- data/src/unescape.c +151 -121
- data/src/util/yp_buffer.c +7 -2
- data/src/util/yp_char.c +34 -34
- data/src/util/yp_constant_pool.c +4 -4
- data/src/util/yp_memchr.c +1 -1
- data/src/util/yp_newline_list.c +5 -4
- data/src/util/yp_string.c +22 -20
- data/src/util/yp_string_list.c +0 -6
- data/src/util/yp_strncasecmp.c +3 -6
- data/src/util/yp_strpbrk.c +8 -8
- data/src/yarp.c +355 -216
- data/yarp.gemspec +1 -1
- metadata +2 -2
data/src/enc/yp_tables.c
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
// Each element of the following table contains a bitfield that indicates a
|
4
4
|
// piece of information about the corresponding ASCII character.
|
5
|
-
static
|
5
|
+
static uint8_t yp_encoding_ascii_table[256] = {
|
6
6
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
7
7
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
8
8
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -24,7 +24,7 @@ static unsigned char yp_encoding_ascii_table[256] = {
|
|
24
24
|
|
25
25
|
// Each element of the following table contains a bitfield that indicates a
|
26
26
|
// piece of information about the corresponding ISO-8859-1 character.
|
27
|
-
static
|
27
|
+
static uint8_t yp_encoding_iso_8859_1_table[256] = {
|
28
28
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
29
29
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
30
30
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -46,7 +46,7 @@ static unsigned char yp_encoding_iso_8859_1_table[256] = {
|
|
46
46
|
|
47
47
|
// Each element of the following table contains a bitfield that indicates a
|
48
48
|
// piece of information about the corresponding ISO-8859-2 character.
|
49
|
-
static
|
49
|
+
static uint8_t yp_encoding_iso_8859_2_table[256] = {
|
50
50
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
51
51
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
52
52
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -68,7 +68,7 @@ static unsigned char yp_encoding_iso_8859_2_table[256] = {
|
|
68
68
|
|
69
69
|
// Each element of the following table contains a bitfield that indicates a
|
70
70
|
// piece of information about the corresponding ISO-8859-3 character.
|
71
|
-
static
|
71
|
+
static uint8_t yp_encoding_iso_8859_3_table[256] = {
|
72
72
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
73
73
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
74
74
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -90,7 +90,7 @@ static unsigned char yp_encoding_iso_8859_3_table[256] = {
|
|
90
90
|
|
91
91
|
// Each element of the following table contains a bitfield that indicates a
|
92
92
|
// piece of information about the corresponding ISO-8859-4 character.
|
93
|
-
static
|
93
|
+
static uint8_t yp_encoding_iso_8859_4_table[256] = {
|
94
94
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
95
95
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
96
96
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -112,7 +112,7 @@ static unsigned char yp_encoding_iso_8859_4_table[256] = {
|
|
112
112
|
|
113
113
|
// Each element of the following table contains a bitfield that indicates a
|
114
114
|
// piece of information about the corresponding ISO-8859-5 character.
|
115
|
-
static
|
115
|
+
static uint8_t yp_encoding_iso_8859_5_table[256] = {
|
116
116
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
117
117
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
118
118
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -134,7 +134,7 @@ static unsigned char yp_encoding_iso_8859_5_table[256] = {
|
|
134
134
|
|
135
135
|
// Each element of the following table contains a bitfield that indicates a
|
136
136
|
// piece of information about the corresponding ISO-8859-6 character.
|
137
|
-
static
|
137
|
+
static uint8_t yp_encoding_iso_8859_6_table[256] = {
|
138
138
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
139
139
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
140
140
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -156,7 +156,7 @@ static unsigned char yp_encoding_iso_8859_6_table[256] = {
|
|
156
156
|
|
157
157
|
// Each element of the following table contains a bitfield that indicates a
|
158
158
|
// piece of information about the corresponding ISO-8859-7 character.
|
159
|
-
static
|
159
|
+
static uint8_t yp_encoding_iso_8859_7_table[256] = {
|
160
160
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
161
161
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
162
162
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -178,7 +178,7 @@ static unsigned char yp_encoding_iso_8859_7_table[256] = {
|
|
178
178
|
|
179
179
|
// Each element of the following table contains a bitfield that indicates a
|
180
180
|
// piece of information about the corresponding ISO-8859-8 character.
|
181
|
-
static
|
181
|
+
static uint8_t yp_encoding_iso_8859_8_table[256] = {
|
182
182
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
183
183
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
184
184
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -200,7 +200,7 @@ static unsigned char yp_encoding_iso_8859_8_table[256] = {
|
|
200
200
|
|
201
201
|
// Each element of the following table contains a bitfield that indicates a
|
202
202
|
// piece of information about the corresponding ISO-8859-9 character.
|
203
|
-
static
|
203
|
+
static uint8_t yp_encoding_iso_8859_9_table[256] = {
|
204
204
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
205
205
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
206
206
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -222,7 +222,7 @@ static unsigned char yp_encoding_iso_8859_9_table[256] = {
|
|
222
222
|
|
223
223
|
// Each element of the following table contains a bitfield that indicates a
|
224
224
|
// piece of information about the corresponding ISO-8859-10 character.
|
225
|
-
static
|
225
|
+
static uint8_t yp_encoding_iso_8859_10_table[256] = {
|
226
226
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
227
227
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
228
228
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -244,7 +244,7 @@ static unsigned char yp_encoding_iso_8859_10_table[256] = {
|
|
244
244
|
|
245
245
|
// Each element of the following table contains a bitfield that indicates a
|
246
246
|
// piece of information about the corresponding ISO-8859-11 character.
|
247
|
-
static
|
247
|
+
static uint8_t yp_encoding_iso_8859_11_table[256] = {
|
248
248
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
249
249
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
250
250
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -266,7 +266,7 @@ static unsigned char yp_encoding_iso_8859_11_table[256] = {
|
|
266
266
|
|
267
267
|
// Each element of the following table contains a bitfield that indicates a
|
268
268
|
// piece of information about the corresponding ISO-8859-13 character.
|
269
|
-
static
|
269
|
+
static uint8_t yp_encoding_iso_8859_13_table[256] = {
|
270
270
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
271
271
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
272
272
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -288,7 +288,7 @@ static unsigned char yp_encoding_iso_8859_13_table[256] = {
|
|
288
288
|
|
289
289
|
// Each element of the following table contains a bitfield that indicates a
|
290
290
|
// piece of information about the corresponding ISO-8859-14 character.
|
291
|
-
static
|
291
|
+
static uint8_t yp_encoding_iso_8859_14_table[256] = {
|
292
292
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
293
293
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
294
294
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -310,7 +310,7 @@ static unsigned char yp_encoding_iso_8859_14_table[256] = {
|
|
310
310
|
|
311
311
|
// Each element of the following table contains a bitfield that indicates a
|
312
312
|
// piece of information about the corresponding ISO-8859-15 character.
|
313
|
-
static
|
313
|
+
static uint8_t yp_encoding_iso_8859_15_table[256] = {
|
314
314
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
315
315
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
316
316
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -332,7 +332,7 @@ static unsigned char yp_encoding_iso_8859_15_table[256] = {
|
|
332
332
|
|
333
333
|
// Each element of the following table contains a bitfield that indicates a
|
334
334
|
// piece of information about the corresponding ISO-8859-16 character.
|
335
|
-
static
|
335
|
+
static uint8_t yp_encoding_iso_8859_16_table[256] = {
|
336
336
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
337
337
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
338
338
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -354,7 +354,7 @@ static unsigned char yp_encoding_iso_8859_16_table[256] = {
|
|
354
354
|
|
355
355
|
// Each element of the following table contains a bitfield that indicates a
|
356
356
|
// piece of information about the corresponding KOI8-R character.
|
357
|
-
static
|
357
|
+
static uint8_t yp_encoding_koi8_r_table[256] = {
|
358
358
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
359
359
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
360
360
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -376,7 +376,7 @@ static unsigned char yp_encoding_koi8_r_table[256] = {
|
|
376
376
|
|
377
377
|
// Each element of the following table contains a bitfield that indicates a
|
378
378
|
// piece of information about the corresponding windows-1251 character.
|
379
|
-
static
|
379
|
+
static uint8_t yp_encoding_windows_1251_table[256] = {
|
380
380
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
381
381
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
382
382
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -398,7 +398,7 @@ static unsigned char yp_encoding_windows_1251_table[256] = {
|
|
398
398
|
|
399
399
|
// Each element of the following table contains a bitfield that indicates a
|
400
400
|
// piece of information about the corresponding windows-1252 character.
|
401
|
-
static
|
401
|
+
static uint8_t yp_encoding_windows_1252_table[256] = {
|
402
402
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
403
403
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
404
404
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -419,34 +419,32 @@ static unsigned char yp_encoding_windows_1252_table[256] = {
|
|
419
419
|
};
|
420
420
|
|
421
421
|
static size_t
|
422
|
-
yp_encoding_ascii_char_width(const
|
423
|
-
|
424
|
-
return v < 0x80 ? 1 : 0;
|
422
|
+
yp_encoding_ascii_char_width(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
423
|
+
return *b < 0x80 ? 1 : 0;
|
425
424
|
}
|
426
425
|
|
427
426
|
size_t
|
428
|
-
yp_encoding_ascii_alpha_char(const
|
429
|
-
return (yp_encoding_ascii_table[
|
427
|
+
yp_encoding_ascii_alpha_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
428
|
+
return (yp_encoding_ascii_table[*b] & YP_ENCODING_ALPHABETIC_BIT);
|
430
429
|
}
|
431
430
|
|
432
431
|
size_t
|
433
|
-
yp_encoding_ascii_alnum_char(const
|
434
|
-
return (yp_encoding_ascii_table[
|
432
|
+
yp_encoding_ascii_alnum_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
433
|
+
return (yp_encoding_ascii_table[*b] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
|
435
434
|
}
|
436
435
|
|
437
436
|
bool
|
438
|
-
yp_encoding_ascii_isupper_char(const
|
439
|
-
return (yp_encoding_ascii_table[
|
437
|
+
yp_encoding_ascii_isupper_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
438
|
+
return (yp_encoding_ascii_table[*b] & YP_ENCODING_UPPERCASE_BIT);
|
440
439
|
}
|
441
440
|
|
442
441
|
static size_t
|
443
|
-
yp_encoding_koi8_r_char_width(const
|
444
|
-
|
445
|
-
return ((v >= 0x20 && v <= 0x7E) || (v >= 0x80)) ? 1 : 0;
|
442
|
+
yp_encoding_koi8_r_char_width(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
443
|
+
return ((*b >= 0x20 && *b <= 0x7E) || (*b >= 0x80)) ? 1 : 0;
|
446
444
|
}
|
447
445
|
|
448
446
|
static size_t
|
449
|
-
yp_encoding_single_char_width(YP_ATTRIBUTE_UNUSED const
|
447
|
+
yp_encoding_single_char_width(YP_ATTRIBUTE_UNUSED const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
450
448
|
return 1;
|
451
449
|
}
|
452
450
|
|
@@ -469,14 +467,14 @@ yp_encoding_t yp_encoding_ascii_8bit = {
|
|
469
467
|
};
|
470
468
|
|
471
469
|
#define YP_ENCODING_TABLE(s, i, w) \
|
472
|
-
static size_t yp_encoding_ ##i ## _alpha_char(const
|
473
|
-
return (yp_encoding_ ##i ## _table[
|
470
|
+
static size_t yp_encoding_ ##i ## _alpha_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
|
471
|
+
return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_ALPHABETIC_BIT); \
|
474
472
|
} \
|
475
|
-
static size_t yp_encoding_ ##i ## _alnum_char(const
|
476
|
-
return (yp_encoding_ ##i ## _table[
|
473
|
+
static size_t yp_encoding_ ##i ## _alnum_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
|
474
|
+
return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
|
477
475
|
} \
|
478
|
-
static bool yp_encoding_ ##i ## _isupper_char(const
|
479
|
-
return (yp_encoding_ ##i ## _table[
|
476
|
+
static bool yp_encoding_ ##i ## _isupper_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
|
477
|
+
return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_UPPERCASE_BIT); \
|
480
478
|
} \
|
481
479
|
yp_encoding_t yp_encoding_ ##i = { \
|
482
480
|
.name = s, \
|
data/src/enc/yp_unicode.c
CHANGED
@@ -10,7 +10,7 @@ typedef uint32_t yp_unicode_codepoint_t;
|
|
10
10
|
// this table is different from other encodings where we used a lookup table
|
11
11
|
// because the indices of those tables are the byte representations, not the
|
12
12
|
// codepoints themselves.
|
13
|
-
|
13
|
+
uint8_t yp_encoding_unicode_table[256] = {
|
14
14
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
15
15
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
16
16
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -2220,7 +2220,7 @@ static const uint8_t yp_utf_8_dfa[] = {
|
|
2220
2220
|
};
|
2221
2221
|
|
2222
2222
|
static yp_unicode_codepoint_t
|
2223
|
-
yp_utf_8_codepoint(const
|
2223
|
+
yp_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
|
2224
2224
|
assert(n >= 1);
|
2225
2225
|
size_t maximum = (size_t) n;
|
2226
2226
|
|
@@ -2228,7 +2228,7 @@ yp_utf_8_codepoint(const unsigned char *c, ptrdiff_t n, size_t *width) {
|
|
2228
2228
|
uint32_t state = 0;
|
2229
2229
|
|
2230
2230
|
for (size_t index = 0; index < 4 && index < maximum; index++) {
|
2231
|
-
uint32_t byte =
|
2231
|
+
uint32_t byte = b[index];
|
2232
2232
|
uint32_t type = yp_utf_8_dfa[byte];
|
2233
2233
|
|
2234
2234
|
codepoint = (state != 0) ?
|
@@ -2247,60 +2247,55 @@ yp_utf_8_codepoint(const unsigned char *c, ptrdiff_t n, size_t *width) {
|
|
2247
2247
|
}
|
2248
2248
|
|
2249
2249
|
static size_t
|
2250
|
-
yp_encoding_utf_8_char_width(const
|
2250
|
+
yp_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
|
2251
2251
|
size_t width;
|
2252
|
-
|
2253
|
-
|
2254
|
-
yp_utf_8_codepoint(v, n, &width);
|
2252
|
+
yp_utf_8_codepoint(b, n, &width);
|
2255
2253
|
return width;
|
2256
2254
|
}
|
2257
2255
|
|
2258
2256
|
size_t
|
2259
|
-
yp_encoding_utf_8_alpha_char(const
|
2260
|
-
|
2261
|
-
|
2262
|
-
return (yp_encoding_unicode_table[*v] & YP_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
|
2257
|
+
yp_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
2258
|
+
if (*b < 0x80) {
|
2259
|
+
return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
|
2263
2260
|
}
|
2264
2261
|
|
2265
2262
|
size_t width;
|
2266
|
-
yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(
|
2263
|
+
yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
|
2267
2264
|
|
2268
2265
|
if (codepoint <= 0xFF) {
|
2269
|
-
return (yp_encoding_unicode_table[(
|
2266
|
+
return (yp_encoding_unicode_table[(uint8_t) codepoint] & YP_ENCODING_ALPHABETIC_BIT) ? width : 0;
|
2270
2267
|
} else {
|
2271
2268
|
return yp_unicode_codepoint_match(codepoint, unicode_alpha_codepoints, UNICODE_ALPHA_CODEPOINTS_LENGTH) ? width : 0;
|
2272
2269
|
}
|
2273
2270
|
}
|
2274
2271
|
|
2275
2272
|
size_t
|
2276
|
-
yp_encoding_utf_8_alnum_char(const
|
2277
|
-
|
2278
|
-
|
2279
|
-
return (yp_encoding_unicode_table[*v] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
|
2273
|
+
yp_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
2274
|
+
if (*b < 0x80) {
|
2275
|
+
return (yp_encoding_unicode_table[*b] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
|
2280
2276
|
}
|
2281
2277
|
|
2282
2278
|
size_t width;
|
2283
|
-
yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(
|
2279
|
+
yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
|
2284
2280
|
|
2285
2281
|
if (codepoint <= 0xFF) {
|
2286
|
-
return (yp_encoding_unicode_table[(
|
2282
|
+
return (yp_encoding_unicode_table[(uint8_t) codepoint] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? width : 0;
|
2287
2283
|
} else {
|
2288
2284
|
return yp_unicode_codepoint_match(codepoint, unicode_alnum_codepoints, UNICODE_ALNUM_CODEPOINTS_LENGTH) ? width : 0;
|
2289
2285
|
}
|
2290
2286
|
}
|
2291
2287
|
|
2292
2288
|
static bool
|
2293
|
-
yp_encoding_utf_8_isupper_char(const
|
2294
|
-
|
2295
|
-
|
2296
|
-
return (yp_encoding_unicode_table[*v] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
|
2289
|
+
yp_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
2290
|
+
if (*b < 0x80) {
|
2291
|
+
return (yp_encoding_unicode_table[*b] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
|
2297
2292
|
}
|
2298
2293
|
|
2299
2294
|
size_t width;
|
2300
|
-
yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(
|
2295
|
+
yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
|
2301
2296
|
|
2302
2297
|
if (codepoint <= 0xFF) {
|
2303
|
-
return (yp_encoding_unicode_table[(
|
2298
|
+
return (yp_encoding_unicode_table[(uint8_t) codepoint] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
|
2304
2299
|
} else {
|
2305
2300
|
return yp_unicode_codepoint_match(codepoint, unicode_isupper_codepoints, UNICODE_ISUPPER_CODEPOINTS_LENGTH) ? true : false;
|
2306
2301
|
}
|
data/src/enc/yp_windows_31j.c
CHANGED
@@ -1,73 +1,46 @@
|
|
1
1
|
#include "yarp/enc/yp_encoding.h"
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
static yp_windows_31j_codepoint_t
|
6
|
-
yp_windows_31j_codepoint(const char *c, ptrdiff_t n, size_t *width) {
|
7
|
-
const unsigned char *uc = (const unsigned char *) c;
|
8
|
-
|
3
|
+
static size_t
|
4
|
+
yp_encoding_windows_31j_char_width(const uint8_t *b, ptrdiff_t n) {
|
9
5
|
// These are the single byte characters.
|
10
|
-
if (*
|
11
|
-
|
12
|
-
return *uc;
|
6
|
+
if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
|
7
|
+
return 1;
|
13
8
|
}
|
14
9
|
|
15
10
|
// These are the double byte characters.
|
16
11
|
if (
|
17
12
|
(n > 1) &&
|
18
|
-
((
|
19
|
-
(
|
13
|
+
((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) &&
|
14
|
+
(b[1] >= 0x40 && b[1] <= 0xFC)
|
20
15
|
) {
|
21
|
-
|
22
|
-
return (yp_windows_31j_codepoint_t) (uc[0] << 8 | uc[1]);
|
16
|
+
return 2;
|
23
17
|
}
|
24
18
|
|
25
|
-
*width = 0;
|
26
19
|
return 0;
|
27
20
|
}
|
28
21
|
|
29
22
|
static size_t
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
return width;
|
35
|
-
}
|
36
|
-
|
37
|
-
static size_t
|
38
|
-
yp_encoding_windows_31j_alpha_char(const char *c, ptrdiff_t n) {
|
39
|
-
size_t width;
|
40
|
-
yp_windows_31j_codepoint_t codepoint = yp_windows_31j_codepoint(c, n, &width);
|
41
|
-
|
42
|
-
if (width == 1) {
|
43
|
-
const char value = (const char) codepoint;
|
44
|
-
return yp_encoding_ascii_alpha_char(&value, n);
|
23
|
+
yp_encoding_windows_31j_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
24
|
+
if (yp_encoding_windows_31j_char_width(b, n) == 1) {
|
25
|
+
return yp_encoding_ascii_alpha_char(b, n);
|
45
26
|
} else {
|
46
27
|
return 0;
|
47
28
|
}
|
48
29
|
}
|
49
30
|
|
50
31
|
static size_t
|
51
|
-
yp_encoding_windows_31j_alnum_char(const
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
if (width == 1) {
|
56
|
-
const char value = (const char) codepoint;
|
57
|
-
return yp_encoding_ascii_alnum_char(&value, n);
|
32
|
+
yp_encoding_windows_31j_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
33
|
+
if (yp_encoding_windows_31j_char_width(b, n) == 1) {
|
34
|
+
return yp_encoding_ascii_alnum_char(b, n);
|
58
35
|
} else {
|
59
36
|
return 0;
|
60
37
|
}
|
61
38
|
}
|
62
39
|
|
63
40
|
static bool
|
64
|
-
yp_encoding_windows_31j_isupper_char(const
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
if (width == 1) {
|
69
|
-
const char value = (const char) codepoint;
|
70
|
-
return yp_encoding_ascii_isupper_char(&value, n);
|
41
|
+
yp_encoding_windows_31j_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
42
|
+
if (yp_encoding_windows_31j_char_width(b, n) == 1) {
|
43
|
+
return yp_encoding_ascii_isupper_char(b, n);
|
71
44
|
} else {
|
72
45
|
return false;
|
73
46
|
}
|