yarp 0.9.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -1
- data/Makefile +5 -1
- data/config.yml +156 -125
- data/docs/encoding.md +5 -5
- data/docs/serialization.md +2 -2
- data/ext/yarp/api_node.c +142 -98
- data/ext/yarp/extension.c +21 -7
- data/ext/yarp/extension.h +1 -1
- data/include/yarp/ast.h +327 -18
- data/include/yarp/defines.h +2 -1
- data/include/yarp/diagnostic.h +3 -3
- data/include/yarp/enc/yp_encoding.h +10 -10
- data/include/yarp/parser.h +19 -19
- data/include/yarp/regexp.h +1 -1
- data/include/yarp/unescape.h +4 -4
- data/include/yarp/util/yp_buffer.h +3 -0
- data/include/yarp/util/yp_char.h +16 -16
- data/include/yarp/util/yp_constant_pool.h +2 -2
- data/include/yarp/util/yp_newline_list.h +5 -5
- data/include/yarp/util/yp_string.h +4 -4
- data/include/yarp/util/yp_string_list.h +0 -3
- data/include/yarp/util/yp_strpbrk.h +1 -1
- data/include/yarp/version.h +2 -2
- data/include/yarp.h +5 -4
- data/lib/yarp/desugar_visitor.rb +59 -122
- data/lib/yarp/node.rb +230 -240
- data/lib/yarp/serialize.rb +16 -16
- data/lib/yarp.rb +5 -5
- data/src/diagnostic.c +1 -1
- data/src/enc/yp_big5.c +15 -42
- data/src/enc/yp_euc_jp.c +16 -43
- data/src/enc/yp_gbk.c +19 -46
- data/src/enc/yp_shift_jis.c +16 -43
- data/src/enc/yp_tables.c +36 -38
- data/src/enc/yp_unicode.c +20 -25
- data/src/enc/yp_windows_31j.c +16 -43
- data/src/node.c +1271 -899
- data/src/prettyprint.c +87 -48
- data/src/regexp.c +21 -21
- data/src/serialize.c +28 -15
- data/src/unescape.c +151 -121
- data/src/util/yp_buffer.c +7 -2
- data/src/util/yp_char.c +34 -34
- data/src/util/yp_constant_pool.c +4 -4
- data/src/util/yp_memchr.c +1 -1
- data/src/util/yp_newline_list.c +5 -4
- data/src/util/yp_string.c +22 -20
- data/src/util/yp_string_list.c +0 -6
- data/src/util/yp_strncasecmp.c +3 -6
- data/src/util/yp_strpbrk.c +8 -8
- data/src/yarp.c +355 -216
- data/yarp.gemspec +1 -1
- metadata +2 -2
data/src/enc/yp_tables.c
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
// Each element of the following table contains a bitfield that indicates a
|
4
4
|
// piece of information about the corresponding ASCII character.
|
5
|
-
static unsigned char yp_encoding_ascii_table[256] = {
|
5
|
+
static uint8_t yp_encoding_ascii_table[256] = {
|
6
6
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
7
7
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
8
8
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -24,7 +24,7 @@ static unsigned char yp_encoding_ascii_table[256] = {
|
|
24
24
|
|
25
25
|
// Each element of the following table contains a bitfield that indicates a
|
26
26
|
// piece of information about the corresponding ISO-8859-1 character.
|
27
|
-
static unsigned char yp_encoding_iso_8859_1_table[256] = {
|
27
|
+
static uint8_t yp_encoding_iso_8859_1_table[256] = {
|
28
28
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
29
29
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
30
30
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -46,7 +46,7 @@ static unsigned char yp_encoding_iso_8859_1_table[256] = {
|
|
46
46
|
|
47
47
|
// Each element of the following table contains a bitfield that indicates a
|
48
48
|
// piece of information about the corresponding ISO-8859-2 character.
|
49
|
-
static unsigned char yp_encoding_iso_8859_2_table[256] = {
|
49
|
+
static uint8_t yp_encoding_iso_8859_2_table[256] = {
|
50
50
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
51
51
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
52
52
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -68,7 +68,7 @@ static unsigned char yp_encoding_iso_8859_2_table[256] = {
|
|
68
68
|
|
69
69
|
// Each element of the following table contains a bitfield that indicates a
|
70
70
|
// piece of information about the corresponding ISO-8859-3 character.
|
71
|
-
static unsigned char yp_encoding_iso_8859_3_table[256] = {
|
71
|
+
static uint8_t yp_encoding_iso_8859_3_table[256] = {
|
72
72
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
73
73
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
74
74
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -90,7 +90,7 @@ static unsigned char yp_encoding_iso_8859_3_table[256] = {
|
|
90
90
|
|
91
91
|
// Each element of the following table contains a bitfield that indicates a
|
92
92
|
// piece of information about the corresponding ISO-8859-4 character.
|
93
|
-
static unsigned char yp_encoding_iso_8859_4_table[256] = {
|
93
|
+
static uint8_t yp_encoding_iso_8859_4_table[256] = {
|
94
94
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
95
95
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
96
96
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -112,7 +112,7 @@ static unsigned char yp_encoding_iso_8859_4_table[256] = {
|
|
112
112
|
|
113
113
|
// Each element of the following table contains a bitfield that indicates a
|
114
114
|
// piece of information about the corresponding ISO-8859-5 character.
|
115
|
-
static unsigned char yp_encoding_iso_8859_5_table[256] = {
|
115
|
+
static uint8_t yp_encoding_iso_8859_5_table[256] = {
|
116
116
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
117
117
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
118
118
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -134,7 +134,7 @@ static unsigned char yp_encoding_iso_8859_5_table[256] = {
|
|
134
134
|
|
135
135
|
// Each element of the following table contains a bitfield that indicates a
|
136
136
|
// piece of information about the corresponding ISO-8859-6 character.
|
137
|
-
static unsigned char yp_encoding_iso_8859_6_table[256] = {
|
137
|
+
static uint8_t yp_encoding_iso_8859_6_table[256] = {
|
138
138
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
139
139
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
140
140
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -156,7 +156,7 @@ static unsigned char yp_encoding_iso_8859_6_table[256] = {
|
|
156
156
|
|
157
157
|
// Each element of the following table contains a bitfield that indicates a
|
158
158
|
// piece of information about the corresponding ISO-8859-7 character.
|
159
|
-
static unsigned char yp_encoding_iso_8859_7_table[256] = {
|
159
|
+
static uint8_t yp_encoding_iso_8859_7_table[256] = {
|
160
160
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
161
161
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
162
162
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -178,7 +178,7 @@ static unsigned char yp_encoding_iso_8859_7_table[256] = {
|
|
178
178
|
|
179
179
|
// Each element of the following table contains a bitfield that indicates a
|
180
180
|
// piece of information about the corresponding ISO-8859-8 character.
|
181
|
-
static unsigned char yp_encoding_iso_8859_8_table[256] = {
|
181
|
+
static uint8_t yp_encoding_iso_8859_8_table[256] = {
|
182
182
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
183
183
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
184
184
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -200,7 +200,7 @@ static unsigned char yp_encoding_iso_8859_8_table[256] = {
|
|
200
200
|
|
201
201
|
// Each element of the following table contains a bitfield that indicates a
|
202
202
|
// piece of information about the corresponding ISO-8859-9 character.
|
203
|
-
static unsigned char yp_encoding_iso_8859_9_table[256] = {
|
203
|
+
static uint8_t yp_encoding_iso_8859_9_table[256] = {
|
204
204
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
205
205
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
206
206
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -222,7 +222,7 @@ static unsigned char yp_encoding_iso_8859_9_table[256] = {
|
|
222
222
|
|
223
223
|
// Each element of the following table contains a bitfield that indicates a
|
224
224
|
// piece of information about the corresponding ISO-8859-10 character.
|
225
|
-
static unsigned char yp_encoding_iso_8859_10_table[256] = {
|
225
|
+
static uint8_t yp_encoding_iso_8859_10_table[256] = {
|
226
226
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
227
227
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
228
228
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -244,7 +244,7 @@ static unsigned char yp_encoding_iso_8859_10_table[256] = {
|
|
244
244
|
|
245
245
|
// Each element of the following table contains a bitfield that indicates a
|
246
246
|
// piece of information about the corresponding ISO-8859-11 character.
|
247
|
-
static unsigned char yp_encoding_iso_8859_11_table[256] = {
|
247
|
+
static uint8_t yp_encoding_iso_8859_11_table[256] = {
|
248
248
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
249
249
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
250
250
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -266,7 +266,7 @@ static unsigned char yp_encoding_iso_8859_11_table[256] = {
|
|
266
266
|
|
267
267
|
// Each element of the following table contains a bitfield that indicates a
|
268
268
|
// piece of information about the corresponding ISO-8859-13 character.
|
269
|
-
static unsigned char yp_encoding_iso_8859_13_table[256] = {
|
269
|
+
static uint8_t yp_encoding_iso_8859_13_table[256] = {
|
270
270
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
271
271
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
272
272
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -288,7 +288,7 @@ static unsigned char yp_encoding_iso_8859_13_table[256] = {
|
|
288
288
|
|
289
289
|
// Each element of the following table contains a bitfield that indicates a
|
290
290
|
// piece of information about the corresponding ISO-8859-14 character.
|
291
|
-
static unsigned char yp_encoding_iso_8859_14_table[256] = {
|
291
|
+
static uint8_t yp_encoding_iso_8859_14_table[256] = {
|
292
292
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
293
293
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
294
294
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -310,7 +310,7 @@ static unsigned char yp_encoding_iso_8859_14_table[256] = {
|
|
310
310
|
|
311
311
|
// Each element of the following table contains a bitfield that indicates a
|
312
312
|
// piece of information about the corresponding ISO-8859-15 character.
|
313
|
-
static unsigned char yp_encoding_iso_8859_15_table[256] = {
|
313
|
+
static uint8_t yp_encoding_iso_8859_15_table[256] = {
|
314
314
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
315
315
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
316
316
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -332,7 +332,7 @@ static unsigned char yp_encoding_iso_8859_15_table[256] = {
|
|
332
332
|
|
333
333
|
// Each element of the following table contains a bitfield that indicates a
|
334
334
|
// piece of information about the corresponding ISO-8859-16 character.
|
335
|
-
static unsigned char yp_encoding_iso_8859_16_table[256] = {
|
335
|
+
static uint8_t yp_encoding_iso_8859_16_table[256] = {
|
336
336
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
337
337
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
338
338
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -354,7 +354,7 @@ static unsigned char yp_encoding_iso_8859_16_table[256] = {
|
|
354
354
|
|
355
355
|
// Each element of the following table contains a bitfield that indicates a
|
356
356
|
// piece of information about the corresponding KOI8-R character.
|
357
|
-
static unsigned char yp_encoding_koi8_r_table[256] = {
|
357
|
+
static uint8_t yp_encoding_koi8_r_table[256] = {
|
358
358
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
359
359
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
360
360
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -376,7 +376,7 @@ static unsigned char yp_encoding_koi8_r_table[256] = {
|
|
376
376
|
|
377
377
|
// Each element of the following table contains a bitfield that indicates a
|
378
378
|
// piece of information about the corresponding windows-1251 character.
|
379
|
-
static unsigned char yp_encoding_windows_1251_table[256] = {
|
379
|
+
static uint8_t yp_encoding_windows_1251_table[256] = {
|
380
380
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
381
381
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
382
382
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -398,7 +398,7 @@ static unsigned char yp_encoding_windows_1251_table[256] = {
|
|
398
398
|
|
399
399
|
// Each element of the following table contains a bitfield that indicates a
|
400
400
|
// piece of information about the corresponding windows-1252 character.
|
401
|
-
static unsigned char yp_encoding_windows_1252_table[256] = {
|
401
|
+
static uint8_t yp_encoding_windows_1252_table[256] = {
|
402
402
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
403
403
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
404
404
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -419,34 +419,32 @@ static unsigned char yp_encoding_windows_1252_table[256] = {
|
|
419
419
|
};
|
420
420
|
|
421
421
|
static size_t
|
422
|
-
yp_encoding_ascii_char_width(const
|
423
|
-
|
424
|
-
return v < 0x80 ? 1 : 0;
|
422
|
+
yp_encoding_ascii_char_width(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
423
|
+
return *b < 0x80 ? 1 : 0;
|
425
424
|
}
|
426
425
|
|
427
426
|
size_t
|
428
|
-
yp_encoding_ascii_alpha_char(const
|
429
|
-
return (yp_encoding_ascii_table[
|
427
|
+
yp_encoding_ascii_alpha_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
428
|
+
return (yp_encoding_ascii_table[*b] & YP_ENCODING_ALPHABETIC_BIT);
|
430
429
|
}
|
431
430
|
|
432
431
|
size_t
|
433
|
-
yp_encoding_ascii_alnum_char(const
|
434
|
-
return (yp_encoding_ascii_table[
|
432
|
+
yp_encoding_ascii_alnum_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
433
|
+
return (yp_encoding_ascii_table[*b] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
|
435
434
|
}
|
436
435
|
|
437
436
|
bool
|
438
|
-
yp_encoding_ascii_isupper_char(const
|
439
|
-
return (yp_encoding_ascii_table[
|
437
|
+
yp_encoding_ascii_isupper_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
438
|
+
return (yp_encoding_ascii_table[*b] & YP_ENCODING_UPPERCASE_BIT);
|
440
439
|
}
|
441
440
|
|
442
441
|
static size_t
|
443
|
-
yp_encoding_koi8_r_char_width(const
|
444
|
-
|
445
|
-
return ((v >= 0x20 && v <= 0x7E) || (v >= 0x80)) ? 1 : 0;
|
442
|
+
yp_encoding_koi8_r_char_width(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
443
|
+
return ((*b >= 0x20 && *b <= 0x7E) || (*b >= 0x80)) ? 1 : 0;
|
446
444
|
}
|
447
445
|
|
448
446
|
static size_t
|
449
|
-
yp_encoding_single_char_width(YP_ATTRIBUTE_UNUSED const
|
447
|
+
yp_encoding_single_char_width(YP_ATTRIBUTE_UNUSED const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) {
|
450
448
|
return 1;
|
451
449
|
}
|
452
450
|
|
@@ -469,14 +467,14 @@ yp_encoding_t yp_encoding_ascii_8bit = {
|
|
469
467
|
};
|
470
468
|
|
471
469
|
#define YP_ENCODING_TABLE(s, i, w) \
|
472
|
-
static size_t yp_encoding_ ##i ## _alpha_char(const
|
473
|
-
return (yp_encoding_ ##i ## _table[
|
470
|
+
static size_t yp_encoding_ ##i ## _alpha_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
|
471
|
+
return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_ALPHABETIC_BIT); \
|
474
472
|
} \
|
475
|
-
static size_t yp_encoding_ ##i ## _alnum_char(const
|
476
|
-
return (yp_encoding_ ##i ## _table[
|
473
|
+
static size_t yp_encoding_ ##i ## _alnum_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
|
474
|
+
return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
|
477
475
|
} \
|
478
|
-
static bool yp_encoding_ ##i ## _isupper_char(const
|
479
|
-
return (yp_encoding_ ##i ## _table[
|
476
|
+
static bool yp_encoding_ ##i ## _isupper_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n) { \
|
477
|
+
return (yp_encoding_ ##i ## _table[*b] & YP_ENCODING_UPPERCASE_BIT); \
|
480
478
|
} \
|
481
479
|
yp_encoding_t yp_encoding_ ##i = { \
|
482
480
|
.name = s, \
|
data/src/enc/yp_unicode.c
CHANGED
@@ -10,7 +10,7 @@ typedef uint32_t yp_unicode_codepoint_t;
|
|
10
10
|
// this table is different from other encodings where we used a lookup table
|
11
11
|
// because the indices of those tables are the byte representations, not the
|
12
12
|
// codepoints themselves.
|
13
|
-
|
13
|
+
uint8_t yp_encoding_unicode_table[256] = {
|
14
14
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
15
15
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
16
16
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -2220,7 +2220,7 @@ static const uint8_t yp_utf_8_dfa[] = {
|
|
2220
2220
|
};
|
2221
2221
|
|
2222
2222
|
static yp_unicode_codepoint_t
|
2223
|
-
yp_utf_8_codepoint(const unsigned char *c, ptrdiff_t n, size_t *width) {
|
2223
|
+
yp_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
|
2224
2224
|
assert(n >= 1);
|
2225
2225
|
size_t maximum = (size_t) n;
|
2226
2226
|
|
@@ -2228,7 +2228,7 @@ yp_utf_8_codepoint(const unsigned char *c, ptrdiff_t n, size_t *width) {
|
|
2228
2228
|
uint32_t state = 0;
|
2229
2229
|
|
2230
2230
|
for (size_t index = 0; index < 4 && index < maximum; index++) {
|
2231
|
-
uint32_t byte =
|
2231
|
+
uint32_t byte = b[index];
|
2232
2232
|
uint32_t type = yp_utf_8_dfa[byte];
|
2233
2233
|
|
2234
2234
|
codepoint = (state != 0) ?
|
@@ -2247,60 +2247,55 @@ yp_utf_8_codepoint(const unsigned char *c, ptrdiff_t n, size_t *width) {
|
|
2247
2247
|
}
|
2248
2248
|
|
2249
2249
|
static size_t
|
2250
|
-
yp_encoding_utf_8_char_width(const
|
2250
|
+
yp_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
|
2251
2251
|
size_t width;
|
2252
|
-
|
2253
|
-
|
2254
|
-
yp_utf_8_codepoint(v, n, &width);
|
2252
|
+
yp_utf_8_codepoint(b, n, &width);
|
2255
2253
|
return width;
|
2256
2254
|
}
|
2257
2255
|
|
2258
2256
|
size_t
|
2259
|
-
yp_encoding_utf_8_alpha_char(const
|
2260
|
-
|
2261
|
-
|
2262
|
-
return (yp_encoding_unicode_table[*v] & YP_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
|
2257
|
+
yp_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
2258
|
+
if (*b < 0x80) {
|
2259
|
+
return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
|
2263
2260
|
}
|
2264
2261
|
|
2265
2262
|
size_t width;
|
2266
|
-
yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(
|
2263
|
+
yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
|
2267
2264
|
|
2268
2265
|
if (codepoint <= 0xFF) {
|
2269
|
-
return (yp_encoding_unicode_table[(
|
2266
|
+
return (yp_encoding_unicode_table[(uint8_t) codepoint] & YP_ENCODING_ALPHABETIC_BIT) ? width : 0;
|
2270
2267
|
} else {
|
2271
2268
|
return yp_unicode_codepoint_match(codepoint, unicode_alpha_codepoints, UNICODE_ALPHA_CODEPOINTS_LENGTH) ? width : 0;
|
2272
2269
|
}
|
2273
2270
|
}
|
2274
2271
|
|
2275
2272
|
size_t
|
2276
|
-
yp_encoding_utf_8_alnum_char(const
|
2277
|
-
|
2278
|
-
|
2279
|
-
return (yp_encoding_unicode_table[*v] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
|
2273
|
+
yp_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
2274
|
+
if (*b < 0x80) {
|
2275
|
+
return (yp_encoding_unicode_table[*b] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
|
2280
2276
|
}
|
2281
2277
|
|
2282
2278
|
size_t width;
|
2283
|
-
yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(
|
2279
|
+
yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
|
2284
2280
|
|
2285
2281
|
if (codepoint <= 0xFF) {
|
2286
|
-
return (yp_encoding_unicode_table[(
|
2282
|
+
return (yp_encoding_unicode_table[(uint8_t) codepoint] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? width : 0;
|
2287
2283
|
} else {
|
2288
2284
|
return yp_unicode_codepoint_match(codepoint, unicode_alnum_codepoints, UNICODE_ALNUM_CODEPOINTS_LENGTH) ? width : 0;
|
2289
2285
|
}
|
2290
2286
|
}
|
2291
2287
|
|
2292
2288
|
static bool
|
2293
|
-
yp_encoding_utf_8_isupper_char(const
|
2294
|
-
|
2295
|
-
|
2296
|
-
return (yp_encoding_unicode_table[*v] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
|
2289
|
+
yp_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
2290
|
+
if (*b < 0x80) {
|
2291
|
+
return (yp_encoding_unicode_table[*b] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
|
2297
2292
|
}
|
2298
2293
|
|
2299
2294
|
size_t width;
|
2300
|
-
yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(
|
2295
|
+
yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
|
2301
2296
|
|
2302
2297
|
if (codepoint <= 0xFF) {
|
2303
|
-
return (yp_encoding_unicode_table[(
|
2298
|
+
return (yp_encoding_unicode_table[(uint8_t) codepoint] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
|
2304
2299
|
} else {
|
2305
2300
|
return yp_unicode_codepoint_match(codepoint, unicode_isupper_codepoints, UNICODE_ISUPPER_CODEPOINTS_LENGTH) ? true : false;
|
2306
2301
|
}
|
data/src/enc/yp_windows_31j.c
CHANGED
@@ -1,73 +1,46 @@
|
|
1
1
|
#include "yarp/enc/yp_encoding.h"
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
static yp_windows_31j_codepoint_t
|
6
|
-
yp_windows_31j_codepoint(const char *c, ptrdiff_t n, size_t *width) {
|
7
|
-
const unsigned char *uc = (const unsigned char *) c;
|
8
|
-
|
3
|
+
static size_t
|
4
|
+
yp_encoding_windows_31j_char_width(const uint8_t *b, ptrdiff_t n) {
|
9
5
|
// These are the single byte characters.
|
10
|
-
if (*
|
11
|
-
|
12
|
-
return *uc;
|
6
|
+
if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
|
7
|
+
return 1;
|
13
8
|
}
|
14
9
|
|
15
10
|
// These are the double byte characters.
|
16
11
|
if (
|
17
12
|
(n > 1) &&
|
18
|
-
((
|
19
|
-
(
|
13
|
+
((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) &&
|
14
|
+
(b[1] >= 0x40 && b[1] <= 0xFC)
|
20
15
|
) {
|
21
|
-
|
22
|
-
return (yp_windows_31j_codepoint_t) (uc[0] << 8 | uc[1]);
|
16
|
+
return 2;
|
23
17
|
}
|
24
18
|
|
25
|
-
*width = 0;
|
26
19
|
return 0;
|
27
20
|
}
|
28
21
|
|
29
22
|
static size_t
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
return width;
|
35
|
-
}
|
36
|
-
|
37
|
-
static size_t
|
38
|
-
yp_encoding_windows_31j_alpha_char(const char *c, ptrdiff_t n) {
|
39
|
-
size_t width;
|
40
|
-
yp_windows_31j_codepoint_t codepoint = yp_windows_31j_codepoint(c, n, &width);
|
41
|
-
|
42
|
-
if (width == 1) {
|
43
|
-
const char value = (const char) codepoint;
|
44
|
-
return yp_encoding_ascii_alpha_char(&value, n);
|
23
|
+
yp_encoding_windows_31j_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
24
|
+
if (yp_encoding_windows_31j_char_width(b, n) == 1) {
|
25
|
+
return yp_encoding_ascii_alpha_char(b, n);
|
45
26
|
} else {
|
46
27
|
return 0;
|
47
28
|
}
|
48
29
|
}
|
49
30
|
|
50
31
|
static size_t
|
51
|
-
yp_encoding_windows_31j_alnum_char(const
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
if (width == 1) {
|
56
|
-
const char value = (const char) codepoint;
|
57
|
-
return yp_encoding_ascii_alnum_char(&value, n);
|
32
|
+
yp_encoding_windows_31j_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
33
|
+
if (yp_encoding_windows_31j_char_width(b, n) == 1) {
|
34
|
+
return yp_encoding_ascii_alnum_char(b, n);
|
58
35
|
} else {
|
59
36
|
return 0;
|
60
37
|
}
|
61
38
|
}
|
62
39
|
|
63
40
|
static bool
|
64
|
-
yp_encoding_windows_31j_isupper_char(const
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
if (width == 1) {
|
69
|
-
const char value = (const char) codepoint;
|
70
|
-
return yp_encoding_ascii_isupper_char(&value, n);
|
41
|
+
yp_encoding_windows_31j_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
42
|
+
if (yp_encoding_windows_31j_char_width(b, n) == 1) {
|
43
|
+
return yp_encoding_ascii_isupper_char(b, n);
|
71
44
|
} else {
|
72
45
|
return false;
|
73
46
|
}
|