yarp 0.8.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +48 -1
- data/Makefile +5 -1
- data/README.md +4 -3
- data/config.yml +461 -150
- data/docs/configuration.md +1 -0
- data/docs/encoding.md +5 -5
- data/docs/ruby_api.md +2 -0
- data/docs/serialization.md +3 -3
- data/docs/testing.md +2 -2
- data/ext/yarp/api_node.c +810 -199
- data/ext/yarp/extension.c +94 -31
- data/ext/yarp/extension.h +2 -2
- data/include/yarp/ast.h +653 -150
- data/include/yarp/defines.h +2 -1
- data/include/yarp/diagnostic.h +3 -3
- data/include/yarp/enc/yp_encoding.h +10 -10
- data/include/yarp/node.h +10 -0
- data/include/yarp/parser.h +19 -19
- data/include/yarp/regexp.h +1 -1
- data/include/yarp/unescape.h +7 -5
- data/include/yarp/util/yp_buffer.h +3 -0
- data/include/yarp/util/yp_char.h +16 -16
- data/include/yarp/util/yp_constant_pool.h +2 -2
- data/include/yarp/util/yp_newline_list.h +7 -4
- data/include/yarp/util/yp_string.h +4 -4
- data/include/yarp/util/yp_string_list.h +0 -3
- data/include/yarp/util/yp_strpbrk.h +1 -1
- data/include/yarp/version.h +2 -2
- data/include/yarp.h +14 -3
- data/lib/yarp/desugar_visitor.rb +204 -0
- data/lib/yarp/ffi.rb +27 -1
- data/lib/yarp/lex_compat.rb +93 -25
- data/lib/yarp/mutation_visitor.rb +683 -0
- data/lib/yarp/node.rb +3121 -597
- data/lib/yarp/serialize.rb +198 -126
- data/lib/yarp.rb +53 -7
- data/src/diagnostic.c +1 -1
- data/src/enc/yp_big5.c +15 -42
- data/src/enc/yp_euc_jp.c +16 -43
- data/src/enc/yp_gbk.c +19 -46
- data/src/enc/yp_shift_jis.c +16 -43
- data/src/enc/yp_tables.c +36 -38
- data/src/enc/yp_unicode.c +20 -25
- data/src/enc/yp_windows_31j.c +16 -43
- data/src/node.c +1444 -836
- data/src/prettyprint.c +324 -103
- data/src/regexp.c +21 -21
- data/src/serialize.c +429 -276
- data/src/token_type.c +2 -2
- data/src/unescape.c +184 -136
- data/src/util/yp_buffer.c +7 -2
- data/src/util/yp_char.c +34 -34
- data/src/util/yp_constant_pool.c +4 -4
- data/src/util/yp_memchr.c +1 -1
- data/src/util/yp_newline_list.c +14 -3
- data/src/util/yp_string.c +22 -20
- data/src/util/yp_string_list.c +0 -6
- data/src/util/yp_strncasecmp.c +3 -6
- data/src/util/yp_strpbrk.c +8 -8
- data/src/yarp.c +1504 -615
- data/yarp.gemspec +3 -1
- metadata +4 -2
data/src/enc/yp_unicode.c
CHANGED
@@ -10,7 +10,7 @@ typedef uint32_t yp_unicode_codepoint_t;
|
|
10
10
|
// this table is different from other encodings where we used a lookup table
|
11
11
|
// because the indices of those tables are the byte representations, not the
|
12
12
|
// codepoints themselves.
|
13
|
-
|
13
|
+
uint8_t yp_encoding_unicode_table[256] = {
|
14
14
|
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
15
15
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
|
16
16
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
@@ -2220,7 +2220,7 @@ static const uint8_t yp_utf_8_dfa[] = {
|
|
2220
2220
|
};
|
2221
2221
|
|
2222
2222
|
static yp_unicode_codepoint_t
|
2223
|
-
yp_utf_8_codepoint(const
|
2223
|
+
yp_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
|
2224
2224
|
assert(n >= 1);
|
2225
2225
|
size_t maximum = (size_t) n;
|
2226
2226
|
|
@@ -2228,7 +2228,7 @@ yp_utf_8_codepoint(const unsigned char *c, ptrdiff_t n, size_t *width) {
|
|
2228
2228
|
uint32_t state = 0;
|
2229
2229
|
|
2230
2230
|
for (size_t index = 0; index < 4 && index < maximum; index++) {
|
2231
|
-
uint32_t byte =
|
2231
|
+
uint32_t byte = b[index];
|
2232
2232
|
uint32_t type = yp_utf_8_dfa[byte];
|
2233
2233
|
|
2234
2234
|
codepoint = (state != 0) ?
|
@@ -2247,60 +2247,55 @@ yp_utf_8_codepoint(const unsigned char *c, ptrdiff_t n, size_t *width) {
|
|
2247
2247
|
}
|
2248
2248
|
|
2249
2249
|
static size_t
|
2250
|
-
yp_encoding_utf_8_char_width(const
|
2250
|
+
yp_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
|
2251
2251
|
size_t width;
|
2252
|
-
|
2253
|
-
|
2254
|
-
yp_utf_8_codepoint(v, n, &width);
|
2252
|
+
yp_utf_8_codepoint(b, n, &width);
|
2255
2253
|
return width;
|
2256
2254
|
}
|
2257
2255
|
|
2258
2256
|
size_t
|
2259
|
-
yp_encoding_utf_8_alpha_char(const
|
2260
|
-
|
2261
|
-
|
2262
|
-
return (yp_encoding_unicode_table[*v] & YP_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
|
2257
|
+
yp_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
2258
|
+
if (*b < 0x80) {
|
2259
|
+
return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
|
2263
2260
|
}
|
2264
2261
|
|
2265
2262
|
size_t width;
|
2266
|
-
yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(
|
2263
|
+
yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
|
2267
2264
|
|
2268
2265
|
if (codepoint <= 0xFF) {
|
2269
|
-
return (yp_encoding_unicode_table[(
|
2266
|
+
return (yp_encoding_unicode_table[(uint8_t) codepoint] & YP_ENCODING_ALPHABETIC_BIT) ? width : 0;
|
2270
2267
|
} else {
|
2271
2268
|
return yp_unicode_codepoint_match(codepoint, unicode_alpha_codepoints, UNICODE_ALPHA_CODEPOINTS_LENGTH) ? width : 0;
|
2272
2269
|
}
|
2273
2270
|
}
|
2274
2271
|
|
2275
2272
|
size_t
|
2276
|
-
yp_encoding_utf_8_alnum_char(const
|
2277
|
-
|
2278
|
-
|
2279
|
-
return (yp_encoding_unicode_table[*v] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
|
2273
|
+
yp_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
2274
|
+
if (*b < 0x80) {
|
2275
|
+
return (yp_encoding_unicode_table[*b] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
|
2280
2276
|
}
|
2281
2277
|
|
2282
2278
|
size_t width;
|
2283
|
-
yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(
|
2279
|
+
yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
|
2284
2280
|
|
2285
2281
|
if (codepoint <= 0xFF) {
|
2286
|
-
return (yp_encoding_unicode_table[(
|
2282
|
+
return (yp_encoding_unicode_table[(uint8_t) codepoint] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? width : 0;
|
2287
2283
|
} else {
|
2288
2284
|
return yp_unicode_codepoint_match(codepoint, unicode_alnum_codepoints, UNICODE_ALNUM_CODEPOINTS_LENGTH) ? width : 0;
|
2289
2285
|
}
|
2290
2286
|
}
|
2291
2287
|
|
2292
2288
|
static bool
|
2293
|
-
yp_encoding_utf_8_isupper_char(const
|
2294
|
-
|
2295
|
-
|
2296
|
-
return (yp_encoding_unicode_table[*v] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
|
2289
|
+
yp_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
2290
|
+
if (*b < 0x80) {
|
2291
|
+
return (yp_encoding_unicode_table[*b] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
|
2297
2292
|
}
|
2298
2293
|
|
2299
2294
|
size_t width;
|
2300
|
-
yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(
|
2295
|
+
yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
|
2301
2296
|
|
2302
2297
|
if (codepoint <= 0xFF) {
|
2303
|
-
return (yp_encoding_unicode_table[(
|
2298
|
+
return (yp_encoding_unicode_table[(uint8_t) codepoint] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
|
2304
2299
|
} else {
|
2305
2300
|
return yp_unicode_codepoint_match(codepoint, unicode_isupper_codepoints, UNICODE_ISUPPER_CODEPOINTS_LENGTH) ? true : false;
|
2306
2301
|
}
|
data/src/enc/yp_windows_31j.c
CHANGED
@@ -1,73 +1,46 @@
|
|
1
1
|
#include "yarp/enc/yp_encoding.h"
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
static yp_windows_31j_codepoint_t
|
6
|
-
yp_windows_31j_codepoint(const char *c, ptrdiff_t n, size_t *width) {
|
7
|
-
const unsigned char *uc = (const unsigned char *) c;
|
8
|
-
|
3
|
+
static size_t
|
4
|
+
yp_encoding_windows_31j_char_width(const uint8_t *b, ptrdiff_t n) {
|
9
5
|
// These are the single byte characters.
|
10
|
-
if (*
|
11
|
-
|
12
|
-
return *uc;
|
6
|
+
if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
|
7
|
+
return 1;
|
13
8
|
}
|
14
9
|
|
15
10
|
// These are the double byte characters.
|
16
11
|
if (
|
17
12
|
(n > 1) &&
|
18
|
-
((
|
19
|
-
(
|
13
|
+
((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) &&
|
14
|
+
(b[1] >= 0x40 && b[1] <= 0xFC)
|
20
15
|
) {
|
21
|
-
|
22
|
-
return (yp_windows_31j_codepoint_t) (uc[0] << 8 | uc[1]);
|
16
|
+
return 2;
|
23
17
|
}
|
24
18
|
|
25
|
-
*width = 0;
|
26
19
|
return 0;
|
27
20
|
}
|
28
21
|
|
29
22
|
static size_t
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
return width;
|
35
|
-
}
|
36
|
-
|
37
|
-
static size_t
|
38
|
-
yp_encoding_windows_31j_alpha_char(const char *c, ptrdiff_t n) {
|
39
|
-
size_t width;
|
40
|
-
yp_windows_31j_codepoint_t codepoint = yp_windows_31j_codepoint(c, n, &width);
|
41
|
-
|
42
|
-
if (width == 1) {
|
43
|
-
const char value = (const char) codepoint;
|
44
|
-
return yp_encoding_ascii_alpha_char(&value, n);
|
23
|
+
yp_encoding_windows_31j_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
24
|
+
if (yp_encoding_windows_31j_char_width(b, n) == 1) {
|
25
|
+
return yp_encoding_ascii_alpha_char(b, n);
|
45
26
|
} else {
|
46
27
|
return 0;
|
47
28
|
}
|
48
29
|
}
|
49
30
|
|
50
31
|
static size_t
|
51
|
-
yp_encoding_windows_31j_alnum_char(const
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
if (width == 1) {
|
56
|
-
const char value = (const char) codepoint;
|
57
|
-
return yp_encoding_ascii_alnum_char(&value, n);
|
32
|
+
yp_encoding_windows_31j_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
33
|
+
if (yp_encoding_windows_31j_char_width(b, n) == 1) {
|
34
|
+
return yp_encoding_ascii_alnum_char(b, n);
|
58
35
|
} else {
|
59
36
|
return 0;
|
60
37
|
}
|
61
38
|
}
|
62
39
|
|
63
40
|
static bool
|
64
|
-
yp_encoding_windows_31j_isupper_char(const
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
if (width == 1) {
|
69
|
-
const char value = (const char) codepoint;
|
70
|
-
return yp_encoding_ascii_isupper_char(&value, n);
|
41
|
+
yp_encoding_windows_31j_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
42
|
+
if (yp_encoding_windows_31j_char_width(b, n) == 1) {
|
43
|
+
return yp_encoding_ascii_isupper_char(b, n);
|
71
44
|
} else {
|
72
45
|
return false;
|
73
46
|
}
|