yarp 0.8.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +48 -1
- data/Makefile +5 -1
- data/README.md +4 -3
- data/config.yml +461 -150
- data/docs/configuration.md +1 -0
- data/docs/encoding.md +5 -5
- data/docs/ruby_api.md +2 -0
- data/docs/serialization.md +3 -3
- data/docs/testing.md +2 -2
- data/ext/yarp/api_node.c +810 -199
- data/ext/yarp/extension.c +94 -31
- data/ext/yarp/extension.h +2 -2
- data/include/yarp/ast.h +653 -150
- data/include/yarp/defines.h +2 -1
- data/include/yarp/diagnostic.h +3 -3
- data/include/yarp/enc/yp_encoding.h +10 -10
- data/include/yarp/node.h +10 -0
- data/include/yarp/parser.h +19 -19
- data/include/yarp/regexp.h +1 -1
- data/include/yarp/unescape.h +7 -5
- data/include/yarp/util/yp_buffer.h +3 -0
- data/include/yarp/util/yp_char.h +16 -16
- data/include/yarp/util/yp_constant_pool.h +2 -2
- data/include/yarp/util/yp_newline_list.h +7 -4
- data/include/yarp/util/yp_string.h +4 -4
- data/include/yarp/util/yp_string_list.h +0 -3
- data/include/yarp/util/yp_strpbrk.h +1 -1
- data/include/yarp/version.h +2 -2
- data/include/yarp.h +14 -3
- data/lib/yarp/desugar_visitor.rb +204 -0
- data/lib/yarp/ffi.rb +27 -1
- data/lib/yarp/lex_compat.rb +93 -25
- data/lib/yarp/mutation_visitor.rb +683 -0
- data/lib/yarp/node.rb +3121 -597
- data/lib/yarp/serialize.rb +198 -126
- data/lib/yarp.rb +53 -7
- data/src/diagnostic.c +1 -1
- data/src/enc/yp_big5.c +15 -42
- data/src/enc/yp_euc_jp.c +16 -43
- data/src/enc/yp_gbk.c +19 -46
- data/src/enc/yp_shift_jis.c +16 -43
- data/src/enc/yp_tables.c +36 -38
- data/src/enc/yp_unicode.c +20 -25
- data/src/enc/yp_windows_31j.c +16 -43
- data/src/node.c +1444 -836
- data/src/prettyprint.c +324 -103
- data/src/regexp.c +21 -21
- data/src/serialize.c +429 -276
- data/src/token_type.c +2 -2
- data/src/unescape.c +184 -136
- data/src/util/yp_buffer.c +7 -2
- data/src/util/yp_char.c +34 -34
- data/src/util/yp_constant_pool.c +4 -4
- data/src/util/yp_memchr.c +1 -1
- data/src/util/yp_newline_list.c +14 -3
- data/src/util/yp_string.c +22 -20
- data/src/util/yp_string_list.c +0 -6
- data/src/util/yp_strncasecmp.c +3 -6
- data/src/util/yp_strpbrk.c +8 -8
- data/src/yarp.c +1504 -615
- data/yarp.gemspec +3 -1
- metadata +4 -2
data/src/regexp.c
CHANGED
@@ -2,9 +2,9 @@
|
|
2
2
|
|
3
3
|
// This is the parser that is going to handle parsing regular expressions.
|
4
4
|
typedef struct {
|
5
|
-
const char *start;
|
6
|
-
const char *cursor;
|
7
|
-
const char *end;
|
5
|
+
const uint8_t *start;
|
6
|
+
const uint8_t *cursor;
|
7
|
+
const uint8_t *end;
|
8
8
|
yp_string_list_t *named_captures;
|
9
9
|
bool encoding_changed;
|
10
10
|
yp_encoding_t *encoding;
|
@@ -12,7 +12,7 @@ typedef struct {
|
|
12
12
|
|
13
13
|
// This initializes a new parser with the given source.
|
14
14
|
static void
|
15
|
-
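yp_regexp_parser_init(yp_regexp_parser_t *parser, const char *start, const char *end, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {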
|
15
|
+
yp_regexp_parser_init(yp_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
|
16
16
|
*parser = (yp_regexp_parser_t) {
|
17
17
|
.start = start,
|
18
18
|
.cursor = start,
|
@@ -25,7 +25,7 @@ yp_regexp_parser_init(yp_regexp_parser_t *parser, const char *start, const char
|
|
25
25
|
|
26
26
|
// This appends a new string to the list of named captures.
|
27
27
|
static void
|
28
|
-
yp_regexp_parser_named_capture(yp_regexp_parser_t *parser, const char *start, const char *end) {
|
28
|
+
yp_regexp_parser_named_capture(yp_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
29
29
|
yp_string_t string;
|
30
30
|
yp_string_shared_init(&string, start, end);
|
31
31
|
yp_string_list_append(parser->named_captures, &string);
|
@@ -40,7 +40,7 @@ yp_regexp_char_is_eof(yp_regexp_parser_t *parser) {
|
|
40
40
|
|
41
41
|
// Optionally accept a char and consume it if it exists.
|
42
42
|
static inline bool
|
43
|
-
yp_regexp_char_accept(yp_regexp_parser_t *parser, char value) {
|
43
|
+
yp_regexp_char_accept(yp_regexp_parser_t *parser, uint8_t value) {
|
44
44
|
if (!yp_regexp_char_is_eof(parser) && *parser->cursor == value) {
|
45
45
|
parser->cursor++;
|
46
46
|
return true;
|
@@ -50,7 +50,7 @@ yp_regexp_char_accept(yp_regexp_parser_t *parser, char value) {
|
|
50
50
|
|
51
51
|
// Expect a character to be present and consume it.
|
52
52
|
static inline bool
|
53
|
-
yp_regexp_char_expect(yp_regexp_parser_t *parser, char value) {
|
53
|
+
yp_regexp_char_expect(yp_regexp_parser_t *parser, uint8_t value) {
|
54
54
|
if (!yp_regexp_char_is_eof(parser) && *parser->cursor == value) {
|
55
55
|
parser->cursor++;
|
56
56
|
return true;
|
@@ -60,12 +60,12 @@ yp_regexp_char_expect(yp_regexp_parser_t *parser, char value) {
|
|
60
60
|
|
61
61
|
// This advances the current token to the next instance of the given character.
|
62
62
|
static bool
|
63
|
-
yp_regexp_char_find(yp_regexp_parser_t *parser, char value) {
|
63
|
+
yp_regexp_char_find(yp_regexp_parser_t *parser, uint8_t value) {
|
64
64
|
if (yp_regexp_char_is_eof(parser)) {
|
65
65
|
return false;
|
66
66
|
}
|
67
67
|
|
68
|
-
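const char *end = (const char *) yp_memchr(parser->cursor, value, (size_t) (parser->end - parser->cursor), parser->encoding_changed, parser->encoding);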
|
68
|
+
const uint8_t *end = (const uint8_t *) yp_memchr(parser->cursor, value, (size_t) (parser->end - parser->cursor), parser->encoding_changed, parser->encoding);
|
69
69
|
if (end == NULL) {
|
70
70
|
return false;
|
71
71
|
}
|
@@ -107,7 +107,7 @@ yp_regexp_char_find(yp_regexp_parser_t *parser, char value) {
|
|
107
107
|
// consumed so we're in the start state.
|
108
108
|
static bool
|
109
109
|
yp_regexp_parse_range_quantifier(yp_regexp_parser_t *parser) {
|
110
|
-
const char *savepoint = parser->cursor;
|
110
|
+
const uint8_t *savepoint = parser->cursor;
|
111
111
|
|
112
112
|
enum {
|
113
113
|
YP_REGEXP_RANGE_QUANTIFIER_STATE_START,
|
@@ -252,7 +252,7 @@ yp_regexp_parse_character_set(yp_regexp_parser_t *parser) {
|
|
252
252
|
// A left bracket can either mean a POSIX class or a character set.
|
253
253
|
static bool
|
254
254
|
yp_regexp_parse_lbracket(yp_regexp_parser_t *parser) {
|
255
|
-
const char *reset = parser->cursor;
|
255
|
+
const uint8_t *reset = parser->cursor;
|
256
256
|
|
257
257
|
if ((parser->cursor + 2 < parser->end) && parser->cursor[0] == '[' && parser->cursor[1] == ':') {
|
258
258
|
parser->cursor++;
|
@@ -287,7 +287,7 @@ typedef enum {
|
|
287
287
|
|
288
288
|
// This is the set of options that are configurable on the regular expression.
|
289
289
|
typedef struct {
|
290
|
-
|
290
|
+
uint8_t values[YP_REGEXP_OPTION_STATE_SLOTS];
|
291
291
|
} yp_regexp_options_t;
|
292
292
|
|
293
293
|
// Initialize a new set of options to their default values.
|
@@ -305,9 +305,9 @@ yp_regexp_options_init(yp_regexp_options_t *options) {
|
|
305
305
|
// Attempt to add the given option to the set of options. Returns true if it was
|
306
306
|
// added, false if it was already present.
|
307
307
|
static bool
|
308
|
-
yp_regexp_options_add(yp_regexp_options_t *options, unsigned char key) {
|
308
|
+
yp_regexp_options_add(yp_regexp_options_t *options, uint8_t key) {
|
309
309
|
if (key >= YP_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= YP_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
|
310
|
-
key = (unsigned char) (key - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM);
|
310
|
+
key = (uint8_t) (key - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM);
|
311
311
|
|
312
312
|
switch (options->values[key]) {
|
313
313
|
case YP_REGEXP_OPTION_STATE_INVALID:
|
@@ -328,9 +328,9 @@ yp_regexp_options_add(yp_regexp_options_t *options, unsigned char key) {
|
|
328
328
|
// Attempt to remove the given option from the set of options. Returns true if
|
329
329
|
// it was removed, false if it was already absent.
|
330
330
|
static bool
|
331
|
-
yp_regexp_options_remove(yp_regexp_options_t *options, unsigned char key) {
|
331
|
+
yp_regexp_options_remove(yp_regexp_options_t *options, uint8_t key) {
|
332
332
|
if (key >= YP_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= YP_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
|
333
|
-
key = (unsigned char) (key - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM);
|
333
|
+
key = (uint8_t) (key - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM);
|
334
334
|
|
335
335
|
switch (options->values[key]) {
|
336
336
|
case YP_REGEXP_OPTION_STATE_INVALID:
|
@@ -431,7 +431,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
|
|
431
431
|
parser->cursor++;
|
432
432
|
break;
|
433
433
|
default: { // named capture group
|
434
|
-
const char *start = parser->cursor;
|
434
|
+
const uint8_t *start = parser->cursor;
|
435
435
|
if (!yp_regexp_char_find(parser, '>')) {
|
436
436
|
return false;
|
437
437
|
}
|
@@ -441,7 +441,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
|
|
441
441
|
}
|
442
442
|
break;
|
443
443
|
case '\'': { // named capture group
|
444
|
-
const char *start = ++parser->cursor;
|
444
|
+
const uint8_t *start = ++parser->cursor;
|
445
445
|
if (!yp_regexp_char_find(parser, '\'')) {
|
446
446
|
return false;
|
447
447
|
}
|
@@ -456,7 +456,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
|
|
456
456
|
break;
|
457
457
|
case 'i': case 'm': case 'x': case 'd': case 'a': case 'u': // options
|
458
458
|
while (!yp_regexp_char_is_eof(parser) && *parser->cursor != '-' && *parser->cursor != ':' && *parser->cursor != ')') {
|
459
|
-
if (!yp_regexp_options_add(&options, (unsigned char) *parser->cursor)) {
|
459
|
+
if (!yp_regexp_options_add(&options, *parser->cursor)) {
|
460
460
|
return false;
|
461
461
|
}
|
462
462
|
parser->cursor++;
|
@@ -474,7 +474,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
|
|
474
474
|
case '-':
|
475
475
|
parser->cursor++;
|
476
476
|
while (!yp_regexp_char_is_eof(parser) && *parser->cursor != ':' && *parser->cursor != ')') {
|
477
|
-
if (!yp_regexp_options_remove(&options, (unsigned char) *parser->cursor)) {
|
477
|
+
if (!yp_regexp_options_remove(&options, *parser->cursor)) {
|
478
478
|
return false;
|
479
479
|
}
|
480
480
|
parser->cursor++;
|
@@ -573,7 +573,7 @@ yp_regexp_parse_pattern(yp_regexp_parser_t *parser) {
|
|
573
573
|
// Parse a regular expression and extract the names of all of the named capture
|
574
574
|
// groups.
|
575
575
|
YP_EXPORTED_FUNCTION bool
|
576
|
-
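yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {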
|
576
|
+
yp_regexp_named_capture_group_names(const uint8_t *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
|
577
577
|
yp_regexp_parser_t parser;
|
578
578
|
yp_regexp_parser_init(&parser, source, source + size, named_captures, encoding_changed, encoding);
|
579
579
|
return yp_regexp_parse_pattern(&parser);
|