yarp 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +48 -1
- data/Makefile +5 -1
- data/README.md +4 -3
- data/config.yml +461 -150
- data/docs/configuration.md +1 -0
- data/docs/encoding.md +5 -5
- data/docs/ruby_api.md +2 -0
- data/docs/serialization.md +3 -3
- data/docs/testing.md +2 -2
- data/ext/yarp/api_node.c +810 -199
- data/ext/yarp/extension.c +94 -31
- data/ext/yarp/extension.h +2 -2
- data/include/yarp/ast.h +653 -150
- data/include/yarp/defines.h +2 -1
- data/include/yarp/diagnostic.h +3 -3
- data/include/yarp/enc/yp_encoding.h +10 -10
- data/include/yarp/node.h +10 -0
- data/include/yarp/parser.h +19 -19
- data/include/yarp/regexp.h +1 -1
- data/include/yarp/unescape.h +7 -5
- data/include/yarp/util/yp_buffer.h +3 -0
- data/include/yarp/util/yp_char.h +16 -16
- data/include/yarp/util/yp_constant_pool.h +2 -2
- data/include/yarp/util/yp_newline_list.h +7 -4
- data/include/yarp/util/yp_string.h +4 -4
- data/include/yarp/util/yp_string_list.h +0 -3
- data/include/yarp/util/yp_strpbrk.h +1 -1
- data/include/yarp/version.h +2 -2
- data/include/yarp.h +14 -3
- data/lib/yarp/desugar_visitor.rb +204 -0
- data/lib/yarp/ffi.rb +27 -1
- data/lib/yarp/lex_compat.rb +93 -25
- data/lib/yarp/mutation_visitor.rb +683 -0
- data/lib/yarp/node.rb +3121 -597
- data/lib/yarp/serialize.rb +198 -126
- data/lib/yarp.rb +53 -7
- data/src/diagnostic.c +1 -1
- data/src/enc/yp_big5.c +15 -42
- data/src/enc/yp_euc_jp.c +16 -43
- data/src/enc/yp_gbk.c +19 -46
- data/src/enc/yp_shift_jis.c +16 -43
- data/src/enc/yp_tables.c +36 -38
- data/src/enc/yp_unicode.c +20 -25
- data/src/enc/yp_windows_31j.c +16 -43
- data/src/node.c +1444 -836
- data/src/prettyprint.c +324 -103
- data/src/regexp.c +21 -21
- data/src/serialize.c +429 -276
- data/src/token_type.c +2 -2
- data/src/unescape.c +184 -136
- data/src/util/yp_buffer.c +7 -2
- data/src/util/yp_char.c +34 -34
- data/src/util/yp_constant_pool.c +4 -4
- data/src/util/yp_memchr.c +1 -1
- data/src/util/yp_newline_list.c +14 -3
- data/src/util/yp_string.c +22 -20
- data/src/util/yp_string_list.c +0 -6
- data/src/util/yp_strncasecmp.c +3 -6
- data/src/util/yp_strpbrk.c +8 -8
- data/src/yarp.c +1504 -615
- data/yarp.gemspec +3 -1
- metadata +4 -2
data/src/regexp.c
CHANGED
@@ -2,9 +2,9 @@
|
|
2
2
|
|
3
3
|
// This is the parser that is going to handle parsing regular expressions.
|
4
4
|
typedef struct {
|
5
|
-
const
|
6
|
-
const
|
7
|
-
const
|
5
|
+
const uint8_t *start;
|
6
|
+
const uint8_t *cursor;
|
7
|
+
const uint8_t *end;
|
8
8
|
yp_string_list_t *named_captures;
|
9
9
|
bool encoding_changed;
|
10
10
|
yp_encoding_t *encoding;
|
@@ -12,7 +12,7 @@ typedef struct {
|
|
12
12
|
|
13
13
|
// This initializes a new parser with the given source.
|
14
14
|
static void
|
15
|
-
yp_regexp_parser_init(yp_regexp_parser_t *parser, const
|
15
|
+
yp_regexp_parser_init(yp_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
|
16
16
|
*parser = (yp_regexp_parser_t) {
|
17
17
|
.start = start,
|
18
18
|
.cursor = start,
|
@@ -25,7 +25,7 @@ yp_regexp_parser_init(yp_regexp_parser_t *parser, const char *start, const char
|
|
25
25
|
|
26
26
|
// This appends a new string to the list of named captures.
|
27
27
|
static void
|
28
|
-
yp_regexp_parser_named_capture(yp_regexp_parser_t *parser, const
|
28
|
+
yp_regexp_parser_named_capture(yp_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
29
29
|
yp_string_t string;
|
30
30
|
yp_string_shared_init(&string, start, end);
|
31
31
|
yp_string_list_append(parser->named_captures, &string);
|
@@ -40,7 +40,7 @@ yp_regexp_char_is_eof(yp_regexp_parser_t *parser) {
|
|
40
40
|
|
41
41
|
// Optionally accept a char and consume it if it exists.
|
42
42
|
static inline bool
|
43
|
-
yp_regexp_char_accept(yp_regexp_parser_t *parser, char value) {
|
43
|
+
yp_regexp_char_accept(yp_regexp_parser_t *parser, uint8_t value) {
|
44
44
|
if (!yp_regexp_char_is_eof(parser) && *parser->cursor == value) {
|
45
45
|
parser->cursor++;
|
46
46
|
return true;
|
@@ -50,7 +50,7 @@ yp_regexp_char_accept(yp_regexp_parser_t *parser, char value) {
|
|
50
50
|
|
51
51
|
// Expect a character to be present and consume it.
|
52
52
|
static inline bool
|
53
|
-
yp_regexp_char_expect(yp_regexp_parser_t *parser, char value) {
|
53
|
+
yp_regexp_char_expect(yp_regexp_parser_t *parser, uint8_t value) {
|
54
54
|
if (!yp_regexp_char_is_eof(parser) && *parser->cursor == value) {
|
55
55
|
parser->cursor++;
|
56
56
|
return true;
|
@@ -60,12 +60,12 @@ yp_regexp_char_expect(yp_regexp_parser_t *parser, char value) {
|
|
60
60
|
|
61
61
|
// This advances the current token to the next instance of the given character.
|
62
62
|
static bool
|
63
|
-
yp_regexp_char_find(yp_regexp_parser_t *parser, char value) {
|
63
|
+
yp_regexp_char_find(yp_regexp_parser_t *parser, uint8_t value) {
|
64
64
|
if (yp_regexp_char_is_eof(parser)) {
|
65
65
|
return false;
|
66
66
|
}
|
67
67
|
|
68
|
-
const
|
68
|
+
const uint8_t *end = (const uint8_t *) yp_memchr(parser->cursor, value, (size_t) (parser->end - parser->cursor), parser->encoding_changed, parser->encoding);
|
69
69
|
if (end == NULL) {
|
70
70
|
return false;
|
71
71
|
}
|
@@ -107,7 +107,7 @@ yp_regexp_char_find(yp_regexp_parser_t *parser, char value) {
|
|
107
107
|
// consumed so we're in the start state.
|
108
108
|
static bool
|
109
109
|
yp_regexp_parse_range_quantifier(yp_regexp_parser_t *parser) {
|
110
|
-
const
|
110
|
+
const uint8_t *savepoint = parser->cursor;
|
111
111
|
|
112
112
|
enum {
|
113
113
|
YP_REGEXP_RANGE_QUANTIFIER_STATE_START,
|
@@ -252,7 +252,7 @@ yp_regexp_parse_character_set(yp_regexp_parser_t *parser) {
|
|
252
252
|
// A left bracket can either mean a POSIX class or a character set.
|
253
253
|
static bool
|
254
254
|
yp_regexp_parse_lbracket(yp_regexp_parser_t *parser) {
|
255
|
-
const
|
255
|
+
const uint8_t *reset = parser->cursor;
|
256
256
|
|
257
257
|
if ((parser->cursor + 2 < parser->end) && parser->cursor[0] == '[' && parser->cursor[1] == ':') {
|
258
258
|
parser->cursor++;
|
@@ -287,7 +287,7 @@ typedef enum {
|
|
287
287
|
|
288
288
|
// This is the set of options that are configurable on the regular expression.
|
289
289
|
typedef struct {
|
290
|
-
|
290
|
+
uint8_t values[YP_REGEXP_OPTION_STATE_SLOTS];
|
291
291
|
} yp_regexp_options_t;
|
292
292
|
|
293
293
|
// Initialize a new set of options to their default values.
|
@@ -305,9 +305,9 @@ yp_regexp_options_init(yp_regexp_options_t *options) {
|
|
305
305
|
// Attempt to add the given option to the set of options. Returns true if it was
|
306
306
|
// added, false if it was already present.
|
307
307
|
static bool
|
308
|
-
yp_regexp_options_add(yp_regexp_options_t *options, unsigned char key) {
|
308
|
+
yp_regexp_options_add(yp_regexp_options_t *options, uint8_t key) {
|
309
309
|
if (key >= YP_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= YP_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
|
310
|
-
key = (
|
310
|
+
key = (uint8_t) (key - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM);
|
311
311
|
|
312
312
|
switch (options->values[key]) {
|
313
313
|
case YP_REGEXP_OPTION_STATE_INVALID:
|
@@ -328,9 +328,9 @@ yp_regexp_options_add(yp_regexp_options_t *options, unsigned char key) {
|
|
328
328
|
// Attempt to remove the given option from the set of options. Returns true if
|
329
329
|
// it was removed, false if it was already absent.
|
330
330
|
static bool
|
331
|
-
yp_regexp_options_remove(yp_regexp_options_t *options,
|
331
|
+
yp_regexp_options_remove(yp_regexp_options_t *options, uint8_t key) {
|
332
332
|
if (key >= YP_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= YP_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
|
333
|
-
key = (
|
333
|
+
key = (uint8_t) (key - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM);
|
334
334
|
|
335
335
|
switch (options->values[key]) {
|
336
336
|
case YP_REGEXP_OPTION_STATE_INVALID:
|
@@ -431,7 +431,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
|
|
431
431
|
parser->cursor++;
|
432
432
|
break;
|
433
433
|
default: { // named capture group
|
434
|
-
const
|
434
|
+
const uint8_t *start = parser->cursor;
|
435
435
|
if (!yp_regexp_char_find(parser, '>')) {
|
436
436
|
return false;
|
437
437
|
}
|
@@ -441,7 +441,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
|
|
441
441
|
}
|
442
442
|
break;
|
443
443
|
case '\'': { // named capture group
|
444
|
-
const
|
444
|
+
const uint8_t *start = ++parser->cursor;
|
445
445
|
if (!yp_regexp_char_find(parser, '\'')) {
|
446
446
|
return false;
|
447
447
|
}
|
@@ -456,7 +456,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
|
|
456
456
|
break;
|
457
457
|
case 'i': case 'm': case 'x': case 'd': case 'a': case 'u': // options
|
458
458
|
while (!yp_regexp_char_is_eof(parser) && *parser->cursor != '-' && *parser->cursor != ':' && *parser->cursor != ')') {
|
459
|
-
if (!yp_regexp_options_add(&options,
|
459
|
+
if (!yp_regexp_options_add(&options, *parser->cursor)) {
|
460
460
|
return false;
|
461
461
|
}
|
462
462
|
parser->cursor++;
|
@@ -474,7 +474,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
|
|
474
474
|
case '-':
|
475
475
|
parser->cursor++;
|
476
476
|
while (!yp_regexp_char_is_eof(parser) && *parser->cursor != ':' && *parser->cursor != ')') {
|
477
|
-
if (!yp_regexp_options_remove(&options,
|
477
|
+
if (!yp_regexp_options_remove(&options, *parser->cursor)) {
|
478
478
|
return false;
|
479
479
|
}
|
480
480
|
parser->cursor++;
|
@@ -573,7 +573,7 @@ yp_regexp_parse_pattern(yp_regexp_parser_t *parser) {
|
|
573
573
|
// Parse a regular expression and extract the names of all of the named capture
|
574
574
|
// groups.
|
575
575
|
YP_EXPORTED_FUNCTION bool
|
576
|
-
yp_regexp_named_capture_group_names(const
|
576
|
+
yp_regexp_named_capture_group_names(const uint8_t *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
|
577
577
|
yp_regexp_parser_t parser;
|
578
578
|
yp_regexp_parser_init(&parser, source, source + size, named_captures, encoding_changed, encoding);
|
579
579
|
return yp_regexp_parse_pattern(&parser);
|