yarp 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +48 -1
- data/Makefile +5 -1
- data/README.md +4 -3
- data/config.yml +461 -150
- data/docs/configuration.md +1 -0
- data/docs/encoding.md +5 -5
- data/docs/ruby_api.md +2 -0
- data/docs/serialization.md +3 -3
- data/docs/testing.md +2 -2
- data/ext/yarp/api_node.c +810 -199
- data/ext/yarp/extension.c +94 -31
- data/ext/yarp/extension.h +2 -2
- data/include/yarp/ast.h +653 -150
- data/include/yarp/defines.h +2 -1
- data/include/yarp/diagnostic.h +3 -3
- data/include/yarp/enc/yp_encoding.h +10 -10
- data/include/yarp/node.h +10 -0
- data/include/yarp/parser.h +19 -19
- data/include/yarp/regexp.h +1 -1
- data/include/yarp/unescape.h +7 -5
- data/include/yarp/util/yp_buffer.h +3 -0
- data/include/yarp/util/yp_char.h +16 -16
- data/include/yarp/util/yp_constant_pool.h +2 -2
- data/include/yarp/util/yp_newline_list.h +7 -4
- data/include/yarp/util/yp_string.h +4 -4
- data/include/yarp/util/yp_string_list.h +0 -3
- data/include/yarp/util/yp_strpbrk.h +1 -1
- data/include/yarp/version.h +2 -2
- data/include/yarp.h +14 -3
- data/lib/yarp/desugar_visitor.rb +204 -0
- data/lib/yarp/ffi.rb +27 -1
- data/lib/yarp/lex_compat.rb +93 -25
- data/lib/yarp/mutation_visitor.rb +683 -0
- data/lib/yarp/node.rb +3121 -597
- data/lib/yarp/serialize.rb +198 -126
- data/lib/yarp.rb +53 -7
- data/src/diagnostic.c +1 -1
- data/src/enc/yp_big5.c +15 -42
- data/src/enc/yp_euc_jp.c +16 -43
- data/src/enc/yp_gbk.c +19 -46
- data/src/enc/yp_shift_jis.c +16 -43
- data/src/enc/yp_tables.c +36 -38
- data/src/enc/yp_unicode.c +20 -25
- data/src/enc/yp_windows_31j.c +16 -43
- data/src/node.c +1444 -836
- data/src/prettyprint.c +324 -103
- data/src/regexp.c +21 -21
- data/src/serialize.c +429 -276
- data/src/token_type.c +2 -2
- data/src/unescape.c +184 -136
- data/src/util/yp_buffer.c +7 -2
- data/src/util/yp_char.c +34 -34
- data/src/util/yp_constant_pool.c +4 -4
- data/src/util/yp_memchr.c +1 -1
- data/src/util/yp_newline_list.c +14 -3
- data/src/util/yp_string.c +22 -20
- data/src/util/yp_string_list.c +0 -6
- data/src/util/yp_strncasecmp.c +3 -6
- data/src/util/yp_strpbrk.c +8 -8
- data/src/yarp.c +1504 -615
- data/yarp.gemspec +3 -1
- metadata +4 -2
data/include/yarp/defines.h
CHANGED
@@ -6,6 +6,7 @@
|
|
6
6
|
#include <ctype.h>
|
7
7
|
#include <stdarg.h>
|
8
8
|
#include <stddef.h>
|
9
|
+
#include <stdint.h>
|
9
10
|
#include <stdio.h>
|
10
11
|
#include <string.h>
|
11
12
|
|
@@ -39,6 +40,6 @@
|
|
39
40
|
# define snprintf _snprintf
|
40
41
|
#endif
|
41
42
|
|
42
|
-
int yp_strncasecmp(const
|
43
|
+
int yp_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length);
|
43
44
|
|
44
45
|
#endif
|
data/include/yarp/diagnostic.h
CHANGED
@@ -10,13 +10,13 @@
|
|
10
10
|
// This struct represents a diagnostic found during parsing.
|
11
11
|
typedef struct {
|
12
12
|
yp_list_node_t node;
|
13
|
-
const
|
14
|
-
const
|
13
|
+
const uint8_t *start;
|
14
|
+
const uint8_t *end;
|
15
15
|
const char *message;
|
16
16
|
} yp_diagnostic_t;
|
17
17
|
|
18
18
|
// Append a diagnostic to the given list of diagnostics.
|
19
|
-
bool yp_diagnostic_list_append(yp_list_t *list, const
|
19
|
+
bool yp_diagnostic_list_append(yp_list_t *list, const uint8_t *start, const uint8_t *end, const char *message);
|
20
20
|
|
21
21
|
// Deallocate the internal state of the given diagnostic list.
|
22
22
|
void yp_diagnostic_list_free(yp_list_t *list);
|
data/include/yarp/enc/yp_encoding.h
CHANGED
@@ -16,22 +16,22 @@ typedef struct {
|
|
16
16
|
// Return the number of bytes that the next character takes if it is valid
|
17
17
|
// in the encoding. Does not read more than n bytes. It is assumed that n is
|
18
18
|
// at least 1.
|
19
|
-
size_t (*char_width)(const
|
19
|
+
size_t (*char_width)(const uint8_t *b, ptrdiff_t n);
|
20
20
|
|
21
21
|
// Return the number of bytes that the next character takes if it is valid
|
22
22
|
// in the encoding and is alphabetical. Does not read more than n bytes. It
|
23
23
|
// is assumed that n is at least 1.
|
24
|
-
size_t (*alpha_char)(const
|
24
|
+
size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n);
|
25
25
|
|
26
26
|
// Return the number of bytes that the next character takes if it is valid
|
27
27
|
// in the encoding and is alphanumeric. Does not read more than n bytes. It
|
28
28
|
// is assumed that n is at least 1.
|
29
|
-
size_t (*alnum_char)(const
|
29
|
+
size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n);
|
30
30
|
|
31
31
|
// Return true if the next character is valid in the encoding and is an
|
32
32
|
// uppercase character. Does not read more than n bytes. It is assumed that
|
33
33
|
// n is at least 1.
|
34
|
-
bool (*isupper_char)(const
|
34
|
+
bool (*isupper_char)(const uint8_t *b, ptrdiff_t n);
|
35
35
|
|
36
36
|
// The name of the encoding. This should correspond to a value that can be
|
37
37
|
// passed to Encoding.find in Ruby.
|
@@ -49,18 +49,18 @@ typedef struct {
|
|
49
49
|
|
50
50
|
// These functions are reused by some other encodings, so they are defined here
|
51
51
|
// so they can be shared.
|
52
|
-
size_t yp_encoding_ascii_alpha_char(const
|
53
|
-
size_t yp_encoding_ascii_alnum_char(const
|
54
|
-
bool yp_encoding_ascii_isupper_char(const
|
52
|
+
size_t yp_encoding_ascii_alpha_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
|
53
|
+
size_t yp_encoding_ascii_alnum_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
|
54
|
+
bool yp_encoding_ascii_isupper_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
|
55
55
|
|
56
56
|
// These functions are shared between the actual encoding and the fast path in
|
57
57
|
// the parser so they need to be internally visible.
|
58
|
-
size_t yp_encoding_utf_8_alpha_char(const
|
59
|
-
size_t yp_encoding_utf_8_alnum_char(const
|
58
|
+
size_t yp_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n);
|
59
|
+
size_t yp_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n);
|
60
60
|
|
61
61
|
// This lookup table is referenced in both the UTF-8 encoding file and the
|
62
62
|
// parser directly in order to speed up the default encoding processing.
|
63
|
-
extern
|
63
|
+
extern uint8_t yp_encoding_unicode_table[256];
|
64
64
|
|
65
65
|
// These are the encodings that are supported by the parser. They are defined in
|
66
66
|
// their own files in the src/enc directory.
|
data/include/yarp/node.h
CHANGED
@@ -34,3 +34,13 @@ YP_EXPORTED_FUNCTION const char * yp_node_type_to_str(yp_node_type_t node_type);
|
|
34
34
|
#define YP_EMPTY_LOCATION_LIST ((yp_location_list_t) { .locations = NULL, .size = 0, .capacity = 0 })
|
35
35
|
|
36
36
|
#endif // YARP_NODE_H
|
37
|
+
|
38
|
+
// ScopeNodes are helper nodes, and will never
|
39
|
+
// be part of the AST. We manually declare them
|
40
|
+
// here to avoid generating them
|
41
|
+
typedef struct yp_scope_node {
|
42
|
+
yp_node_t base;
|
43
|
+
struct yp_parameters_node *parameters;
|
44
|
+
yp_node_t *body;
|
45
|
+
yp_constant_id_list_t locals;
|
46
|
+
} yp_scope_node_t;
|
data/include/yarp/parser.h
CHANGED
@@ -109,14 +109,14 @@ typedef struct yp_lex_mode {
|
|
109
109
|
|
110
110
|
// When lexing a list, it takes into account balancing the
|
111
111
|
// terminator if the terminator is one of (), [], {}, or <>.
|
112
|
-
|
112
|
+
uint8_t incrementor;
|
113
113
|
|
114
114
|
// This is the terminator of the list literal.
|
115
|
-
|
115
|
+
uint8_t terminator;
|
116
116
|
|
117
117
|
// This is the character set that should be used to delimit the
|
118
118
|
// tokens within the list.
|
119
|
-
|
119
|
+
uint8_t breakpoints[11];
|
120
120
|
} list;
|
121
121
|
|
122
122
|
struct {
|
@@ -125,14 +125,14 @@ typedef struct yp_lex_mode {
|
|
125
125
|
|
126
126
|
// When lexing a regular expression, it takes into account balancing
|
127
127
|
// the terminator if the terminator is one of (), [], {}, or <>.
|
128
|
-
|
128
|
+
uint8_t incrementor;
|
129
129
|
|
130
130
|
// This is the terminator of the regular expression.
|
131
|
-
|
131
|
+
uint8_t terminator;
|
132
132
|
|
133
133
|
// This is the character set that should be used to delimit the
|
134
134
|
// tokens within the regular expression.
|
135
|
-
|
135
|
+
uint8_t breakpoints[6];
|
136
136
|
} regexp;
|
137
137
|
|
138
138
|
struct {
|
@@ -149,21 +149,21 @@ typedef struct yp_lex_mode {
|
|
149
149
|
|
150
150
|
// When lexing a string, it takes into account balancing the
|
151
151
|
// terminator if the terminator is one of (), [], {}, or <>.
|
152
|
-
|
152
|
+
uint8_t incrementor;
|
153
153
|
|
154
154
|
// This is the terminator of the string. It is typically either a
|
155
155
|
// single or double quote.
|
156
|
-
|
156
|
+
uint8_t terminator;
|
157
157
|
|
158
158
|
// This is the character set that should be used to delimit the
|
159
159
|
// tokens within the string.
|
160
|
-
|
160
|
+
uint8_t breakpoints[6];
|
161
161
|
} string;
|
162
162
|
|
163
163
|
struct {
|
164
164
|
// These pointers point to the beginning and end of the heredoc
|
165
165
|
// identifier.
|
166
|
-
const
|
166
|
+
const uint8_t *ident_start;
|
167
167
|
size_t ident_length;
|
168
168
|
|
169
169
|
yp_heredoc_quote_t quote;
|
@@ -171,7 +171,7 @@ typedef struct yp_lex_mode {
|
|
171
171
|
|
172
172
|
// This is the pointer to the character where lexing should resume
|
173
173
|
// once the heredoc has been completely processed.
|
174
|
-
const
|
174
|
+
const uint8_t *next_start;
|
175
175
|
} heredoc;
|
176
176
|
} as;
|
177
177
|
|
@@ -239,8 +239,8 @@ typedef enum {
|
|
239
239
|
// This is a node in the linked list of comments that we've found while parsing.
|
240
240
|
typedef struct yp_comment {
|
241
241
|
yp_list_node_t node;
|
242
|
-
const
|
243
|
-
const
|
242
|
+
const uint8_t *start;
|
243
|
+
const uint8_t *end;
|
244
244
|
yp_comment_type_t type;
|
245
245
|
} yp_comment_t;
|
246
246
|
|
@@ -252,7 +252,7 @@ typedef void (*yp_encoding_changed_callback_t)(yp_parser_t *parser);
|
|
252
252
|
// the ability here to call out to a user-defined function to get an encoding
|
253
253
|
// struct. If the function returns something that isn't NULL, we set that to
|
254
254
|
// our encoding and use it to parse identifiers.
|
255
|
-
typedef yp_encoding_t *(*yp_encoding_decode_callback_t)(yp_parser_t *parser, const
|
255
|
+
typedef yp_encoding_t *(*yp_encoding_decode_callback_t)(yp_parser_t *parser, const uint8_t *name, size_t width);
|
256
256
|
|
257
257
|
// When you are lexing through a file, the lexer needs all of the information
|
258
258
|
// that the parser additionally provides (for example, the local table). So if
|
@@ -316,21 +316,21 @@ struct yp_parser {
|
|
316
316
|
size_t index; // the current index into the lexer mode stack
|
317
317
|
} lex_modes;
|
318
318
|
|
319
|
-
const
|
320
|
-
const
|
319
|
+
const uint8_t *start; // the pointer to the start of the source
|
320
|
+
const uint8_t *end; // the pointer to the end of the source
|
321
321
|
yp_token_t previous; // the previous token we were considering
|
322
322
|
yp_token_t current; // the current token we're considering
|
323
323
|
|
324
324
|
// This is a special field set on the parser when we need the parser to jump
|
325
325
|
// to a specific location when lexing the next token, as opposed to just
|
326
326
|
// using the end of the previous token. Normally this is NULL.
|
327
|
-
const
|
327
|
+
const uint8_t *next_start;
|
328
328
|
|
329
329
|
// This field indicates the end of a heredoc whose identifier was found on
|
330
330
|
// the current line. If another heredoc is found on the same line, then this
|
331
331
|
// will be moved forward to the end of that heredoc. If no heredocs are
|
332
332
|
// found on a line then this is NULL.
|
333
|
-
const
|
333
|
+
const uint8_t *heredoc_end;
|
334
334
|
|
335
335
|
yp_list_t comment_list; // the list of comments that have been found while parsing
|
336
336
|
yp_list_t warning_list; // the list of warnings that have been found while parsing
|
@@ -361,7 +361,7 @@ struct yp_parser {
|
|
361
361
|
|
362
362
|
// This pointer indicates where a comment must start if it is to be
|
363
363
|
// considered an encoding comment.
|
364
|
-
const
|
364
|
+
const uint8_t *encoding_comment_start;
|
365
365
|
|
366
366
|
// This is an optional callback that can be attached to the parser that will
|
367
367
|
// be called whenever a new token is lexed by the parser.
|
data/include/yarp/regexp.h
CHANGED
@@ -14,6 +14,6 @@
|
|
14
14
|
|
15
15
|
// Parse a regular expression and extract the names of all of the named capture
|
16
16
|
// groups.
|
17
|
-
YP_EXPORTED_FUNCTION bool yp_regexp_named_capture_group_names(const
|
17
|
+
YP_EXPORTED_FUNCTION bool yp_regexp_named_capture_group_names(const uint8_t *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding);
|
18
18
|
|
19
19
|
#endif
|
data/include/yarp/unescape.h
CHANGED
@@ -29,14 +29,16 @@ typedef enum {
|
|
29
29
|
YP_UNESCAPE_ALL
|
30
30
|
} yp_unescape_type_t;
|
31
31
|
|
32
|
-
// Unescape the contents of the given token into the given string using the
|
33
|
-
|
34
|
-
|
32
|
+
// Unescape the contents of the given token into the given string using the given unescape mode.
|
33
|
+
YP_EXPORTED_FUNCTION void yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type);
|
34
|
+
void yp_unescape_manipulate_char_literal(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type);
|
35
35
|
|
36
36
|
// Accepts a source string and a type of unescaping and returns the unescaped version.
|
37
37
|
// The caller must yp_string_free(result); after calling this function.
|
38
|
-
YP_EXPORTED_FUNCTION bool yp_unescape_string(const
|
38
|
+
YP_EXPORTED_FUNCTION bool yp_unescape_string(const uint8_t *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result);
|
39
39
|
|
40
|
-
|
40
|
+
// Returns the number of bytes that encompass the first escape sequence in the
|
41
|
+
// given string.
|
42
|
+
size_t yp_unescape_calculate_difference(yp_parser_t *parser, const uint8_t *value, yp_unescape_type_t unescape_type, bool expect_single_codepoint);
|
41
43
|
|
42
44
|
#endif
|
data/include/yarp/util/yp_buffer.h
CHANGED
@@ -36,6 +36,9 @@ void yp_buffer_append_zeroes(yp_buffer_t *buffer, size_t length);
|
|
36
36
|
// Append a string to the buffer.
|
37
37
|
void yp_buffer_append_str(yp_buffer_t *buffer, const char *value, size_t length);
|
38
38
|
|
39
|
+
// Append a list of bytes to the buffer.
|
40
|
+
void yp_buffer_append_bytes(yp_buffer_t *buffer, const uint8_t *value, size_t length);
|
41
|
+
|
39
42
|
// Append a single byte to the buffer.
|
40
43
|
void yp_buffer_append_u8(yp_buffer_t *buffer, uint8_t value);
|
41
44
|
|
data/include/yarp/util/yp_char.h
CHANGED
@@ -9,67 +9,67 @@
|
|
9
9
|
|
10
10
|
// Returns the number of characters at the start of the string that are
|
11
11
|
// whitespace. Disallows searching past the given maximum number of characters.
|
12
|
-
size_t yp_strspn_whitespace(const
|
12
|
+
size_t yp_strspn_whitespace(const uint8_t *string, ptrdiff_t length);
|
13
13
|
|
14
14
|
// Returns the number of characters at the start of the string that are
|
15
15
|
// whitespace while also tracking the location of each newline. Disallows
|
16
16
|
// searching past the given maximum number of characters.
|
17
17
|
size_t
|
18
|
-
yp_strspn_whitespace_newlines(const
|
18
|
+
yp_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, yp_newline_list_t *newline_list, bool stop_at_newline);
|
19
19
|
|
20
20
|
// Returns the number of characters at the start of the string that are inline
|
21
21
|
// whitespace. Disallows searching past the given maximum number of characters.
|
22
|
-
size_t yp_strspn_inline_whitespace(const
|
22
|
+
size_t yp_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length);
|
23
23
|
|
24
24
|
// Returns the number of characters at the start of the string that are decimal
|
25
25
|
// digits. Disallows searching past the given maximum number of characters.
|
26
|
-
size_t yp_strspn_decimal_digit(const
|
26
|
+
size_t yp_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length);
|
27
27
|
|
28
28
|
// Returns the number of characters at the start of the string that are
|
29
29
|
// hexadecimal digits. Disallows searching past the given maximum number of
|
30
30
|
// characters.
|
31
|
-
size_t yp_strspn_hexadecimal_digit(const
|
31
|
+
size_t yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length);
|
32
32
|
|
33
33
|
// Returns the number of characters at the start of the string that are octal
|
34
34
|
// digits or underscores. Disallows searching past the given maximum number of
|
35
35
|
// characters.
|
36
|
-
size_t yp_strspn_octal_number(const
|
36
|
+
size_t yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length);
|
37
37
|
|
38
38
|
// Returns the number of characters at the start of the string that are decimal
|
39
39
|
// digits or underscores. Disallows searching past the given maximum number of
|
40
40
|
// characters.
|
41
|
-
size_t yp_strspn_decimal_number(const
|
41
|
+
size_t yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length);
|
42
42
|
|
43
43
|
// Returns the number of characters at the start of the string that are
|
44
44
|
// hexadecimal digits or underscores. Disallows searching past the given maximum
|
45
45
|
// number of characters.
|
46
|
-
size_t yp_strspn_hexadecimal_number(const
|
46
|
+
size_t yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length);
|
47
47
|
|
48
48
|
// Returns the number of characters at the start of the string that are regexp
|
49
49
|
// options. Disallows searching past the given maximum number of characters.
|
50
|
-
size_t yp_strspn_regexp_option(const
|
50
|
+
size_t yp_strspn_regexp_option(const uint8_t *string, ptrdiff_t length);
|
51
51
|
|
52
52
|
// Returns the number of characters at the start of the string that are binary
|
53
53
|
// digits or underscores. Disallows searching past the given maximum number of
|
54
54
|
// characters.
|
55
|
-
size_t yp_strspn_binary_number(const
|
55
|
+
size_t yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length);
|
56
56
|
|
57
57
|
// Returns true if the given character is a whitespace character.
|
58
|
-
bool yp_char_is_whitespace(const
|
58
|
+
bool yp_char_is_whitespace(const uint8_t b);
|
59
59
|
|
60
60
|
// Returns true if the given character is an inline whitespace character.
|
61
|
-
bool yp_char_is_inline_whitespace(const
|
61
|
+
bool yp_char_is_inline_whitespace(const uint8_t b);
|
62
62
|
|
63
63
|
// Returns true if the given character is a binary digit.
|
64
|
-
bool yp_char_is_binary_digit(const
|
64
|
+
bool yp_char_is_binary_digit(const uint8_t b);
|
65
65
|
|
66
66
|
// Returns true if the given character is an octal digit.
|
67
|
-
bool yp_char_is_octal_digit(const
|
67
|
+
bool yp_char_is_octal_digit(const uint8_t b);
|
68
68
|
|
69
69
|
// Returns true if the given character is a decimal digit.
|
70
|
-
bool yp_char_is_decimal_digit(const
|
70
|
+
bool yp_char_is_decimal_digit(const uint8_t b);
|
71
71
|
|
72
72
|
// Returns true if the given character is a hexadecimal digit.
|
73
|
-
bool yp_char_is_hexadecimal_digit(const
|
73
|
+
bool yp_char_is_hexadecimal_digit(const uint8_t b);
|
74
74
|
|
75
75
|
#endif
|
data/include/yarp/util/yp_constant_pool.h
CHANGED
@@ -40,7 +40,7 @@ void yp_constant_id_list_free(yp_constant_id_list_t *list);
|
|
40
40
|
|
41
41
|
typedef struct {
|
42
42
|
yp_constant_id_t id;
|
43
|
-
const
|
43
|
+
const uint8_t *start;
|
44
44
|
size_t length;
|
45
45
|
size_t hash;
|
46
46
|
} yp_constant_t;
|
@@ -59,7 +59,7 @@ bool yp_constant_pool_init(yp_constant_pool_t *pool, size_t capacity);
|
|
59
59
|
|
60
60
|
// Insert a constant into a constant pool. Returns the id of the constant, or 0
|
61
61
|
// if any potential calls to resize fail.
|
62
|
-
yp_constant_id_t yp_constant_pool_insert(yp_constant_pool_t *pool, const
|
62
|
+
yp_constant_id_t yp_constant_pool_insert(yp_constant_pool_t *pool, const uint8_t *start, size_t length);
|
63
63
|
|
64
64
|
// Free the memory associated with a constant pool.
|
65
65
|
void yp_constant_pool_free(yp_constant_pool_t *pool);
|
data/include/yarp/util/yp_newline_list.h
CHANGED
@@ -19,7 +19,7 @@
|
|
19
19
|
// A list of offsets of newlines in a string. The offsets are assumed to be
|
20
20
|
// sorted/inserted in ascending order.
|
21
21
|
typedef struct {
|
22
|
-
const
|
22
|
+
const uint8_t *start;
|
23
23
|
|
24
24
|
size_t *offsets;
|
25
25
|
size_t size;
|
@@ -41,16 +41,19 @@ typedef struct {
|
|
41
41
|
|
42
42
|
// Initialize a new newline list with the given capacity. Returns true if the
|
43
43
|
// allocation of the offsets succeeds, otherwise returns false.
|
44
|
-
bool yp_newline_list_init(yp_newline_list_t *list, const
|
44
|
+
bool yp_newline_list_init(yp_newline_list_t *list, const uint8_t *start, size_t capacity);
|
45
45
|
|
46
46
|
// Append a new offset to the newline list. Returns true if the reallocation of
|
47
47
|
// the offsets succeeds (if one was necessary), otherwise returns false.
|
48
|
-
bool yp_newline_list_append(yp_newline_list_t *list, const
|
48
|
+
bool yp_newline_list_append(yp_newline_list_t *list, const uint8_t *cursor);
|
49
|
+
|
50
|
+
// Conditionally append a new offset to the newline list, if the value passed in is a newline.
|
51
|
+
bool yp_newline_list_check_append(yp_newline_list_t *list, const uint8_t *cursor);
|
49
52
|
|
50
53
|
// Returns the line and column of the given offset. If the offset is not in the
|
51
54
|
// list, the line and column of the closest offset less than the given offset
|
52
55
|
// are returned.
|
53
|
-
yp_line_column_t yp_newline_list_line_column(yp_newline_list_t *list, const
|
56
|
+
yp_line_column_t yp_newline_list_line_column(yp_newline_list_t *list, const uint8_t *cursor);
|
54
57
|
|
55
58
|
// Free the internal memory allocated for the newline list.
|
56
59
|
void yp_newline_list_free(yp_newline_list_t *list);
|
data/include/yarp/util/yp_string.h
CHANGED
@@ -12,17 +12,17 @@
|
|
12
12
|
// This struct represents a string value.
|
13
13
|
typedef struct {
|
14
14
|
enum { YP_STRING_SHARED, YP_STRING_OWNED, YP_STRING_CONSTANT, YP_STRING_MAPPED } type;
|
15
|
-
|
15
|
+
const uint8_t *source;
|
16
16
|
size_t length;
|
17
17
|
} yp_string_t;
|
18
18
|
|
19
19
|
#define YP_EMPTY_STRING ((yp_string_t) { .type = YP_STRING_CONSTANT, .source = NULL, .length = 0 })
|
20
20
|
|
21
21
|
// Initialize a shared string that is based on initial input.
|
22
|
-
void yp_string_shared_init(yp_string_t *string, const
|
22
|
+
void yp_string_shared_init(yp_string_t *string, const uint8_t *start, const uint8_t *end);
|
23
23
|
|
24
24
|
// Initialize an owned string that is responsible for freeing allocated memory.
|
25
|
-
void yp_string_owned_init(yp_string_t *string,
|
25
|
+
void yp_string_owned_init(yp_string_t *string, uint8_t *source, size_t length);
|
26
26
|
|
27
27
|
// Initialize a constant string that doesn't own its memory source.
|
28
28
|
void yp_string_constant_init(yp_string_t *string, const char *source, size_t length);
|
@@ -49,7 +49,7 @@ void yp_string_ensure_owned(yp_string_t *string);
|
|
49
49
|
YP_EXPORTED_FUNCTION size_t yp_string_length(const yp_string_t *string);
|
50
50
|
|
51
51
|
// Returns the start pointer associated with the string.
|
52
|
-
YP_EXPORTED_FUNCTION const
|
52
|
+
YP_EXPORTED_FUNCTION const uint8_t * yp_string_source(const yp_string_t *string);
|
53
53
|
|
54
54
|
// Free the associated memory of the given string.
|
55
55
|
YP_EXPORTED_FUNCTION void yp_string_free(yp_string_t *string);
|
data/include/yarp/util/yp_string_list.h
CHANGED
@@ -13,9 +13,6 @@ typedef struct {
|
|
13
13
|
size_t capacity;
|
14
14
|
} yp_string_list_t;
|
15
15
|
|
16
|
-
// Allocate a new yp_string_list_t.
|
17
|
-
yp_string_list_t * yp_string_list_alloc(void);
|
18
|
-
|
19
16
|
// Initialize a yp_string_list_t with its default values.
|
20
17
|
YP_EXPORTED_FUNCTION void yp_string_list_init(yp_string_list_t *string_list);
|
21
18
|
|
data/include/yarp/util/yp_strpbrk.h
CHANGED
@@ -24,6 +24,6 @@
|
|
24
24
|
// characters that are trailing bytes of multi-byte characters. For example, in
|
25
25
|
// Shift-JIS, the backslash character can be a trailing byte. In that case we
|
26
26
|
// need to take a slower path and iterate one multi-byte character at a time.
|
27
|
-
const
|
27
|
+
const uint8_t * yp_strpbrk(yp_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length);
|
28
28
|
|
29
29
|
#endif
|
data/include/yarp/version.h
CHANGED
data/include/yarp.h
CHANGED
@@ -13,8 +13,10 @@
|
|
13
13
|
#include "yarp/util/yp_char.h"
|
14
14
|
#include "yarp/util/yp_memchr.h"
|
15
15
|
#include "yarp/util/yp_strpbrk.h"
|
16
|
+
#include "yarp/version.h"
|
16
17
|
|
17
18
|
#include <assert.h>
|
19
|
+
#include <errno.h>
|
18
20
|
#include <stdarg.h>
|
19
21
|
#include <stdbool.h>
|
20
22
|
#include <stdint.h>
|
@@ -30,11 +32,16 @@ void yp_serialize_content(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buf
|
|
30
32
|
|
31
33
|
void yp_print_node(yp_parser_t *parser, yp_node_t *node);
|
32
34
|
|
35
|
+
void yp_parser_metadata(yp_parser_t *parser, const char *metadata);
|
36
|
+
|
37
|
+
// Generate a scope node from the given node.
|
38
|
+
void yp_scope_node_init(yp_node_t *node, yp_scope_node_t *dest);
|
39
|
+
|
33
40
|
// The YARP version and the serialization format.
|
34
41
|
YP_EXPORTED_FUNCTION const char * yp_version(void);
|
35
42
|
|
36
43
|
// Initialize a parser with the given start and end pointers.
|
37
|
-
YP_EXPORTED_FUNCTION void yp_parser_init(yp_parser_t *parser, const
|
44
|
+
YP_EXPORTED_FUNCTION void yp_parser_init(yp_parser_t *parser, const uint8_t *source, size_t size, const char *filepath);
|
38
45
|
|
39
46
|
// Register a callback that will be called whenever YARP changes the encoding it
|
40
47
|
// is using to parse based on the magic comment.
|
@@ -60,10 +67,14 @@ YP_EXPORTED_FUNCTION void yp_prettyprint(yp_parser_t *parser, yp_node_t *node, y
|
|
60
67
|
YP_EXPORTED_FUNCTION void yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
|
61
68
|
|
62
69
|
// Parse the given source to the AST and serialize the AST to the given buffer.
|
63
|
-
YP_EXPORTED_FUNCTION void yp_parse_serialize(const
|
70
|
+
YP_EXPORTED_FUNCTION void yp_parse_serialize(const uint8_t *source, size_t size, yp_buffer_t *buffer, const char *metadata);
|
64
71
|
|
65
72
|
// Lex the given source and serialize to the given buffer.
|
66
|
-
YP_EXPORTED_FUNCTION void yp_lex_serialize(const
|
73
|
+
YP_EXPORTED_FUNCTION void yp_lex_serialize(const uint8_t *source, size_t size, const char *filepath, yp_buffer_t *buffer);
|
74
|
+
|
75
|
+
// Parse and serialize both the AST and the tokens represented by the given
|
76
|
+
// source to the given buffer.
|
77
|
+
YP_EXPORTED_FUNCTION void yp_parse_lex_serialize(const uint8_t *source, size_t size, yp_buffer_t *buffer, const char *metadata);
|
67
78
|
|
68
79
|
// Returns a string representation of the given token type.
|
69
80
|
YP_EXPORTED_FUNCTION const char * yp_token_type_to_str(yp_token_type_t token_type);
|