yarp 0.9.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -1
- data/Makefile +5 -1
- data/config.yml +156 -125
- data/docs/encoding.md +5 -5
- data/docs/serialization.md +2 -2
- data/ext/yarp/api_node.c +142 -98
- data/ext/yarp/extension.c +21 -7
- data/ext/yarp/extension.h +1 -1
- data/include/yarp/ast.h +327 -18
- data/include/yarp/defines.h +2 -1
- data/include/yarp/diagnostic.h +3 -3
- data/include/yarp/enc/yp_encoding.h +10 -10
- data/include/yarp/parser.h +19 -19
- data/include/yarp/regexp.h +1 -1
- data/include/yarp/unescape.h +4 -4
- data/include/yarp/util/yp_buffer.h +3 -0
- data/include/yarp/util/yp_char.h +16 -16
- data/include/yarp/util/yp_constant_pool.h +2 -2
- data/include/yarp/util/yp_newline_list.h +5 -5
- data/include/yarp/util/yp_string.h +4 -4
- data/include/yarp/util/yp_string_list.h +0 -3
- data/include/yarp/util/yp_strpbrk.h +1 -1
- data/include/yarp/version.h +2 -2
- data/include/yarp.h +5 -4
- data/lib/yarp/desugar_visitor.rb +59 -122
- data/lib/yarp/node.rb +230 -240
- data/lib/yarp/serialize.rb +16 -16
- data/lib/yarp.rb +5 -5
- data/src/diagnostic.c +1 -1
- data/src/enc/yp_big5.c +15 -42
- data/src/enc/yp_euc_jp.c +16 -43
- data/src/enc/yp_gbk.c +19 -46
- data/src/enc/yp_shift_jis.c +16 -43
- data/src/enc/yp_tables.c +36 -38
- data/src/enc/yp_unicode.c +20 -25
- data/src/enc/yp_windows_31j.c +16 -43
- data/src/node.c +1271 -899
- data/src/prettyprint.c +87 -48
- data/src/regexp.c +21 -21
- data/src/serialize.c +28 -15
- data/src/unescape.c +151 -121
- data/src/util/yp_buffer.c +7 -2
- data/src/util/yp_char.c +34 -34
- data/src/util/yp_constant_pool.c +4 -4
- data/src/util/yp_memchr.c +1 -1
- data/src/util/yp_newline_list.c +5 -4
- data/src/util/yp_string.c +22 -20
- data/src/util/yp_string_list.c +0 -6
- data/src/util/yp_strncasecmp.c +3 -6
- data/src/util/yp_strpbrk.c +8 -8
- data/src/yarp.c +355 -216
- data/yarp.gemspec +1 -1
- metadata +2 -2
data/include/yarp/defines.h
CHANGED
@@ -6,6 +6,7 @@
|
|
6
6
|
#include <ctype.h>
|
7
7
|
#include <stdarg.h>
|
8
8
|
#include <stddef.h>
|
9
|
+
#include <stdint.h>
|
9
10
|
#include <stdio.h>
|
10
11
|
#include <string.h>
|
11
12
|
|
@@ -39,6 +40,6 @@
|
|
39
40
|
# define snprintf _snprintf
|
40
41
|
#endif
|
41
42
|
|
42
|
-
int yp_strncasecmp(const
|
43
|
+
int yp_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length);
|
43
44
|
|
44
45
|
#endif
|
data/include/yarp/diagnostic.h
CHANGED
@@ -10,13 +10,13 @@
|
|
10
10
|
// This struct represents a diagnostic found during parsing.
|
11
11
|
typedef struct {
|
12
12
|
yp_list_node_t node;
|
13
|
-
const
|
14
|
-
const
|
13
|
+
const uint8_t *start;
|
14
|
+
const uint8_t *end;
|
15
15
|
const char *message;
|
16
16
|
} yp_diagnostic_t;
|
17
17
|
|
18
18
|
// Append a diagnostic to the given list of diagnostics.
|
19
|
-
bool yp_diagnostic_list_append(yp_list_t *list, const
|
19
|
+
bool yp_diagnostic_list_append(yp_list_t *list, const uint8_t *start, const uint8_t *end, const char *message);
|
20
20
|
|
21
21
|
// Deallocate the internal state of the given diagnostic list.
|
22
22
|
void yp_diagnostic_list_free(yp_list_t *list);
|
data/include/yarp/enc/yp_encoding.h
CHANGED
@@ -16,22 +16,22 @@ typedef struct {
|
|
16
16
|
// Return the number of bytes that the next character takes if it is valid
|
17
17
|
// in the encoding. Does not read more than n bytes. It is assumed that n is
|
18
18
|
// at least 1.
|
19
|
-
size_t (*char_width)(const
|
19
|
+
size_t (*char_width)(const uint8_t *b, ptrdiff_t n);
|
20
20
|
|
21
21
|
// Return the number of bytes that the next character takes if it is valid
|
22
22
|
// in the encoding and is alphabetical. Does not read more than n bytes. It
|
23
23
|
// is assumed that n is at least 1.
|
24
|
-
size_t (*alpha_char)(const
|
24
|
+
size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n);
|
25
25
|
|
26
26
|
// Return the number of bytes that the next character takes if it is valid
|
27
27
|
// in the encoding and is alphanumeric. Does not read more than n bytes. It
|
28
28
|
// is assumed that n is at least 1.
|
29
|
-
size_t (*alnum_char)(const
|
29
|
+
size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n);
|
30
30
|
|
31
31
|
// Return true if the next character is valid in the encoding and is an
|
32
32
|
// uppercase character. Does not read more than n bytes. It is assumed that
|
33
33
|
// n is at least 1.
|
34
|
-
bool (*isupper_char)(const
|
34
|
+
bool (*isupper_char)(const uint8_t *b, ptrdiff_t n);
|
35
35
|
|
36
36
|
// The name of the encoding. This should correspond to a value that can be
|
37
37
|
// passed to Encoding.find in Ruby.
|
@@ -49,18 +49,18 @@ typedef struct {
|
|
49
49
|
|
50
50
|
// These functions are reused by some other encodings, so they are defined here
|
51
51
|
// so they can be shared.
|
52
|
-
size_t yp_encoding_ascii_alpha_char(const
|
53
|
-
size_t yp_encoding_ascii_alnum_char(const
|
54
|
-
bool yp_encoding_ascii_isupper_char(const
|
52
|
+
size_t yp_encoding_ascii_alpha_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
|
53
|
+
size_t yp_encoding_ascii_alnum_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
|
54
|
+
bool yp_encoding_ascii_isupper_char(const uint8_t *b, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
|
55
55
|
|
56
56
|
// These functions are shared between the actual encoding and the fast path in
|
57
57
|
// the parser so they need to be internally visible.
|
58
|
-
size_t yp_encoding_utf_8_alpha_char(const
|
59
|
-
size_t yp_encoding_utf_8_alnum_char(const
|
58
|
+
size_t yp_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n);
|
59
|
+
size_t yp_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n);
|
60
60
|
|
61
61
|
// This lookup table is referenced in both the UTF-8 encoding file and the
|
62
62
|
// parser directly in order to speed up the default encoding processing.
|
63
|
-
extern
|
63
|
+
extern uint8_t yp_encoding_unicode_table[256];
|
64
64
|
|
65
65
|
// These are the encodings that are supported by the parser. They are defined in
|
66
66
|
// their own files in the src/enc directory.
|
data/include/yarp/parser.h
CHANGED
@@ -109,14 +109,14 @@ typedef struct yp_lex_mode {
|
|
109
109
|
|
110
110
|
// When lexing a list, it takes into account balancing the
|
111
111
|
// terminator if the terminator is one of (), [], {}, or <>.
|
112
|
-
|
112
|
+
uint8_t incrementor;
|
113
113
|
|
114
114
|
// This is the terminator of the list literal.
|
115
|
-
|
115
|
+
uint8_t terminator;
|
116
116
|
|
117
117
|
// This is the character set that should be used to delimit the
|
118
118
|
// tokens within the list.
|
119
|
-
|
119
|
+
uint8_t breakpoints[11];
|
120
120
|
} list;
|
121
121
|
|
122
122
|
struct {
|
@@ -125,14 +125,14 @@ typedef struct yp_lex_mode {
|
|
125
125
|
|
126
126
|
// When lexing a regular expression, it takes into account balancing
|
127
127
|
// the terminator if the terminator is one of (), [], {}, or <>.
|
128
|
-
|
128
|
+
uint8_t incrementor;
|
129
129
|
|
130
130
|
// This is the terminator of the regular expression.
|
131
|
-
|
131
|
+
uint8_t terminator;
|
132
132
|
|
133
133
|
// This is the character set that should be used to delimit the
|
134
134
|
// tokens within the regular expression.
|
135
|
-
|
135
|
+
uint8_t breakpoints[6];
|
136
136
|
} regexp;
|
137
137
|
|
138
138
|
struct {
|
@@ -149,21 +149,21 @@ typedef struct yp_lex_mode {
|
|
149
149
|
|
150
150
|
// When lexing a string, it takes into account balancing the
|
151
151
|
// terminator if the terminator is one of (), [], {}, or <>.
|
152
|
-
|
152
|
+
uint8_t incrementor;
|
153
153
|
|
154
154
|
// This is the terminator of the string. It is typically either a
|
155
155
|
// single or double quote.
|
156
|
-
|
156
|
+
uint8_t terminator;
|
157
157
|
|
158
158
|
// This is the character set that should be used to delimit the
|
159
159
|
// tokens within the string.
|
160
|
-
|
160
|
+
uint8_t breakpoints[6];
|
161
161
|
} string;
|
162
162
|
|
163
163
|
struct {
|
164
164
|
// These pointers point to the beginning and end of the heredoc
|
165
165
|
// identifier.
|
166
|
-
const
|
166
|
+
const uint8_t *ident_start;
|
167
167
|
size_t ident_length;
|
168
168
|
|
169
169
|
yp_heredoc_quote_t quote;
|
@@ -171,7 +171,7 @@ typedef struct yp_lex_mode {
|
|
171
171
|
|
172
172
|
// This is the pointer to the character where lexing should resume
|
173
173
|
// once the heredoc has been completely processed.
|
174
|
-
const
|
174
|
+
const uint8_t *next_start;
|
175
175
|
} heredoc;
|
176
176
|
} as;
|
177
177
|
|
@@ -239,8 +239,8 @@ typedef enum {
|
|
239
239
|
// This is a node in the linked list of comments that we've found while parsing.
|
240
240
|
typedef struct yp_comment {
|
241
241
|
yp_list_node_t node;
|
242
|
-
const
|
243
|
-
const
|
242
|
+
const uint8_t *start;
|
243
|
+
const uint8_t *end;
|
244
244
|
yp_comment_type_t type;
|
245
245
|
} yp_comment_t;
|
246
246
|
|
@@ -252,7 +252,7 @@ typedef void (*yp_encoding_changed_callback_t)(yp_parser_t *parser);
|
|
252
252
|
// the ability here to call out to a user-defined function to get an encoding
|
253
253
|
// struct. If the function returns something that isn't NULL, we set that to
|
254
254
|
// our encoding and use it to parse identifiers.
|
255
|
-
typedef yp_encoding_t *(*yp_encoding_decode_callback_t)(yp_parser_t *parser, const
|
255
|
+
typedef yp_encoding_t *(*yp_encoding_decode_callback_t)(yp_parser_t *parser, const uint8_t *name, size_t width);
|
256
256
|
|
257
257
|
// When you are lexing through a file, the lexer needs all of the information
|
258
258
|
// that the parser additionally provides (for example, the local table). So if
|
@@ -316,21 +316,21 @@ struct yp_parser {
|
|
316
316
|
size_t index; // the current index into the lexer mode stack
|
317
317
|
} lex_modes;
|
318
318
|
|
319
|
-
const
|
320
|
-
const
|
319
|
+
const uint8_t *start; // the pointer to the start of the source
|
320
|
+
const uint8_t *end; // the pointer to the end of the source
|
321
321
|
yp_token_t previous; // the previous token we were considering
|
322
322
|
yp_token_t current; // the current token we're considering
|
323
323
|
|
324
324
|
// This is a special field set on the parser when we need the parser to jump
|
325
325
|
// to a specific location when lexing the next token, as opposed to just
|
326
326
|
// using the end of the previous token. Normally this is NULL.
|
327
|
-
const
|
327
|
+
const uint8_t *next_start;
|
328
328
|
|
329
329
|
// This field indicates the end of a heredoc whose identifier was found on
|
330
330
|
// the current line. If another heredoc is found on the same line, then this
|
331
331
|
// will be moved forward to the end of that heredoc. If no heredocs are
|
332
332
|
// found on a line then this is NULL.
|
333
|
-
const
|
333
|
+
const uint8_t *heredoc_end;
|
334
334
|
|
335
335
|
yp_list_t comment_list; // the list of comments that have been found while parsing
|
336
336
|
yp_list_t warning_list; // the list of warnings that have been found while parsing
|
@@ -361,7 +361,7 @@ struct yp_parser {
|
|
361
361
|
|
362
362
|
// This pointer indicates where a comment must start if it is to be
|
363
363
|
// considered an encoding comment.
|
364
|
-
const
|
364
|
+
const uint8_t *encoding_comment_start;
|
365
365
|
|
366
366
|
// This is an optional callback that can be attached to the parser that will
|
367
367
|
// be called whenever a new token is lexed by the parser.
|
data/include/yarp/regexp.h
CHANGED
@@ -14,6 +14,6 @@
|
|
14
14
|
|
15
15
|
// Parse a regular expression and extract the names of all of the named capture
|
16
16
|
// groups.
|
17
|
-
YP_EXPORTED_FUNCTION bool yp_regexp_named_capture_group_names(const
|
17
|
+
YP_EXPORTED_FUNCTION bool yp_regexp_named_capture_group_names(const uint8_t *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding);
|
18
18
|
|
19
19
|
#endif
|
data/include/yarp/unescape.h
CHANGED
@@ -29,16 +29,16 @@ typedef enum {
|
|
29
29
|
YP_UNESCAPE_ALL
|
30
30
|
} yp_unescape_type_t;
|
31
31
|
|
32
|
-
// Unescape the contents of the given token into the given string using the
|
33
|
-
// given unescape mode.
|
32
|
+
// Unescape the contents of the given token into the given string using the given unescape mode.
|
34
33
|
YP_EXPORTED_FUNCTION void yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type);
|
34
|
+
void yp_unescape_manipulate_char_literal(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type);
|
35
35
|
|
36
36
|
// Accepts a source string and a type of unescaping and returns the unescaped version.
|
37
37
|
// The caller must yp_string_free(result); after calling this function.
|
38
|
-
YP_EXPORTED_FUNCTION bool yp_unescape_string(const
|
38
|
+
YP_EXPORTED_FUNCTION bool yp_unescape_string(const uint8_t *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result);
|
39
39
|
|
40
40
|
// Returns the number of bytes that encompass the first escape sequence in the
|
41
41
|
// given string.
|
42
|
-
size_t yp_unescape_calculate_difference(yp_parser_t *parser, const
|
42
|
+
size_t yp_unescape_calculate_difference(yp_parser_t *parser, const uint8_t *value, yp_unescape_type_t unescape_type, bool expect_single_codepoint);
|
43
43
|
|
44
44
|
#endif
|
data/include/yarp/util/yp_buffer.h
CHANGED
@@ -36,6 +36,9 @@ void yp_buffer_append_zeroes(yp_buffer_t *buffer, size_t length);
|
|
36
36
|
// Append a string to the buffer.
|
37
37
|
void yp_buffer_append_str(yp_buffer_t *buffer, const char *value, size_t length);
|
38
38
|
|
39
|
+
// Append a list of bytes to the buffer.
|
40
|
+
void yp_buffer_append_bytes(yp_buffer_t *buffer, const uint8_t *value, size_t length);
|
41
|
+
|
39
42
|
// Append a single byte to the buffer.
|
40
43
|
void yp_buffer_append_u8(yp_buffer_t *buffer, uint8_t value);
|
41
44
|
|
data/include/yarp/util/yp_char.h
CHANGED
@@ -9,67 +9,67 @@
|
|
9
9
|
|
10
10
|
// Returns the number of characters at the start of the string that are
|
11
11
|
// whitespace. Disallows searching past the given maximum number of characters.
|
12
|
-
size_t yp_strspn_whitespace(const
|
12
|
+
size_t yp_strspn_whitespace(const uint8_t *string, ptrdiff_t length);
|
13
13
|
|
14
14
|
// Returns the number of characters at the start of the string that are
|
15
15
|
// whitespace while also tracking the location of each newline. Disallows
|
16
16
|
// searching past the given maximum number of characters.
|
17
17
|
size_t
|
18
|
-
yp_strspn_whitespace_newlines(const
|
18
|
+
yp_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, yp_newline_list_t *newline_list, bool stop_at_newline);
|
19
19
|
|
20
20
|
// Returns the number of characters at the start of the string that are inline
|
21
21
|
// whitespace. Disallows searching past the given maximum number of characters.
|
22
|
-
size_t yp_strspn_inline_whitespace(const
|
22
|
+
size_t yp_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length);
|
23
23
|
|
24
24
|
// Returns the number of characters at the start of the string that are decimal
|
25
25
|
// digits. Disallows searching past the given maximum number of characters.
|
26
|
-
size_t yp_strspn_decimal_digit(const
|
26
|
+
size_t yp_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length);
|
27
27
|
|
28
28
|
// Returns the number of characters at the start of the string that are
|
29
29
|
// hexadecimal digits. Disallows searching past the given maximum number of
|
30
30
|
// characters.
|
31
|
-
size_t yp_strspn_hexadecimal_digit(const
|
31
|
+
size_t yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length);
|
32
32
|
|
33
33
|
// Returns the number of characters at the start of the string that are octal
|
34
34
|
// digits or underscores. Disallows searching past the given maximum number of
|
35
35
|
// characters.
|
36
|
-
size_t yp_strspn_octal_number(const
|
36
|
+
size_t yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length);
|
37
37
|
|
38
38
|
// Returns the number of characters at the start of the string that are decimal
|
39
39
|
// digits or underscores. Disallows searching past the given maximum number of
|
40
40
|
// characters.
|
41
|
-
size_t yp_strspn_decimal_number(const
|
41
|
+
size_t yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length);
|
42
42
|
|
43
43
|
// Returns the number of characters at the start of the string that are
|
44
44
|
// hexadecimal digits or underscores. Disallows searching past the given maximum
|
45
45
|
// number of characters.
|
46
|
-
size_t yp_strspn_hexadecimal_number(const
|
46
|
+
size_t yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length);
|
47
47
|
|
48
48
|
// Returns the number of characters at the start of the string that are regexp
|
49
49
|
// options. Disallows searching past the given maximum number of characters.
|
50
|
-
size_t yp_strspn_regexp_option(const
|
50
|
+
size_t yp_strspn_regexp_option(const uint8_t *string, ptrdiff_t length);
|
51
51
|
|
52
52
|
// Returns the number of characters at the start of the string that are binary
|
53
53
|
// digits or underscores. Disallows searching past the given maximum number of
|
54
54
|
// characters.
|
55
|
-
size_t yp_strspn_binary_number(const
|
55
|
+
size_t yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length);
|
56
56
|
|
57
57
|
// Returns true if the given character is a whitespace character.
|
58
|
-
bool yp_char_is_whitespace(const
|
58
|
+
bool yp_char_is_whitespace(const uint8_t b);
|
59
59
|
|
60
60
|
// Returns true if the given character is an inline whitespace character.
|
61
|
-
bool yp_char_is_inline_whitespace(const
|
61
|
+
bool yp_char_is_inline_whitespace(const uint8_t b);
|
62
62
|
|
63
63
|
// Returns true if the given character is a binary digit.
|
64
|
-
bool yp_char_is_binary_digit(const
|
64
|
+
bool yp_char_is_binary_digit(const uint8_t b);
|
65
65
|
|
66
66
|
// Returns true if the given character is an octal digit.
|
67
|
-
bool yp_char_is_octal_digit(const
|
67
|
+
bool yp_char_is_octal_digit(const uint8_t b);
|
68
68
|
|
69
69
|
// Returns true if the given character is a decimal digit.
|
70
|
-
bool yp_char_is_decimal_digit(const
|
70
|
+
bool yp_char_is_decimal_digit(const uint8_t b);
|
71
71
|
|
72
72
|
// Returns true if the given character is a hexadecimal digit.
|
73
|
-
bool yp_char_is_hexadecimal_digit(const
|
73
|
+
bool yp_char_is_hexadecimal_digit(const uint8_t b);
|
74
74
|
|
75
75
|
#endif
|
data/include/yarp/util/yp_constant_pool.h
CHANGED
@@ -40,7 +40,7 @@ void yp_constant_id_list_free(yp_constant_id_list_t *list);
|
|
40
40
|
|
41
41
|
typedef struct {
|
42
42
|
yp_constant_id_t id;
|
43
|
-
const
|
43
|
+
const uint8_t *start;
|
44
44
|
size_t length;
|
45
45
|
size_t hash;
|
46
46
|
} yp_constant_t;
|
@@ -59,7 +59,7 @@ bool yp_constant_pool_init(yp_constant_pool_t *pool, size_t capacity);
|
|
59
59
|
|
60
60
|
// Insert a constant into a constant pool. Returns the id of the constant, or 0
|
61
61
|
// if any potential calls to resize fail.
|
62
|
-
yp_constant_id_t yp_constant_pool_insert(yp_constant_pool_t *pool, const
|
62
|
+
yp_constant_id_t yp_constant_pool_insert(yp_constant_pool_t *pool, const uint8_t *start, size_t length);
|
63
63
|
|
64
64
|
// Free the memory associated with a constant pool.
|
65
65
|
void yp_constant_pool_free(yp_constant_pool_t *pool);
|
data/include/yarp/util/yp_newline_list.h
CHANGED
@@ -19,7 +19,7 @@
|
|
19
19
|
// A list of offsets of newlines in a string. The offsets are assumed to be
|
20
20
|
// sorted/inserted in ascending order.
|
21
21
|
typedef struct {
|
22
|
-
const
|
22
|
+
const uint8_t *start;
|
23
23
|
|
24
24
|
size_t *offsets;
|
25
25
|
size_t size;
|
@@ -41,19 +41,19 @@ typedef struct {
|
|
41
41
|
|
42
42
|
// Initialize a new newline list with the given capacity. Returns true if the
|
43
43
|
// allocation of the offsets succeeds, otherwise returns false.
|
44
|
-
bool yp_newline_list_init(yp_newline_list_t *list, const
|
44
|
+
bool yp_newline_list_init(yp_newline_list_t *list, const uint8_t *start, size_t capacity);
|
45
45
|
|
46
46
|
// Append a new offset to the newline list. Returns true if the reallocation of
|
47
47
|
// the offsets succeeds (if one was necessary), otherwise returns false.
|
48
|
-
bool yp_newline_list_append(yp_newline_list_t *list, const
|
48
|
+
bool yp_newline_list_append(yp_newline_list_t *list, const uint8_t *cursor);
|
49
49
|
|
50
50
|
// Conditionally append a new offset to the newline list, if the value passed in is a newline.
|
51
|
-
bool yp_newline_list_check_append(yp_newline_list_t *list, const
|
51
|
+
bool yp_newline_list_check_append(yp_newline_list_t *list, const uint8_t *cursor);
|
52
52
|
|
53
53
|
// Returns the line and column of the given offset. If the offset is not in the
|
54
54
|
// list, the line and column of the closest offset less than the given offset
|
55
55
|
// are returned.
|
56
|
-
yp_line_column_t yp_newline_list_line_column(yp_newline_list_t *list, const
|
56
|
+
yp_line_column_t yp_newline_list_line_column(yp_newline_list_t *list, const uint8_t *cursor);
|
57
57
|
|
58
58
|
// Free the internal memory allocated for the newline list.
|
59
59
|
void yp_newline_list_free(yp_newline_list_t *list);
|
data/include/yarp/util/yp_string.h
CHANGED
@@ -12,17 +12,17 @@
|
|
12
12
|
// This struct represents a string value.
|
13
13
|
typedef struct {
|
14
14
|
enum { YP_STRING_SHARED, YP_STRING_OWNED, YP_STRING_CONSTANT, YP_STRING_MAPPED } type;
|
15
|
-
|
15
|
+
const uint8_t *source;
|
16
16
|
size_t length;
|
17
17
|
} yp_string_t;
|
18
18
|
|
19
19
|
#define YP_EMPTY_STRING ((yp_string_t) { .type = YP_STRING_CONSTANT, .source = NULL, .length = 0 })
|
20
20
|
|
21
21
|
// Initialize a shared string that is based on initial input.
|
22
|
-
void yp_string_shared_init(yp_string_t *string, const
|
22
|
+
void yp_string_shared_init(yp_string_t *string, const uint8_t *start, const uint8_t *end);
|
23
23
|
|
24
24
|
// Initialize an owned string that is responsible for freeing allocated memory.
|
25
|
-
void yp_string_owned_init(yp_string_t *string,
|
25
|
+
void yp_string_owned_init(yp_string_t *string, uint8_t *source, size_t length);
|
26
26
|
|
27
27
|
// Initialize a constant string that doesn't own its memory source.
|
28
28
|
void yp_string_constant_init(yp_string_t *string, const char *source, size_t length);
|
@@ -49,7 +49,7 @@ void yp_string_ensure_owned(yp_string_t *string);
|
|
49
49
|
YP_EXPORTED_FUNCTION size_t yp_string_length(const yp_string_t *string);
|
50
50
|
|
51
51
|
// Returns the start pointer associated with the string.
|
52
|
-
YP_EXPORTED_FUNCTION const
|
52
|
+
YP_EXPORTED_FUNCTION const uint8_t * yp_string_source(const yp_string_t *string);
|
53
53
|
|
54
54
|
// Free the associated memory of the given string.
|
55
55
|
YP_EXPORTED_FUNCTION void yp_string_free(yp_string_t *string);
|
data/include/yarp/util/yp_string_list.h
CHANGED
@@ -13,9 +13,6 @@ typedef struct {
|
|
13
13
|
size_t capacity;
|
14
14
|
} yp_string_list_t;
|
15
15
|
|
16
|
-
// Allocate a new yp_string_list_t.
|
17
|
-
yp_string_list_t * yp_string_list_alloc(void);
|
18
|
-
|
19
16
|
// Initialize a yp_string_list_t with its default values.
|
20
17
|
YP_EXPORTED_FUNCTION void yp_string_list_init(yp_string_list_t *string_list);
|
21
18
|
|
data/include/yarp/util/yp_strpbrk.h
CHANGED
@@ -24,6 +24,6 @@
|
|
24
24
|
// characters that are trailing bytes of multi-byte characters. For example, in
|
25
25
|
// Shift-JIS, the backslash character can be a trailing byte. In that case we
|
26
26
|
// need to take a slower path and iterate one multi-byte character at a time.
|
27
|
-
const
|
27
|
+
const uint8_t * yp_strpbrk(yp_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length);
|
28
28
|
|
29
29
|
#endif
|
data/include/yarp/version.h
CHANGED
data/include/yarp.h
CHANGED
@@ -16,6 +16,7 @@
|
|
16
16
|
#include "yarp/version.h"
|
17
17
|
|
18
18
|
#include <assert.h>
|
19
|
+
#include <errno.h>
|
19
20
|
#include <stdarg.h>
|
20
21
|
#include <stdbool.h>
|
21
22
|
#include <stdint.h>
|
@@ -40,7 +41,7 @@ void yp_scope_node_init(yp_node_t *node, yp_scope_node_t *dest);
|
|
40
41
|
YP_EXPORTED_FUNCTION const char * yp_version(void);
|
41
42
|
|
42
43
|
// Initialize a parser with the given start and end pointers.
|
43
|
-
YP_EXPORTED_FUNCTION void yp_parser_init(yp_parser_t *parser, const
|
44
|
+
YP_EXPORTED_FUNCTION void yp_parser_init(yp_parser_t *parser, const uint8_t *source, size_t size, const char *filepath);
|
44
45
|
|
45
46
|
// Register a callback that will be called whenever YARP changes the encoding it
|
46
47
|
// is using to parse based on the magic comment.
|
@@ -66,14 +67,14 @@ YP_EXPORTED_FUNCTION void yp_prettyprint(yp_parser_t *parser, yp_node_t *node, y
|
|
66
67
|
YP_EXPORTED_FUNCTION void yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
|
67
68
|
|
68
69
|
// Parse the given source to the AST and serialize the AST to the given buffer.
|
69
|
-
YP_EXPORTED_FUNCTION void yp_parse_serialize(const
|
70
|
+
YP_EXPORTED_FUNCTION void yp_parse_serialize(const uint8_t *source, size_t size, yp_buffer_t *buffer, const char *metadata);
|
70
71
|
|
71
72
|
// Lex the given source and serialize to the given buffer.
|
72
|
-
YP_EXPORTED_FUNCTION void yp_lex_serialize(const
|
73
|
+
YP_EXPORTED_FUNCTION void yp_lex_serialize(const uint8_t *source, size_t size, const char *filepath, yp_buffer_t *buffer);
|
73
74
|
|
74
75
|
// Parse and serialize both the AST and the tokens represented by the given
|
75
76
|
// source to the given buffer.
|
76
|
-
YP_EXPORTED_FUNCTION void yp_parse_lex_serialize(const
|
77
|
+
YP_EXPORTED_FUNCTION void yp_parse_lex_serialize(const uint8_t *source, size_t size, yp_buffer_t *buffer, const char *metadata);
|
77
78
|
|
78
79
|
// Returns a string representation of the given token type.
|
79
80
|
YP_EXPORTED_FUNCTION const char * yp_token_type_to_str(yp_token_type_t token_type);
|