yarp 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/CODE_OF_CONDUCT.md +76 -0
  3. data/CONTRIBUTING.md +51 -0
  4. data/LICENSE.md +7 -0
  5. data/Makefile.in +79 -0
  6. data/README.md +86 -0
  7. data/config.h.in +25 -0
  8. data/config.yml +2147 -0
  9. data/configure +4487 -0
  10. data/docs/build_system.md +85 -0
  11. data/docs/building.md +26 -0
  12. data/docs/configuration.md +56 -0
  13. data/docs/design.md +53 -0
  14. data/docs/encoding.md +116 -0
  15. data/docs/extension.md +20 -0
  16. data/docs/fuzzing.md +93 -0
  17. data/docs/heredocs.md +36 -0
  18. data/docs/mapping.md +117 -0
  19. data/docs/ripper.md +36 -0
  20. data/docs/serialization.md +130 -0
  21. data/docs/testing.md +55 -0
  22. data/ext/yarp/api_node.c +3680 -0
  23. data/ext/yarp/api_pack.c +256 -0
  24. data/ext/yarp/extconf.rb +131 -0
  25. data/ext/yarp/extension.c +547 -0
  26. data/ext/yarp/extension.h +18 -0
  27. data/include/yarp/ast.h +1412 -0
  28. data/include/yarp/defines.h +54 -0
  29. data/include/yarp/diagnostic.h +24 -0
  30. data/include/yarp/enc/yp_encoding.h +94 -0
  31. data/include/yarp/node.h +36 -0
  32. data/include/yarp/pack.h +141 -0
  33. data/include/yarp/parser.h +389 -0
  34. data/include/yarp/regexp.h +19 -0
  35. data/include/yarp/unescape.h +42 -0
  36. data/include/yarp/util/yp_buffer.h +39 -0
  37. data/include/yarp/util/yp_char.h +75 -0
  38. data/include/yarp/util/yp_constant_pool.h +64 -0
  39. data/include/yarp/util/yp_list.h +67 -0
  40. data/include/yarp/util/yp_memchr.h +14 -0
  41. data/include/yarp/util/yp_newline_list.h +54 -0
  42. data/include/yarp/util/yp_state_stack.h +24 -0
  43. data/include/yarp/util/yp_string.h +57 -0
  44. data/include/yarp/util/yp_string_list.h +28 -0
  45. data/include/yarp/util/yp_strpbrk.h +29 -0
  46. data/include/yarp/version.h +5 -0
  47. data/include/yarp.h +69 -0
  48. data/lib/yarp/lex_compat.rb +759 -0
  49. data/lib/yarp/node.rb +7428 -0
  50. data/lib/yarp/pack.rb +185 -0
  51. data/lib/yarp/ripper_compat.rb +174 -0
  52. data/lib/yarp/serialize.rb +389 -0
  53. data/lib/yarp.rb +330 -0
  54. data/src/diagnostic.c +25 -0
  55. data/src/enc/yp_big5.c +79 -0
  56. data/src/enc/yp_euc_jp.c +85 -0
  57. data/src/enc/yp_gbk.c +88 -0
  58. data/src/enc/yp_shift_jis.c +83 -0
  59. data/src/enc/yp_tables.c +509 -0
  60. data/src/enc/yp_unicode.c +2320 -0
  61. data/src/enc/yp_windows_31j.c +83 -0
  62. data/src/node.c +2011 -0
  63. data/src/pack.c +493 -0
  64. data/src/prettyprint.c +1782 -0
  65. data/src/regexp.c +580 -0
  66. data/src/serialize.c +1576 -0
  67. data/src/token_type.c +347 -0
  68. data/src/unescape.c +576 -0
  69. data/src/util/yp_buffer.c +78 -0
  70. data/src/util/yp_char.c +229 -0
  71. data/src/util/yp_constant_pool.c +147 -0
  72. data/src/util/yp_list.c +50 -0
  73. data/src/util/yp_memchr.c +31 -0
  74. data/src/util/yp_newline_list.c +119 -0
  75. data/src/util/yp_state_stack.c +25 -0
  76. data/src/util/yp_string.c +207 -0
  77. data/src/util/yp_string_list.c +32 -0
  78. data/src/util/yp_strncasecmp.c +20 -0
  79. data/src/util/yp_strpbrk.c +66 -0
  80. data/src/yarp.c +13211 -0
  81. data/yarp.gemspec +100 -0
  82. metadata +125 -0
@@ -0,0 +1,389 @@
1
+ #ifndef YARP_PARSER_H
2
+ #define YARP_PARSER_H
3
+
4
+ #include "yarp/ast.h"
5
+ #include "yarp/defines.h"
6
+ #include "yarp/enc/yp_encoding.h"
7
+ #include "yarp/util/yp_constant_pool.h"
8
+ #include "yarp/util/yp_list.h"
9
+ #include "yarp/util/yp_newline_list.h"
10
+ #include "yarp/util/yp_state_stack.h"
11
+
12
+ #include <stdbool.h>
13
+
14
+ // This enum provides various bits that represent different kinds of states that
15
+ // the lexer can track. This is used to determine which kind of token to return
16
+ // based on the context of the parser.
17
+ typedef enum {
18
+ YP_LEX_STATE_BIT_BEG,
19
+ YP_LEX_STATE_BIT_END,
20
+ YP_LEX_STATE_BIT_ENDARG,
21
+ YP_LEX_STATE_BIT_ENDFN,
22
+ YP_LEX_STATE_BIT_ARG,
23
+ YP_LEX_STATE_BIT_CMDARG,
24
+ YP_LEX_STATE_BIT_MID,
25
+ YP_LEX_STATE_BIT_FNAME,
26
+ YP_LEX_STATE_BIT_DOT,
27
+ YP_LEX_STATE_BIT_CLASS,
28
+ YP_LEX_STATE_BIT_LABEL,
29
+ YP_LEX_STATE_BIT_LABELED,
30
+ YP_LEX_STATE_BIT_FITEM
31
+ } yp_lex_state_bit_t;
32
+
33
+ // This enum combines the various bits from the above enum into individual
34
+ // values that represent the various states of the lexer.
35
+ typedef enum {
36
+ YP_LEX_STATE_NONE = 0,
37
+ YP_LEX_STATE_BEG = (1 << YP_LEX_STATE_BIT_BEG),
38
+ YP_LEX_STATE_END = (1 << YP_LEX_STATE_BIT_END),
39
+ YP_LEX_STATE_ENDARG = (1 << YP_LEX_STATE_BIT_ENDARG),
40
+ YP_LEX_STATE_ENDFN = (1 << YP_LEX_STATE_BIT_ENDFN),
41
+ YP_LEX_STATE_ARG = (1 << YP_LEX_STATE_BIT_ARG),
42
+ YP_LEX_STATE_CMDARG = (1 << YP_LEX_STATE_BIT_CMDARG),
43
+ YP_LEX_STATE_MID = (1 << YP_LEX_STATE_BIT_MID),
44
+ YP_LEX_STATE_FNAME = (1 << YP_LEX_STATE_BIT_FNAME),
45
+ YP_LEX_STATE_DOT = (1 << YP_LEX_STATE_BIT_DOT),
46
+ YP_LEX_STATE_CLASS = (1 << YP_LEX_STATE_BIT_CLASS),
47
+ YP_LEX_STATE_LABEL = (1 << YP_LEX_STATE_BIT_LABEL),
48
+ YP_LEX_STATE_LABELED = (1 << YP_LEX_STATE_BIT_LABELED),
49
+ YP_LEX_STATE_FITEM = (1 << YP_LEX_STATE_BIT_FITEM),
50
+ YP_LEX_STATE_BEG_ANY = YP_LEX_STATE_BEG | YP_LEX_STATE_MID | YP_LEX_STATE_CLASS,
51
+ YP_LEX_STATE_ARG_ANY = YP_LEX_STATE_ARG | YP_LEX_STATE_CMDARG,
52
+ YP_LEX_STATE_END_ANY = YP_LEX_STATE_END | YP_LEX_STATE_ENDARG | YP_LEX_STATE_ENDFN
53
+ } yp_lex_state_t;
54
+
55
+ typedef enum {
56
+ YP_HEREDOC_QUOTE_NONE,
57
+ YP_HEREDOC_QUOTE_SINGLE = '\'',
58
+ YP_HEREDOC_QUOTE_DOUBLE = '"',
59
+ YP_HEREDOC_QUOTE_BACKTICK = '`',
60
+ } yp_heredoc_quote_t;
61
+
62
+ typedef enum {
63
+ YP_HEREDOC_INDENT_NONE,
64
+ YP_HEREDOC_INDENT_DASH,
65
+ YP_HEREDOC_INDENT_TILDE,
66
+ } yp_heredoc_indent_t;
67
+
68
+ // When lexing Ruby source, the lexer has a small amount of state to tell which
69
+ // kind of token it is currently lexing. For example, when we find the start of
70
+ // a string, the first token that we return is a TOKEN_STRING_BEGIN token. After
71
+ // that the lexer is now in the YP_LEX_STRING mode, and will return tokens that
72
+ // are found as part of a string.
73
+ typedef struct yp_lex_mode {
74
+ enum {
75
+ // This state is used when any given token is being lexed.
76
+ YP_LEX_DEFAULT,
77
+
78
+ // This state is used when we're lexing as normal but inside an embedded
79
+ // expression of a string.
80
+ YP_LEX_EMBEXPR,
81
+
82
+ // This state is used when we're lexing a variable that is embedded
83
+ // directly inside of a string with the # shorthand.
84
+ YP_LEX_EMBVAR,
85
+
86
+ // This state is used when you are inside the content of a heredoc.
87
+ YP_LEX_HEREDOC,
88
+
89
+ // This state is used when we are lexing a list of tokens, as in a %w
90
+ // word list literal or a %i symbol list literal.
91
+ YP_LEX_LIST,
92
+
93
+ // This state is used when a regular expression has been begun and we
94
+ // are looking for the terminator.
95
+ YP_LEX_REGEXP,
96
+
97
+ // This state is used when we are lexing a string or a string-like
98
+ // token, as in string content with either quote or an xstring.
99
+ YP_LEX_STRING
100
+ } mode;
101
+
102
+ union {
103
+ struct {
104
+ // This keeps track of the nesting level of the list.
105
+ size_t nesting;
106
+
107
+ // Whether or not interpolation is allowed in this list.
108
+ bool interpolation;
109
+
110
+ // When lexing a list, it takes into account balancing the
111
+ // terminator if the terminator is one of (), [], {}, or <>.
112
+ char incrementor;
113
+
114
+ // This is the terminator of the list literal.
115
+ char terminator;
116
+
117
+ // This is the character set that should be used to delimit the
118
+ // tokens within the list.
119
+ char breakpoints[11];
120
+ } list;
121
+
122
+ struct {
123
+ // This keeps track of the nesting level of the regular expression.
124
+ size_t nesting;
125
+
126
+ // When lexing a regular expression, it takes into account balancing
127
+ // the terminator if the terminator is one of (), [], {}, or <>.
128
+ char incrementor;
129
+
130
+ // This is the terminator of the regular expression.
131
+ char terminator;
132
+
133
+ // This is the character set that should be used to delimit the
134
+ // tokens within the regular expression.
135
+ char breakpoints[6];
136
+ } regexp;
137
+
138
+ struct {
139
+ // This keeps track of the nesting level of the string.
140
+ size_t nesting;
141
+
142
+ // Whether or not interpolation is allowed in this string.
143
+ bool interpolation;
144
+
145
+ // Whether or not at the end of the string we should allow a :,
146
+ // which would indicate this was a dynamic symbol instead of a
147
+ // string.
148
+ bool label_allowed;
149
+
150
+ // When lexing a string, it takes into account balancing the
151
+ // terminator if the terminator is one of (), [], {}, or <>.
152
+ char incrementor;
153
+
154
+ // This is the terminator of the string. It is typically either a
155
+ // single or double quote.
156
+ char terminator;
157
+
158
+ // This is the character set that should be used to delimit the
159
+ // tokens within the string.
160
+ char breakpoints[6];
161
+ } string;
162
+
163
+ struct {
164
+ // This is a pointer to the beginning of the heredoc identifier, along
165
+ // with the length of that identifier.
166
+ const char *ident_start;
167
+ size_t ident_length;
168
+
169
+ yp_heredoc_quote_t quote;
170
+ yp_heredoc_indent_t indent;
171
+
172
+ // This is the pointer to the character where lexing should resume
173
+ // once the heredoc has been completely processed.
174
+ const char *next_start;
175
+ } heredoc;
176
+ } as;
177
+
178
+ // The previous lex state so that it knows how to pop.
179
+ struct yp_lex_mode *prev;
180
+ } yp_lex_mode_t;
181
+
182
+ // We pre-allocate a certain number of lex states in order to avoid having to
183
+ // call malloc too many times while parsing. You really shouldn't need more than
184
+ // this because you only really nest deeply when doing string interpolation.
185
+ #define YP_LEX_STACK_SIZE 4
186
+
187
+ // A forward declaration since our error handler struct accepts a parser for
188
+ // each of its function calls.
189
+ typedef struct yp_parser yp_parser_t;
190
+
191
+ // While parsing, we keep track of a stack of contexts. This is helpful for
192
+ // error recovery so that we can pop back to a previous context when we hit a
193
+ // token that is understood by a parent context but not by the current context.
194
+ typedef enum {
195
+ YP_CONTEXT_BEGIN, // a begin statement
196
+ YP_CONTEXT_BLOCK_BRACES, // expressions in block arguments using braces
197
+ YP_CONTEXT_BLOCK_KEYWORDS, // expressions in block arguments using do..end
198
+ YP_CONTEXT_CASE_WHEN, // a case when statement
199
+ YP_CONTEXT_CASE_IN, // a case in statement
200
+ YP_CONTEXT_CLASS, // a class declaration
201
+ YP_CONTEXT_DEF, // a method definition
202
+ YP_CONTEXT_DEF_PARAMS, // a method definition's parameters
203
+ YP_CONTEXT_DEFAULT_PARAMS, // a method definition's default parameter
204
+ YP_CONTEXT_ELSE, // an else clause
205
+ YP_CONTEXT_ELSIF, // an elsif clause
206
+ YP_CONTEXT_EMBEXPR, // an interpolated expression
207
+ YP_CONTEXT_ENSURE, // an ensure statement
208
+ YP_CONTEXT_FOR, // a for loop
209
+ YP_CONTEXT_IF, // an if statement
210
+ YP_CONTEXT_LAMBDA_BRACES, // a lambda expression with braces
211
+ YP_CONTEXT_LAMBDA_DO_END, // a lambda expression with do..end
212
+ YP_CONTEXT_MAIN, // the top level context
213
+ YP_CONTEXT_MODULE, // a module declaration
214
+ YP_CONTEXT_PARENS, // a parenthesized expression
215
+ YP_CONTEXT_POSTEXE, // an END block
216
+ YP_CONTEXT_PREDICATE, // a predicate inside an if/elsif/unless statement
217
+ YP_CONTEXT_PREEXE, // a BEGIN block
218
+ YP_CONTEXT_RESCUE_ELSE, // a rescue else statement
219
+ YP_CONTEXT_RESCUE, // a rescue statement
220
+ YP_CONTEXT_SCLASS, // a singleton class definition
221
+ YP_CONTEXT_UNLESS, // an unless statement
222
+ YP_CONTEXT_UNTIL, // an until statement
223
+ YP_CONTEXT_WHILE, // a while statement
224
+ } yp_context_t;
225
+
226
+ // This is a node in a linked list of contexts.
227
+ typedef struct yp_context_node {
228
+ yp_context_t context;
229
+ struct yp_context_node *prev;
230
+ } yp_context_node_t;
231
+
232
+ // This is the type of a comment that we've found while parsing.
233
+ typedef enum {
234
+ YP_COMMENT_INLINE,
235
+ YP_COMMENT_EMBDOC,
236
+ YP_COMMENT___END__
237
+ } yp_comment_type_t;
238
+
239
+ // This is a node in the linked list of comments that we've found while parsing.
240
+ typedef struct yp_comment {
241
+ yp_list_node_t node;
242
+ const char *start;
243
+ const char *end;
244
+ yp_comment_type_t type;
245
+ } yp_comment_t;
246
+
247
+ // When the encoding that is being used to parse the source is changed by YARP,
248
+ // we provide the ability here to call out to a user-defined function.
249
+ typedef void (*yp_encoding_changed_callback_t)(yp_parser_t *parser);
250
+
251
+ // When an encoding is encountered that isn't understood by YARP, we provide
252
+ // the ability here to call out to a user-defined function to get an encoding
253
+ // struct. If the function returns something that isn't NULL, we set that to
254
+ // our encoding and use it to parse identifiers.
255
+ typedef yp_encoding_t *(*yp_encoding_decode_callback_t)(yp_parser_t *parser, const char *name, size_t width);
256
+
257
+ // When you are lexing through a file, the lexer needs all of the information
258
+ // that the parser additionally provides (for example, the local table). So if
259
+ // you want to properly lex Ruby, you need to actually lex it in the context of
260
+ // the parser. In order to provide this functionality, we optionally allow a
261
+ // struct to be attached to the parser that calls back out to a user-provided
262
+ // callback when each token is lexed.
263
+ typedef struct {
264
+ // This opaque pointer is used to provide whatever information the user
265
+ // deemed necessary to the callback. In our case we use it to pass the array
266
+ // that the tokens get appended into.
267
+ void *data;
268
+
269
+ // This is the callback that is called when a token is lexed. It is passed
270
+ // the opaque data pointer, the parser, and the token that was lexed.
271
+ void (*callback)(void *data, yp_parser_t *parser, yp_token_t *token);
272
+ } yp_lex_callback_t;
273
+
274
+ // This struct represents a node in a linked list of scopes. Some scopes can see
275
+ // into their parent scopes, while others cannot.
276
+ typedef struct yp_scope {
277
+ // The IDs of the locals in the given scope.
278
+ yp_constant_id_list_t locals;
279
+
280
+ // A boolean indicating whether or not this scope can see into its parent.
281
+ // If closed is true, then the scope cannot see into its parent.
282
+ bool closed;
283
+
284
+ // A pointer to the previous scope in the linked list.
285
+ struct yp_scope *previous;
286
+ } yp_scope_t;
287
+
288
+ // This struct represents the overall parser. It contains a reference to the
289
+ // source file, as well as pointers that indicate where in the source it's
290
+ // currently parsing. It also contains the most recent and current token that
291
+ // it's considering.
292
+ struct yp_parser {
293
+ yp_lex_state_t lex_state; // the current state of the lexer
294
+ bool command_start; // whether or not we're at the beginning of a command
295
+ int enclosure_nesting; // tracks the current nesting of (), [], and {}
296
+
297
+ // Used to temporarily track the nesting of enclosures to determine if a {
298
+ // is the beginning of a lambda following the parameters of a lambda.
299
+ int lambda_enclosure_nesting;
300
+
301
+ // Used to track the nesting of braces to ensure we get the correct value
302
+ // when we are interpolating blocks with braces.
303
+ int brace_nesting;
304
+
305
+ // the stack used to determine if a do keyword belongs to the predicate of a
306
+ // while, until, or for loop
307
+ yp_state_stack_t do_loop_stack;
308
+
309
+ // the stack used to determine if a do keyword belongs to the beginning of a
310
+ // block
311
+ yp_state_stack_t accepts_block_stack;
312
+
313
+ struct {
314
+ yp_lex_mode_t *current; // the current mode of the lexer
315
+ yp_lex_mode_t stack[YP_LEX_STACK_SIZE]; // the stack of lexer modes
316
+ size_t index; // the current index into the lexer mode stack
317
+ } lex_modes;
318
+
319
+ const char *start; // the pointer to the start of the source
320
+ const char *end; // the pointer to the end of the source
321
+ yp_token_t previous; // the previous token we were considering
322
+ yp_token_t current; // the current token we're considering
323
+
324
+ // This is a special field set on the parser when we need the parser to jump
325
+ // to a specific location when lexing the next token, as opposed to just
326
+ // using the end of the previous token. Normally this is NULL.
327
+ const char *next_start;
328
+
329
+ // This field indicates the end of a heredoc whose identifier was found on
330
+ // the current line. If another heredoc is found on the same line, then this
331
+ // will be moved forward to the end of that heredoc. If no heredocs are
332
+ // found on a line then this is NULL.
333
+ const char *heredoc_end;
334
+
335
+ yp_list_t comment_list; // the list of comments that have been found while parsing
336
+ yp_list_t warning_list; // the list of warnings that have been found while parsing
337
+ yp_list_t error_list; // the list of errors that have been found while parsing
338
+ yp_scope_t *current_scope; // the current local scope
339
+
340
+ yp_context_node_t *current_context; // the current parsing context
341
+ bool recovering; // whether or not we're currently recovering from a syntax error
342
+
343
+ // The encoding functions for the current file is attached to the parser as
344
+ // it's parsing so that it can change with a magic comment.
345
+ yp_encoding_t encoding;
346
+
347
+ // Whether or not the encoding has been changed by a magic comment. We use
348
+ // this to provide a fast path for the lexer instead of going through the
349
+ // function pointer.
350
+ bool encoding_changed;
351
+
352
+ // When the encoding that is being used to parse the source is changed by
353
+ // YARP, we provide the ability here to call out to a user-defined function.
354
+ yp_encoding_changed_callback_t encoding_changed_callback;
355
+
356
+ // When an encoding is encountered that isn't understood by YARP, we provide
357
+ // the ability here to call out to a user-defined function to get an
358
+ // encoding struct. If the function returns something that isn't NULL, we
359
+ // set that to our encoding and use it to parse identifiers.
360
+ yp_encoding_decode_callback_t encoding_decode_callback;
361
+
362
+ // This pointer indicates where a comment must start if it is to be
363
+ // considered an encoding comment.
364
+ const char *encoding_comment_start;
365
+
366
+ // This is an optional callback that can be attached to the parser that will
367
+ // be called whenever a new token is lexed by the parser.
368
+ yp_lex_callback_t *lex_callback;
369
+
370
+ // This flag indicates that we are currently parsing a pattern matching
371
+ // expression and impacts the calculation of newlines.
372
+ bool pattern_matching_newlines;
373
+
374
+ // This flag indicates that we are currently parsing a keyword argument.
375
+ bool in_keyword_arg;
376
+
377
+ // This is the path of the file being parsed. We use the filepath when
378
+ // constructing SourceFileNodes.
379
+ yp_string_t filepath_string;
380
+
381
+ // This constant pool keeps all of the constants defined throughout the file
382
+ // so that we can reference them later.
383
+ yp_constant_pool_t constant_pool;
384
+
385
+ // This is the list of newline offsets in the source file.
386
+ yp_newline_list_t newline_list;
387
+ };
388
+
389
+ #endif // YARP_PARSER_H
@@ -0,0 +1,19 @@
1
+ #ifndef YARP_REGEXP_H
2
+ #define YARP_REGEXP_H
3
+
4
+ #include "yarp/defines.h"
5
+ #include "yarp/parser.h"
6
+ #include "yarp/enc/yp_encoding.h"
7
+ #include "yarp/util/yp_memchr.h"
8
+ #include "yarp/util/yp_string_list.h"
9
+ #include "yarp/util/yp_string.h"
10
+
11
+ #include <stdbool.h>
12
+ #include <stddef.h>
13
+ #include <string.h>
14
+
15
+ // Parse a regular expression and extract the names of all of the named capture
16
+ // groups.
17
+ YP_EXPORTED_FUNCTION bool yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding);
18
+
19
+ #endif
@@ -0,0 +1,42 @@
1
+ #ifndef YARP_UNESCAPE_H
2
+ #define YARP_UNESCAPE_H
3
+
4
+ #include "yarp/defines.h"
5
+ #include "yarp/diagnostic.h"
6
+ #include "yarp/parser.h"
7
+ #include "yarp/util/yp_char.h"
8
+ #include "yarp/util/yp_list.h"
9
+ #include "yarp/util/yp_memchr.h"
10
+ #include "yarp/util/yp_string.h"
11
+
12
+ #include <assert.h>
13
+ #include <stdbool.h>
14
+ #include <stdint.h>
15
+ #include <string.h>
16
+
17
+ // The type of unescape we are performing.
18
+ typedef enum {
19
+ // When we're creating a string inside of a list literal like %w, we
20
+ // shouldn't unescape anything.
21
+ YP_UNESCAPE_NONE,
22
+
23
+ // When we're unescaping a single-quoted string, we only need to unescape
24
+ // single quotes and backslashes.
25
+ YP_UNESCAPE_MINIMAL,
26
+
27
+ // When we're unescaping a double-quoted string, we need to unescape all
28
+ // escapes.
29
+ YP_UNESCAPE_ALL
30
+ } yp_unescape_type_t;
31
+
32
+ // Unescape the contents of the given token into the given string using the
33
+ // given unescape mode.
34
+ YP_EXPORTED_FUNCTION void yp_unescape_manipulate_string(yp_parser_t *parser, const char *value, size_t length, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list);
35
+
36
+ // Accepts a source string and a type of unescaping and returns the unescaped version.
37
+ // The caller must call yp_string_free(result) after calling this function.
38
+ YP_EXPORTED_FUNCTION bool yp_unescape_string(const char *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result);
39
+
40
+ YP_EXPORTED_FUNCTION size_t yp_unescape_calculate_difference(const char *value, const char *end, yp_unescape_type_t unescape_type, bool expect_single_codepoint, yp_list_t *error_list);
41
+
42
+ #endif
@@ -0,0 +1,39 @@
1
+ #ifndef YARP_BUFFER_H
2
+ #define YARP_BUFFER_H
3
+
4
+ #include "yarp/defines.h"
5
+
6
+ #include <assert.h>
7
+ #include <stdbool.h>
8
+ #include <stdint.h>
9
+ #include <stdlib.h>
10
+ #include <string.h>
11
+
12
+ // A yp_buffer_t is a simple memory buffer that stores data in a contiguous
13
+ // block of memory. It is used to store the serialized representation of a
14
+ // YARP tree.
15
+ typedef struct {
16
+ char *value;
17
+ size_t length;
18
+ size_t capacity;
19
+ } yp_buffer_t;
20
+
21
+ // Initialize a yp_buffer_t with its default values.
22
+ YP_EXPORTED_FUNCTION bool yp_buffer_init(yp_buffer_t *buffer);
23
+
24
+ // Append the given amount of space as zeroes to the buffer.
25
+ void yp_buffer_append_zeroes(yp_buffer_t *buffer, size_t length);
26
+
27
+ // Append a string to the buffer.
28
+ void yp_buffer_append_str(yp_buffer_t *buffer, const char *value, size_t length);
29
+
30
+ // Append a single byte to the buffer.
31
+ void yp_buffer_append_u8(yp_buffer_t *buffer, uint8_t value);
32
+
33
+ // Append a 32-bit unsigned integer to the buffer.
34
+ void yp_buffer_append_u32(yp_buffer_t *buffer, uint32_t value);
35
+
36
+ // Free the memory associated with the buffer.
37
+ YP_EXPORTED_FUNCTION void yp_buffer_free(yp_buffer_t *buffer);
38
+
39
+ #endif
@@ -0,0 +1,75 @@
1
+ #ifndef YP_CHAR_H
2
+ #define YP_CHAR_H
3
+
4
+ #include "yarp/defines.h"
5
+ #include "yarp/util/yp_newline_list.h"
6
+
7
+ #include <stdbool.h>
8
+ #include <stddef.h>
9
+
10
+ // Returns the number of characters at the start of the string that are
11
+ // whitespace. Disallows searching past the given maximum number of characters.
12
+ size_t yp_strspn_whitespace(const char *string, ptrdiff_t length);
13
+
14
+ // Returns the number of characters at the start of the string that are
15
+ // whitespace while also tracking the location of each newline. Disallows
16
+ // searching past the given maximum number of characters.
17
+ size_t
18
+ yp_strspn_whitespace_newlines(const char *string, ptrdiff_t length, yp_newline_list_t *newline_list, bool);
19
+
20
+ // Returns the number of characters at the start of the string that are inline
21
+ // whitespace. Disallows searching past the given maximum number of characters.
22
+ size_t yp_strspn_inline_whitespace(const char *string, ptrdiff_t length);
23
+
24
+ // Returns the number of characters at the start of the string that are decimal
25
+ // digits. Disallows searching past the given maximum number of characters.
26
+ size_t yp_strspn_decimal_digit(const char *string, ptrdiff_t length);
27
+
28
+ // Returns the number of characters at the start of the string that are
29
+ // hexadecimal digits. Disallows searching past the given maximum number of
30
+ // characters.
31
+ size_t yp_strspn_hexadecimal_digit(const char *string, ptrdiff_t length);
32
+
33
+ // Returns the number of characters at the start of the string that are octal
34
+ // digits or underscores. Disallows searching past the given maximum number of
35
+ // characters.
36
+ size_t yp_strspn_octal_number(const char *string, ptrdiff_t length);
37
+
38
+ // Returns the number of characters at the start of the string that are decimal
39
+ // digits or underscores. Disallows searching past the given maximum number of
40
+ // characters.
41
+ size_t yp_strspn_decimal_number(const char *string, ptrdiff_t length);
42
+
43
+ // Returns the number of characters at the start of the string that are
44
+ // hexadecimal digits or underscores. Disallows searching past the given maximum
45
+ // number of characters.
46
+ size_t yp_strspn_hexadecimal_number(const char *string, ptrdiff_t length);
47
+
48
+ // Returns the number of characters at the start of the string that are regexp
49
+ // options. Disallows searching past the given maximum number of characters.
50
+ size_t yp_strspn_regexp_option(const char *string, ptrdiff_t length);
51
+
52
+ // Returns the number of characters at the start of the string that are binary
53
+ // digits or underscores. Disallows searching past the given maximum number of
54
+ // characters.
55
+ size_t yp_strspn_binary_number(const char *string, ptrdiff_t length);
56
+
57
+ // Returns true if the given character is a whitespace character.
58
+ bool yp_char_is_whitespace(const char c);
59
+
60
+ // Returns true if the given character is an inline whitespace character.
61
+ bool yp_char_is_inline_whitespace(const char c);
62
+
63
+ // Returns true if the given character is a binary digit.
64
+ bool yp_char_is_binary_digit(const char c);
65
+
66
+ // Returns true if the given character is an octal digit.
67
+ bool yp_char_is_octal_digit(const char c);
68
+
69
+ // Returns true if the given character is a decimal digit.
70
+ bool yp_char_is_decimal_digit(const char c);
71
+
72
+ // Returns true if the given character is a hexadecimal digit.
73
+ bool yp_char_is_hexadecimal_digit(const char c);
74
+
75
+ #endif
@@ -0,0 +1,64 @@
1
+ // The constant pool is a data structure that stores a set of strings. Each
2
+ // string is assigned a unique id, which can be used to compare strings for
3
+ // equality. This comparison ends up being much faster than strcmp, since it
4
+ // only requires a single integer comparison.
5
+
6
+ #ifndef YP_CONSTANT_POOL_H
7
+ #define YP_CONSTANT_POOL_H
8
+
9
+ #include "yarp/defines.h"
10
+
11
+ #include <stdbool.h>
12
+ #include <stdint.h>
13
+ #include <stdlib.h>
14
+ #include <string.h>
15
+
16
+ typedef uint32_t yp_constant_id_t;
17
+
18
+ typedef struct {
19
+ yp_constant_id_t *ids;
20
+ size_t size;
21
+ size_t capacity;
22
+ } yp_constant_id_list_t;
23
+
24
+ // Initialize a list of constant ids.
25
+ void yp_constant_id_list_init(yp_constant_id_list_t *list);
26
+
27
+ // Append a constant id to a list of constant ids. Returns false if any
28
+ // potential reallocations fail.
29
+ bool yp_constant_id_list_append(yp_constant_id_list_t *list, yp_constant_id_t id);
30
+
31
+ // Checks if the current constant id list includes the given constant id.
32
+ bool
33
+ yp_constant_id_list_includes(yp_constant_id_list_t *list, yp_constant_id_t id);
34
+
35
+ // Get the memory size of a list of constant ids.
36
+ size_t yp_constant_id_list_memsize(yp_constant_id_list_t *list);
37
+
38
+ // Free the memory associated with a list of constant ids.
39
+ void yp_constant_id_list_free(yp_constant_id_list_t *list);
40
+
41
+ typedef struct {
42
+ yp_constant_id_t id;
43
+ const char *start;
44
+ size_t length;
45
+ size_t hash;
46
+ } yp_constant_t;
47
+
48
+ typedef struct {
49
+ yp_constant_t *constants;
50
+ size_t size;
51
+ size_t capacity;
52
+ } yp_constant_pool_t;
53
+
54
+ // Initialize a new constant pool with a given capacity.
55
+ bool yp_constant_pool_init(yp_constant_pool_t *pool, size_t capacity);
56
+
57
+ // Insert a constant into a constant pool. Returns the id of the constant, or 0
58
+ // if any potential calls to resize fail.
59
+ yp_constant_id_t yp_constant_pool_insert(yp_constant_pool_t *pool, const char *start, size_t length);
60
+
61
+ // Free the memory associated with a constant pool.
62
+ void yp_constant_pool_free(yp_constant_pool_t *pool);
63
+
64
+ #endif