prism 0.15.1 → 0.17.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -1
  3. data/Makefile +12 -0
  4. data/README.md +3 -1
  5. data/config.yml +66 -50
  6. data/docs/configuration.md +2 -0
  7. data/docs/fuzzing.md +1 -1
  8. data/docs/javascript.md +90 -0
  9. data/docs/releasing.md +27 -0
  10. data/docs/ruby_api.md +2 -0
  11. data/docs/serialization.md +28 -29
  12. data/ext/prism/api_node.c +856 -826
  13. data/ext/prism/api_pack.c +20 -9
  14. data/ext/prism/extension.c +494 -119
  15. data/ext/prism/extension.h +1 -1
  16. data/include/prism/ast.h +3157 -747
  17. data/include/prism/defines.h +40 -8
  18. data/include/prism/diagnostic.h +36 -3
  19. data/include/prism/enc/pm_encoding.h +119 -28
  20. data/include/prism/node.h +38 -30
  21. data/include/prism/options.h +204 -0
  22. data/include/prism/pack.h +44 -33
  23. data/include/prism/parser.h +445 -199
  24. data/include/prism/prettyprint.h +26 -0
  25. data/include/prism/regexp.h +16 -2
  26. data/include/prism/util/pm_buffer.h +102 -18
  27. data/include/prism/util/pm_char.h +162 -48
  28. data/include/prism/util/pm_constant_pool.h +128 -34
  29. data/include/prism/util/pm_list.h +68 -38
  30. data/include/prism/util/pm_memchr.h +18 -3
  31. data/include/prism/util/pm_newline_list.h +71 -28
  32. data/include/prism/util/pm_state_stack.h +25 -7
  33. data/include/prism/util/pm_string.h +115 -27
  34. data/include/prism/util/pm_string_list.h +25 -6
  35. data/include/prism/util/pm_strncasecmp.h +32 -0
  36. data/include/prism/util/pm_strpbrk.h +31 -17
  37. data/include/prism/version.h +28 -3
  38. data/include/prism.h +229 -36
  39. data/lib/prism/compiler.rb +5 -5
  40. data/lib/prism/debug.rb +43 -13
  41. data/lib/prism/desugar_compiler.rb +1 -1
  42. data/lib/prism/dispatcher.rb +27 -26
  43. data/lib/prism/dsl.rb +16 -16
  44. data/lib/prism/ffi.rb +138 -61
  45. data/lib/prism/lex_compat.rb +26 -16
  46. data/lib/prism/mutation_compiler.rb +11 -11
  47. data/lib/prism/node.rb +426 -227
  48. data/lib/prism/node_ext.rb +23 -16
  49. data/lib/prism/node_inspector.rb +1 -1
  50. data/lib/prism/pack.rb +79 -40
  51. data/lib/prism/parse_result/comments.rb +7 -2
  52. data/lib/prism/parse_result/newlines.rb +4 -0
  53. data/lib/prism/parse_result.rb +157 -21
  54. data/lib/prism/pattern.rb +14 -3
  55. data/lib/prism/ripper_compat.rb +28 -10
  56. data/lib/prism/serialize.rb +935 -307
  57. data/lib/prism/visitor.rb +9 -5
  58. data/lib/prism.rb +20 -2
  59. data/prism.gemspec +11 -2
  60. data/rbi/prism.rbi +7305 -0
  61. data/rbi/prism_static.rbi +196 -0
  62. data/sig/prism.rbs +4468 -0
  63. data/sig/prism_static.rbs +123 -0
  64. data/src/diagnostic.c +56 -53
  65. data/src/enc/pm_big5.c +1 -0
  66. data/src/enc/pm_euc_jp.c +1 -0
  67. data/src/enc/pm_gbk.c +1 -0
  68. data/src/enc/pm_shift_jis.c +1 -0
  69. data/src/enc/pm_tables.c +316 -80
  70. data/src/enc/pm_unicode.c +54 -9
  71. data/src/enc/pm_windows_31j.c +1 -0
  72. data/src/node.c +357 -345
  73. data/src/options.c +170 -0
  74. data/src/prettyprint.c +7697 -1643
  75. data/src/prism.c +1964 -1125
  76. data/src/regexp.c +153 -95
  77. data/src/serialize.c +432 -397
  78. data/src/token_type.c +3 -1
  79. data/src/util/pm_buffer.c +88 -23
  80. data/src/util/pm_char.c +103 -57
  81. data/src/util/pm_constant_pool.c +52 -22
  82. data/src/util/pm_list.c +12 -4
  83. data/src/util/pm_memchr.c +5 -3
  84. data/src/util/pm_newline_list.c +25 -63
  85. data/src/util/pm_state_stack.c +9 -3
  86. data/src/util/pm_string.c +95 -85
  87. data/src/util/pm_string_list.c +14 -15
  88. data/src/util/pm_strncasecmp.c +10 -3
  89. data/src/util/pm_strpbrk.c +25 -19
  90. metadata +12 -3
  91. data/docs/prism.png +0 -0
@@ -1,3 +1,8 @@
1
+ /**
2
+ * @file parser.h
3
+ *
4
+ * The parser used to parse Ruby source.
5
+ */
1
6
  #ifndef PRISM_PARSER_H
2
7
  #define PRISM_PARSER_H
3
8
 
@@ -12,9 +17,11 @@
12
17
 
13
18
  #include <stdbool.h>
14
19
 
15
- // This enum provides various bits that represent different kinds of states that
16
- // the lexer can track. This is used to determine which kind of token to return
17
- // based on the context of the parser.
20
+ /**
21
+ * This enum provides various bits that represent different kinds of states that
22
+ * the lexer can track. This is used to determine which kind of token to return
23
+ * based on the context of the parser.
24
+ */
18
25
  typedef enum {
19
26
  PM_LEX_STATE_BIT_BEG,
20
27
  PM_LEX_STATE_BIT_END,
@@ -31,8 +38,10 @@ typedef enum {
31
38
  PM_LEX_STATE_BIT_FITEM
32
39
  } pm_lex_state_bit_t;
33
40
 
34
- // This enum combines the various bits from the above enum into individual
35
- // values that represent the various states of the lexer.
41
+ /**
42
+ * This enum combines the various bits from the above enum into individual
43
+ * values that represent the various states of the lexer.
44
+ */
36
45
  typedef enum {
37
46
  PM_LEX_STATE_NONE = 0,
38
47
  PM_LEX_STATE_BEG = (1 << PM_LEX_STATE_BIT_BEG),
@@ -53,6 +62,9 @@ typedef enum {
53
62
  PM_LEX_STATE_END_ANY = PM_LEX_STATE_END | PM_LEX_STATE_ENDARG | PM_LEX_STATE_ENDFN
54
63
  } pm_lex_state_t;
55
64
 
65
+ /**
66
+ * The type of quote that a heredoc uses.
67
+ */
56
68
  typedef enum {
57
69
  PM_HEREDOC_QUOTE_NONE,
58
70
  PM_HEREDOC_QUOTE_SINGLE = '\'',
@@ -60,385 +72,619 @@ typedef enum {
60
72
  PM_HEREDOC_QUOTE_BACKTICK = '`',
61
73
  } pm_heredoc_quote_t;
62
74
 
75
+ /**
76
+ * The type of indentation that a heredoc uses.
77
+ */
63
78
  typedef enum {
64
79
  PM_HEREDOC_INDENT_NONE,
65
80
  PM_HEREDOC_INDENT_DASH,
66
81
  PM_HEREDOC_INDENT_TILDE,
67
82
  } pm_heredoc_indent_t;
68
83
 
69
- // When lexing Ruby source, the lexer has a small amount of state to tell which
70
- // kind of token it is currently lexing. For example, when we find the start of
71
- // a string, the first token that we return is a TOKEN_STRING_BEGIN token. After
72
- // that the lexer is now in the PM_LEX_STRING mode, and will return tokens that
73
- // are found as part of a string.
84
+ /**
85
+ * When lexing Ruby source, the lexer has a small amount of state to tell which
86
+ * kind of token it is currently lexing. For example, when we find the start of
87
+ * a string, the first token that we return is a TOKEN_STRING_BEGIN token. After
88
+ * that the lexer is now in the PM_LEX_STRING mode, and will return tokens that
89
+ * are found as part of a string.
90
+ */
74
91
  typedef struct pm_lex_mode {
92
+ /** The type of this lex mode. */
75
93
  enum {
76
- // This state is used when any given token is being lexed.
94
+ /** This state is used when any given token is being lexed. */
77
95
  PM_LEX_DEFAULT,
78
96
 
79
- // This state is used when we're lexing as normal but inside an embedded
80
- // expression of a string.
97
+ /**
98
+ * This state is used when we're lexing as normal but inside an embedded
99
+ * expression of a string.
100
+ */
81
101
  PM_LEX_EMBEXPR,
82
102
 
83
- // This state is used when we're lexing a variable that is embedded
84
- // directly inside of a string with the # shorthand.
103
+ /**
104
+ * This state is used when we're lexing a variable that is embedded
105
+ * directly inside of a string with the # shorthand.
106
+ */
85
107
  PM_LEX_EMBVAR,
86
108
 
87
- // This state is used when you are inside the content of a heredoc.
109
+ /** This state is used when you are inside the content of a heredoc. */
88
110
  PM_LEX_HEREDOC,
89
111
 
90
- // This state is used when we are lexing a list of tokens, as in a %w
91
- // word list literal or a %i symbol list literal.
112
+ /**
113
+ * This state is used when we are lexing a list of tokens, as in a %w
114
+ * word list literal or a %i symbol list literal.
115
+ */
92
116
  PM_LEX_LIST,
93
117
 
94
- // This state is used when a regular expression has been begun and we
95
- // are looking for the terminator.
118
+ /**
119
+ * This state is used when a regular expression has been begun and we
120
+ * are looking for the terminator.
121
+ */
96
122
  PM_LEX_REGEXP,
97
123
 
98
- // This state is used when we are lexing a string or a string-like
99
- // token, as in string content with either quote or an xstring.
124
+ /**
125
+ * This state is used when we are lexing a string or a string-like
126
+ * token, as in string content with either quote or an xstring.
127
+ */
100
128
  PM_LEX_STRING
101
129
  } mode;
102
130
 
131
+ /** The data associated with this type of lex mode. */
103
132
  union {
104
133
  struct {
105
- // This keeps track of the nesting level of the list.
134
+ /** This keeps track of the nesting level of the list. */
106
135
  size_t nesting;
107
136
 
108
- // Whether or not interpolation is allowed in this list.
137
+ /** Whether or not interpolation is allowed in this list. */
109
138
  bool interpolation;
110
139
 
111
- // When lexing a list, it takes into account balancing the
112
- // terminator if the terminator is one of (), [], {}, or <>.
140
+ /**
141
+ * When lexing a list, it takes into account balancing the
142
+ * terminator if the terminator is one of (), [], {}, or <>.
143
+ */
113
144
  uint8_t incrementor;
114
145
 
115
- // This is the terminator of the list literal.
146
+ /** This is the terminator of the list literal. */
116
147
  uint8_t terminator;
117
148
 
118
- // This is the character set that should be used to delimit the
119
- // tokens within the list.
149
+ /**
150
+ * This is the character set that should be used to delimit the
151
+ * tokens within the list.
152
+ */
120
153
  uint8_t breakpoints[11];
121
154
  } list;
122
155
 
123
156
  struct {
124
- // This keeps track of the nesting level of the regular expression.
157
+ /**
158
+ * This keeps track of the nesting level of the regular expression.
159
+ */
125
160
  size_t nesting;
126
161
 
127
- // When lexing a regular expression, it takes into account balancing
128
- // the terminator if the terminator is one of (), [], {}, or <>.
162
+ /**
163
+ * When lexing a regular expression, it takes into account balancing
164
+ * the terminator if the terminator is one of (), [], {}, or <>.
165
+ */
129
166
  uint8_t incrementor;
130
167
 
131
- // This is the terminator of the regular expression.
168
+ /** This is the terminator of the regular expression. */
132
169
  uint8_t terminator;
133
170
 
134
- // This is the character set that should be used to delimit the
135
- // tokens within the regular expression.
171
+ /**
172
+ * This is the character set that should be used to delimit the
173
+ * tokens within the regular expression.
174
+ */
136
175
  uint8_t breakpoints[6];
137
176
  } regexp;
138
177
 
139
178
  struct {
140
- // This keeps track of the nesting level of the string.
179
+ /** This keeps track of the nesting level of the string. */
141
180
  size_t nesting;
142
181
 
143
- // Whether or not interpolation is allowed in this string.
182
+ /** Whether or not interpolation is allowed in this string. */
144
183
  bool interpolation;
145
184
 
146
- // Whether or not at the end of the string we should allow a :,
147
- // which would indicate this was a dynamic symbol instead of a
148
- // string.
185
+ /**
186
+ * Whether or not at the end of the string we should allow a :,
187
+ * which would indicate this was a dynamic symbol instead of a
188
+ * string.
189
+ */
149
190
  bool label_allowed;
150
191
 
151
- // When lexing a string, it takes into account balancing the
152
- // terminator if the terminator is one of (), [], {}, or <>.
192
+ /**
193
+ * When lexing a string, it takes into account balancing the
194
+ * terminator if the terminator is one of (), [], {}, or <>.
195
+ */
153
196
  uint8_t incrementor;
154
197
 
155
- // This is the terminator of the string. It is typically either a
156
- // single or double quote.
198
+ /**
199
+ * This is the terminator of the string. It is typically either a
200
+ * single or double quote.
201
+ */
157
202
  uint8_t terminator;
158
203
 
159
- // This is the character set that should be used to delimit the
160
- // tokens within the string.
204
+ /**
205
+ * This is the character set that should be used to delimit the
206
+ * tokens within the string.
207
+ */
161
208
  uint8_t breakpoints[6];
162
209
  } string;
163
210
 
164
211
  struct {
165
- // These pointers point to the beginning and end of the heredoc
166
- // identifier.
212
+ /** A pointer to the start of the heredoc identifier. */
167
213
  const uint8_t *ident_start;
214
+
215
+ /** The length of the heredoc identifier. */
168
216
  size_t ident_length;
169
217
 
218
+ /** The type of quote that the heredoc uses. */
170
219
  pm_heredoc_quote_t quote;
220
+
221
+ /** The type of indentation that the heredoc uses. */
171
222
  pm_heredoc_indent_t indent;
172
223
 
173
- // This is the pointer to the character where lexing should resume
174
- // once the heredoc has been completely processed.
224
+ /**
225
+ * This is the pointer to the character where lexing should resume
226
+ * once the heredoc has been completely processed.
227
+ */
175
228
  const uint8_t *next_start;
176
229
 
177
- // This is used to track the amount of common whitespace on each
178
- // line so that we know how much to dedent each line in the case of
179
- // a tilde heredoc.
230
+ /**
231
+ * This is used to track the amount of common whitespace on each
232
+ * line so that we know how much to dedent each line in the case of
233
+ * a tilde heredoc.
234
+ */
180
235
  size_t common_whitespace;
181
236
  } heredoc;
182
237
  } as;
183
238
 
184
- // The previous lex state so that it knows how to pop.
239
+ /** The previous lex state so that it knows how to pop. */
185
240
  struct pm_lex_mode *prev;
186
241
  } pm_lex_mode_t;
187
242
 
188
- // We pre-allocate a certain number of lex states in order to avoid having to
189
- // call malloc too many times while parsing. You really shouldn't need more than
190
- // this because you only really nest deeply when doing string interpolation.
243
+ /**
244
+ * We pre-allocate a certain number of lex states in order to avoid having to
245
+ * call malloc too many times while parsing. You really shouldn't need more than
246
+ * this because you only really nest deeply when doing string interpolation.
247
+ */
191
248
  #define PM_LEX_STACK_SIZE 4
192
249
 
193
- // A forward declaration since our error handler struct accepts a parser for
194
- // each of its function calls.
250
+ /**
251
+ * The parser used to parse Ruby source.
252
+ */
195
253
  typedef struct pm_parser pm_parser_t;
196
254
 
197
- // While parsing, we keep track of a stack of contexts. This is helpful for
198
- // error recovery so that we can pop back to a previous context when we hit a
199
- // token that is understood by a parent context but not by the current context.
255
+ /**
256
+ * While parsing, we keep track of a stack of contexts. This is helpful for
257
+ * error recovery so that we can pop back to a previous context when we hit a
258
+ * token that is understood by a parent context but not by the current context.
259
+ */
200
260
  typedef enum {
201
- PM_CONTEXT_BEGIN, // a begin statement
202
- PM_CONTEXT_BLOCK_BRACES, // expressions in block arguments using braces
203
- PM_CONTEXT_BLOCK_KEYWORDS, // expressions in block arguments using do..end
204
- PM_CONTEXT_CASE_WHEN, // a case when statements
205
- PM_CONTEXT_CASE_IN, // a case in statements
206
- PM_CONTEXT_CLASS, // a class declaration
207
- PM_CONTEXT_DEF, // a method definition
208
- PM_CONTEXT_DEF_PARAMS, // a method definition's parameters
209
- PM_CONTEXT_DEFAULT_PARAMS, // a method definition's default parameter
210
- PM_CONTEXT_ELSE, // an else clause
211
- PM_CONTEXT_ELSIF, // an elsif clause
212
- PM_CONTEXT_EMBEXPR, // an interpolated expression
213
- PM_CONTEXT_ENSURE, // an ensure statement
214
- PM_CONTEXT_FOR, // a for loop
215
- PM_CONTEXT_IF, // an if statement
216
- PM_CONTEXT_LAMBDA_BRACES, // a lambda expression with braces
217
- PM_CONTEXT_LAMBDA_DO_END, // a lambda expression with do..end
218
- PM_CONTEXT_MAIN, // the top level context
219
- PM_CONTEXT_MODULE, // a module declaration
220
- PM_CONTEXT_PARENS, // a parenthesized expression
221
- PM_CONTEXT_POSTEXE, // an END block
222
- PM_CONTEXT_PREDICATE, // a predicate inside an if/elsif/unless statement
223
- PM_CONTEXT_PREEXE, // a BEGIN block
224
- PM_CONTEXT_RESCUE_ELSE, // a rescue else statement
225
- PM_CONTEXT_RESCUE, // a rescue statement
226
- PM_CONTEXT_SCLASS, // a singleton class definition
227
- PM_CONTEXT_UNLESS, // an unless statement
228
- PM_CONTEXT_UNTIL, // an until statement
229
- PM_CONTEXT_WHILE, // a while statement
261
+ /** a begin statement */
262
+ PM_CONTEXT_BEGIN,
263
+
264
+ /** expressions in block arguments using braces */
265
+ PM_CONTEXT_BLOCK_BRACES,
266
+
267
+ /** expressions in block arguments using do..end */
268
+ PM_CONTEXT_BLOCK_KEYWORDS,
269
+
270
+ /** a case when statement */
271
+ PM_CONTEXT_CASE_WHEN,
272
+
273
+ /** a case in statement */
274
+ PM_CONTEXT_CASE_IN,
275
+
276
+ /** a class declaration */
277
+ PM_CONTEXT_CLASS,
278
+
279
+ /** a method definition */
280
+ PM_CONTEXT_DEF,
281
+
282
+ /** a method definition's parameters */
283
+ PM_CONTEXT_DEF_PARAMS,
284
+
285
+ /** a method definition's default parameter */
286
+ PM_CONTEXT_DEFAULT_PARAMS,
287
+
288
+ /** an else clause */
289
+ PM_CONTEXT_ELSE,
290
+
291
+ /** an elsif clause */
292
+ PM_CONTEXT_ELSIF,
293
+
294
+ /** an interpolated expression */
295
+ PM_CONTEXT_EMBEXPR,
296
+
297
+ /** an ensure statement */
298
+ PM_CONTEXT_ENSURE,
299
+
300
+ /** a for loop */
301
+ PM_CONTEXT_FOR,
302
+
303
+ /** a for loop's index */
304
+ PM_CONTEXT_FOR_INDEX,
305
+
306
+ /** an if statement */
307
+ PM_CONTEXT_IF,
308
+
309
+ /** a lambda expression with braces */
310
+ PM_CONTEXT_LAMBDA_BRACES,
311
+
312
+ /** a lambda expression with do..end */
313
+ PM_CONTEXT_LAMBDA_DO_END,
314
+
315
+ /** the top level context */
316
+ PM_CONTEXT_MAIN,
317
+
318
+ /** a module declaration */
319
+ PM_CONTEXT_MODULE,
320
+
321
+ /** a parenthesized expression */
322
+ PM_CONTEXT_PARENS,
323
+
324
+ /** an END block */
325
+ PM_CONTEXT_POSTEXE,
326
+
327
+ /** a predicate inside an if/elsif/unless statement */
328
+ PM_CONTEXT_PREDICATE,
329
+
330
+ /** a BEGIN block */
331
+ PM_CONTEXT_PREEXE,
332
+
333
+ /** a rescue else statement */
334
+ PM_CONTEXT_RESCUE_ELSE,
335
+
336
+ /** a rescue statement */
337
+ PM_CONTEXT_RESCUE,
338
+
339
+ /** a singleton class definition */
340
+ PM_CONTEXT_SCLASS,
341
+
342
+ /** an unless statement */
343
+ PM_CONTEXT_UNLESS,
344
+
345
+ /** an until statement */
346
+ PM_CONTEXT_UNTIL,
347
+
348
+ /** a while statement */
349
+ PM_CONTEXT_WHILE,
230
350
  } pm_context_t;
231
351
 
232
- // This is a node in a linked list of contexts.
352
+ /** This is a node in a linked list of contexts. */
233
353
  typedef struct pm_context_node {
354
+ /** The context that this node represents. */
234
355
  pm_context_t context;
356
+
357
+ /** A pointer to the previous context in the linked list. */
235
358
  struct pm_context_node *prev;
236
359
  } pm_context_node_t;
237
360
 
238
- // This is the type of a comment that we've found while parsing.
361
+ /** This is the type of a comment that we've found while parsing. */
239
362
  typedef enum {
240
363
  PM_COMMENT_INLINE,
241
364
  PM_COMMENT_EMBDOC,
242
365
  PM_COMMENT___END__
243
366
  } pm_comment_type_t;
244
367
 
245
- // This is a node in the linked list of comments that we've found while parsing.
368
+ /**
369
+ * This is a node in the linked list of comments that we've found while parsing.
370
+ *
371
+ * @extends pm_list_node_t
372
+ */
246
373
  typedef struct pm_comment {
374
+ /** The embedded base node. */
247
375
  pm_list_node_t node;
376
+
377
+ /** A pointer to the start of the comment in the source. */
248
378
  const uint8_t *start;
379
+
380
+ /** A pointer to the end of the comment in the source. */
249
381
  const uint8_t *end;
382
+
383
+ /** The type of comment that we've found. */
250
384
  pm_comment_type_t type;
251
385
  } pm_comment_t;
252
386
 
253
- // This is a node in the linked list of magic comments that we've found while
254
- // parsing.
387
+ /**
388
+ * This is a node in the linked list of magic comments that we've found while
389
+ * parsing.
390
+ *
391
+ * @extends pm_list_node_t
392
+ */
255
393
  typedef struct {
394
+ /** The embedded base node. */
256
395
  pm_list_node_t node;
396
+
397
+ /** A pointer to the start of the key in the source. */
257
398
  const uint8_t *key_start;
399
+
400
+ /** A pointer to the start of the value in the source. */
258
401
  const uint8_t *value_start;
402
+
403
+ /** The length of the key in the source. */
259
404
  uint32_t key_length;
405
+
406
+ /** The length of the value in the source. */
260
407
  uint32_t value_length;
261
408
  } pm_magic_comment_t;
262
409
 
263
- // When the encoding that is being used to parse the source is changed by prism,
264
- // we provide the ability here to call out to a user-defined function.
410
+ /**
411
+ * When the encoding that is being used to parse the source is changed by prism,
412
+ * we provide the ability here to call out to a user-defined function.
413
+ */
265
414
  typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
266
415
 
267
- // When an encoding is encountered that isn't understood by prism, we provide
268
- // the ability here to call out to a user-defined function to get an encoding
269
- // struct. If the function returns something that isn't NULL, we set that to
270
- // our encoding and use it to parse identifiers.
416
+ /**
417
+ * When an encoding is encountered that isn't understood by prism, we provide
418
+ * the ability here to call out to a user-defined function to get an encoding
419
+ * struct. If the function returns something that isn't NULL, we set that to
420
+ * our encoding and use it to parse identifiers.
421
+ */
271
422
  typedef pm_encoding_t *(*pm_encoding_decode_callback_t)(pm_parser_t *parser, const uint8_t *name, size_t width);
272
423
 
273
- // When you are lexing through a file, the lexer needs all of the information
274
- // that the parser additionally provides (for example, the local table). So if
275
- // you want to properly lex Ruby, you need to actually lex it in the context of
276
- // the parser. In order to provide this functionality, we optionally allow a
277
- // struct to be attached to the parser that calls back out to a user-provided
278
- // callback when each token is lexed.
424
+ /**
425
+ * When you are lexing through a file, the lexer needs all of the information
426
+ * that the parser additionally provides (for example, the local table). So if
427
+ * you want to properly lex Ruby, you need to actually lex it in the context of
428
+ * the parser. In order to provide this functionality, we optionally allow a
429
+ * struct to be attached to the parser that calls back out to a user-provided
430
+ * callback when each token is lexed.
431
+ */
279
432
  typedef struct {
280
- // This opaque pointer is used to provide whatever information the user
281
- // deemed necessary to the callback. In our case we use it to pass the array
282
- // that the tokens get appended into.
433
+ /**
434
+ * This opaque pointer is used to provide whatever information the user
435
+ * deemed necessary to the callback. In our case we use it to pass the array
436
+ * that the tokens get appended into.
437
+ */
283
438
  void *data;
284
439
 
285
- // This is the callback that is called when a token is lexed. It is passed
286
- // the opaque data pointer, the parser, and the token that was lexed.
440
+ /**
441
+ * This is the callback that is called when a token is lexed. It is passed
442
+ * the opaque data pointer, the parser, and the token that was lexed.
443
+ */
287
444
  void (*callback)(void *data, pm_parser_t *parser, pm_token_t *token);
288
445
  } pm_lex_callback_t;
289
446
 
290
- // This struct represents a node in a linked list of scopes. Some scopes can see
291
- // into their parent scopes, while others cannot.
447
+ /**
448
+ * This struct represents a node in a linked list of scopes. Some scopes can see
449
+ * into their parent scopes, while others cannot.
450
+ */
292
451
  typedef struct pm_scope {
293
- // The IDs of the locals in the given scope.
452
+ /** The IDs of the locals in the given scope. */
294
453
  pm_constant_id_list_t locals;
295
454
 
296
- // A pointer to the previous scope in the linked list.
455
+ /** A pointer to the previous scope in the linked list. */
297
456
  struct pm_scope *previous;
298
457
 
299
- // A boolean indicating whether or not this scope can see into its parent.
300
- // If closed is true, then the scope cannot see into its parent.
458
+ /**
459
+ * A boolean indicating whether or not this scope can see into its parent.
460
+ * If closed is true, then the scope cannot see into its parent.
461
+ */
301
462
  bool closed;
302
463
 
303
- // A boolean indicating whether or not this scope has explicit parameters.
304
- // This is necessary to determine whether or not numbered parameters are
305
- // allowed.
464
+ /**
465
+ * A boolean indicating whether or not this scope has explicit parameters.
466
+ * This is necessary to determine whether or not numbered parameters are
467
+ * allowed.
468
+ */
306
469
  bool explicit_params;
307
470
 
308
- // A boolean indicating whether or not this scope has numbered parameters.
309
- // This is necessary to determine if child blocks are allowed to use
310
- // numbered parameters.
471
+ /**
472
+ * A boolean indicating whether or not this scope has numbered parameters.
473
+ * This is necessary to determine if child blocks are allowed to use
474
+ * numbered parameters.
475
+ */
311
476
  bool numbered_params;
312
477
 
313
- // A transparent scope is a scope that cannot have locals set on itself.
314
- // When a local is set on this scope, it will instead be set on the parent
315
- // scope's local table.
478
+ /**
479
+ * A transparent scope is a scope that cannot have locals set on itself.
480
+ * When a local is set on this scope, it will instead be set on the parent
481
+ * scope's local table.
482
+ */
316
483
  bool transparent;
317
484
  } pm_scope_t;
318
485
 
319
- // This struct represents the overall parser. It contains a reference to the
320
- // source file, as well as pointers that indicate where in the source it's
321
- // currently parsing. It also contains the most recent and current token that
322
- // it's considering.
486
+ /**
487
+ * This struct represents the overall parser. It contains a reference to the
488
+ * source file, as well as pointers that indicate where in the source it's
489
+ * currently parsing. It also contains the most recent and current token that
490
+ * it's considering.
491
+ */
323
492
  struct pm_parser {
324
- pm_lex_state_t lex_state; // the current state of the lexer
325
- int enclosure_nesting; // tracks the current nesting of (), [], and {}
493
+ /** The current state of the lexer. */
494
+ pm_lex_state_t lex_state;
495
+
496
+ /** Tracks the current nesting of (), [], and {}. */
497
+ int enclosure_nesting;
326
498
 
327
- // Used to temporarily track the nesting of enclosures to determine if a {
328
- // is the beginning of a lambda following the parameters of a lambda.
499
+ /**
500
+ * Used to temporarily track the nesting of enclosures to determine if a {
501
+ * is the beginning of a lambda following the parameters of a lambda.
502
+ */
329
503
  int lambda_enclosure_nesting;
330
504
 
331
- // Used to track the nesting of braces to ensure we get the correct value
332
- // when we are interpolating blocks with braces.
505
+ /**
506
+ * Used to track the nesting of braces to ensure we get the correct value
507
+ * when we are interpolating blocks with braces.
508
+ */
333
509
  int brace_nesting;
334
510
 
335
- // the stack used to determine if a do keyword belongs to the predicate of a
336
- // while, until, or for loop
511
+ /**
512
+ * The stack used to determine if a do keyword belongs to the predicate of a
513
+ * while, until, or for loop.
514
+ */
337
515
  pm_state_stack_t do_loop_stack;
338
516
 
339
- // the stack used to determine if a do keyword belongs to the beginning of a
340
- // block
517
+ /**
518
+ * The stack used to determine if a do keyword belongs to the beginning of a
519
+ * block.
520
+ */
341
521
  pm_state_stack_t accepts_block_stack;
342
522
 
523
+ /** A stack of lex modes. */
343
524
  struct {
344
- pm_lex_mode_t *current; // the current mode of the lexer
345
- pm_lex_mode_t stack[PM_LEX_STACK_SIZE]; // the stack of lexer modes
346
- size_t index; // the current index into the lexer mode stack
525
+ /** The current mode of the lexer. */
526
+ pm_lex_mode_t *current;
527
+
528
+ /** The stack of lexer modes. */
529
+ pm_lex_mode_t stack[PM_LEX_STACK_SIZE];
530
+
531
+ /** The current index into the lexer mode stack. */
532
+ size_t index;
347
533
  } lex_modes;
348
534
 
349
- const uint8_t *start; // the pointer to the start of the source
350
- const uint8_t *end; // the pointer to the end of the source
351
- pm_token_t previous; // the previous token we were considering
352
- pm_token_t current; // the current token we're considering
535
+ /** The pointer to the start of the source. */
536
+ const uint8_t *start;
537
+
538
+ /** The pointer to the end of the source. */
539
+ const uint8_t *end;
540
+
541
+ /** The previous token we were considering. */
542
+ pm_token_t previous;
543
+
544
+ /** The current token we're considering. */
545
+ pm_token_t current;
353
546
 
354
- // This is a special field set on the parser when we need the parser to jump
355
- // to a specific location when lexing the next token, as opposed to just
356
- // using the end of the previous token. Normally this is NULL.
547
+ /**
548
+ * This is a special field set on the parser when we need the parser to jump
549
+ * to a specific location when lexing the next token, as opposed to just
550
+ * using the end of the previous token. Normally this is NULL.
551
+ */
357
552
  const uint8_t *next_start;
358
553
 
359
- // This field indicates the end of a heredoc whose identifier was found on
360
- // the current line. If another heredoc is found on the same line, then this
361
- // will be moved forward to the end of that heredoc. If no heredocs are
362
- // found on a line then this is NULL.
554
+ /**
555
+ * This field indicates the end of a heredoc whose identifier was found on
556
+ * the current line. If another heredoc is found on the same line, then this
557
+ * will be moved forward to the end of that heredoc. If no heredocs are
558
+ * found on a line then this is NULL.
559
+ */
363
560
  const uint8_t *heredoc_end;
364
561
 
365
- pm_list_t comment_list; // the list of comments that have been found while parsing
366
- pm_list_t magic_comment_list; // the list of magic comments that have been found while parsing.
367
- pm_list_t warning_list; // the list of warnings that have been found while parsing
368
- pm_list_t error_list; // the list of errors that have been found while parsing
369
- pm_scope_t *current_scope; // the current local scope
562
+ /** The list of comments that have been found while parsing. */
563
+ pm_list_t comment_list;
370
564
 
371
- pm_context_node_t *current_context; // the current parsing context
565
+ /** The list of magic comments that have been found while parsing. */
566
+ pm_list_t magic_comment_list;
372
567
 
373
- // The encoding functions for the current file is attached to the parser as
374
- // it's parsing so that it can change with a magic comment.
568
+ /** The list of warnings that have been found while parsing. */
569
+ pm_list_t warning_list;
570
+
571
+ /** The list of errors that have been found while parsing. */
572
+ pm_list_t error_list;
573
+
574
+ /** The current local scope. */
575
+ pm_scope_t *current_scope;
576
+
577
+ /** The current parsing context. */
578
+ pm_context_node_t *current_context;
579
+
580
+ /**
581
+ * The encoding functions for the current file are attached to the parser as
582
+ * it's parsing so that it can change with a magic comment.
583
+ */
375
584
  pm_encoding_t encoding;
376
585
 
377
- // When the encoding that is being used to parse the source is changed by
378
- // prism, we provide the ability here to call out to a user-defined
379
- // function.
586
+ /**
587
+ * When the encoding that is being used to parse the source is changed by
588
+ * prism, we provide the ability here to call out to a user-defined
589
+ * function.
590
+ */
380
591
  pm_encoding_changed_callback_t encoding_changed_callback;
381
592
 
382
- // When an encoding is encountered that isn't understood by prism, we
383
- // provide the ability here to call out to a user-defined function to get an
384
- // encoding struct. If the function returns something that isn't NULL, we
385
- // set that to our encoding and use it to parse identifiers.
593
+ /**
594
+ * When an encoding is encountered that isn't understood by prism, we
595
+ * provide the ability here to call out to a user-defined function to get an
596
+ * encoding struct. If the function returns something that isn't NULL, we
597
+ * set that to our encoding and use it to parse identifiers.
598
+ */
386
599
  pm_encoding_decode_callback_t encoding_decode_callback;
387
600
 
388
- // This pointer indicates where a comment must start if it is to be
389
- // considered an encoding comment.
601
+ /**
602
+ * This pointer indicates where a comment must start if it is to be
603
+ * considered an encoding comment.
604
+ */
390
605
  const uint8_t *encoding_comment_start;
391
606
 
392
- // This is an optional callback that can be attached to the parser that will
393
- // be called whenever a new token is lexed by the parser.
607
+ /**
608
+ * This is an optional callback that can be attached to the parser that will
609
+ * be called whenever a new token is lexed by the parser.
610
+ */
394
611
  pm_lex_callback_t *lex_callback;
395
612
 
396
- // This is the path of the file being parsed
397
- // We use the filepath when constructing SourceFileNodes
613
+ /**
614
+ * This is the path of the file being parsed. We use the filepath when
615
+ * constructing SourceFileNodes.
616
+ */
398
617
  pm_string_t filepath_string;
399
618
 
400
- // This constant pool keeps all of the constants defined throughout the file
401
- // so that we can reference them later.
619
+ /**
620
+ * This constant pool keeps all of the constants defined throughout the file
621
+ * so that we can reference them later.
622
+ */
402
623
  pm_constant_pool_t constant_pool;
403
624
 
404
- // This is the list of newline offsets in the source file.
625
+ /** This is the list of newline offsets in the source file. */
405
626
  pm_newline_list_t newline_list;
406
627
 
407
- // We want to add a flag to integer nodes that indicates their base. We only
408
- // want to parse these once, but we don't have space on the token itself to
409
- // communicate this information. So we store it here and pass it through
410
- // when we find tokens that we need it for.
628
+ /**
629
+ * We want to add a flag to integer nodes that indicates their base. We only
630
+ * want to parse these once, but we don't have space on the token itself to
631
+ * communicate this information. So we store it here and pass it through
632
+ * when we find tokens that we need it for.
633
+ */
411
634
  pm_node_flags_t integer_base;
412
635
 
413
- // This string is used to pass information from the lexer to the parser. It
414
- // is particularly necessary because of escape sequences.
636
+ /**
637
+ * This string is used to pass information from the lexer to the parser. It
638
+ * is particularly necessary because of escape sequences.
639
+ */
415
640
  pm_string_t current_string;
416
641
 
417
- // Whether or not we're at the beginning of a command
642
+ /**
643
+ * The line number at the start of the parse. This will be used to offset
644
+ * the line numbers of all of the locations.
645
+ */
646
+ uint32_t start_line;
647
+
648
+ /** Whether or not we're at the beginning of a command. */
418
649
  bool command_start;
419
650
 
420
- // Whether or not we're currently recovering from a syntax error
651
+ /** Whether or not we're currently recovering from a syntax error. */
421
652
  bool recovering;
422
653
 
423
- // Whether or not the encoding has been changed by a magic comment. We use
424
- // this to provide a fast path for the lexer instead of going through the
425
- // function pointer.
654
+ /**
655
+ * Whether or not the encoding has been changed by a magic comment. We use
656
+ * this to provide a fast path for the lexer instead of going through the
657
+ * function pointer.
658
+ */
426
659
  bool encoding_changed;
427
660
 
428
- // This flag indicates that we are currently parsing a pattern matching
429
- // expression and impacts the calculation of newlines.
661
+ /**
662
+ * This flag indicates that we are currently parsing a pattern matching
663
+ * expression and impacts the calculation of newlines.
664
+ */
430
665
  bool pattern_matching_newlines;
431
666
 
432
- // This flag indicates that we are currently parsing a keyword argument.
667
+ /** This flag indicates that we are currently parsing a keyword argument. */
433
668
  bool in_keyword_arg;
434
669
 
435
- // Whether or not the parser has seen a token that has semantic meaning
436
- // (i.e., a token that is not a comment or whitespace).
670
+ /**
671
+ * Whether or not the parser has seen a token that has semantic meaning
672
+ * (i.e., a token that is not a comment or whitespace).
673
+ */
437
674
  bool semantic_token_seen;
438
675
 
439
- // Whether or not we have found a frozen_string_literal magic comment with
440
- // a true value.
676
+ /**
677
+ * Whether or not we have found a frozen_string_literal magic comment with
678
+ * a true value.
679
+ */
441
680
  bool frozen_string_literal;
681
+
682
+ /**
683
+ * Whether or not we should suppress warnings. This will be set to true if the
684
+ * consumer of the library specified it, usually because they are parsing
685
+ * when $VERBOSE is nil.
686
+ */
687
+ bool suppress_warnings;
442
688
  };
443
689
 
444
- #endif // PRISM_PARSER_H
690
+ #endif