yarp 0.12.0 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +29 -8
  3. data/CONTRIBUTING.md +2 -2
  4. data/Makefile +5 -5
  5. data/README.md +11 -12
  6. data/config.yml +6 -2
  7. data/docs/build_system.md +21 -21
  8. data/docs/building.md +4 -4
  9. data/docs/configuration.md +25 -21
  10. data/docs/design.md +2 -2
  11. data/docs/encoding.md +17 -17
  12. data/docs/fuzzing.md +4 -4
  13. data/docs/heredocs.md +3 -3
  14. data/docs/mapping.md +94 -94
  15. data/docs/ripper.md +4 -4
  16. data/docs/ruby_api.md +11 -11
  17. data/docs/serialization.md +17 -16
  18. data/docs/testing.md +6 -6
  19. data/ext/prism/api_node.c +4725 -0
  20. data/ext/{yarp → prism}/api_pack.c +82 -82
  21. data/ext/{yarp → prism}/extconf.rb +13 -13
  22. data/ext/{yarp → prism}/extension.c +175 -168
  23. data/ext/prism/extension.h +18 -0
  24. data/include/prism/ast.h +1932 -0
  25. data/include/prism/defines.h +45 -0
  26. data/include/prism/diagnostic.h +231 -0
  27. data/include/{yarp/enc/yp_encoding.h → prism/enc/pm_encoding.h} +40 -40
  28. data/include/prism/node.h +41 -0
  29. data/include/prism/pack.h +141 -0
  30. data/include/{yarp → prism}/parser.h +143 -142
  31. data/include/prism/regexp.h +19 -0
  32. data/include/prism/unescape.h +48 -0
  33. data/include/prism/util/pm_buffer.h +51 -0
  34. data/include/{yarp/util/yp_char.h → prism/util/pm_char.h} +20 -20
  35. data/include/{yarp/util/yp_constant_pool.h → prism/util/pm_constant_pool.h} +26 -22
  36. data/include/{yarp/util/yp_list.h → prism/util/pm_list.h} +21 -21
  37. data/include/prism/util/pm_memchr.h +14 -0
  38. data/include/{yarp/util/yp_newline_list.h → prism/util/pm_newline_list.h} +11 -11
  39. data/include/prism/util/pm_state_stack.h +24 -0
  40. data/include/{yarp/util/yp_string.h → prism/util/pm_string.h} +20 -20
  41. data/include/prism/util/pm_string_list.h +25 -0
  42. data/include/{yarp/util/yp_strpbrk.h → prism/util/pm_strpbrk.h} +7 -7
  43. data/include/prism/version.h +4 -0
  44. data/include/prism.h +82 -0
  45. data/lib/prism/compiler.rb +465 -0
  46. data/lib/prism/debug.rb +157 -0
  47. data/lib/{yarp/desugar_visitor.rb → prism/desugar_compiler.rb} +4 -2
  48. data/lib/prism/dispatcher.rb +2051 -0
  49. data/lib/prism/dsl.rb +750 -0
  50. data/lib/{yarp → prism}/ffi.rb +66 -67
  51. data/lib/{yarp → prism}/lex_compat.rb +40 -43
  52. data/lib/{yarp/mutation_visitor.rb → prism/mutation_compiler.rb} +3 -3
  53. data/lib/{yarp → prism}/node.rb +2012 -2593
  54. data/lib/prism/node_ext.rb +55 -0
  55. data/lib/prism/node_inspector.rb +68 -0
  56. data/lib/{yarp → prism}/pack.rb +1 -1
  57. data/lib/{yarp → prism}/parse_result/comments.rb +1 -1
  58. data/lib/{yarp → prism}/parse_result/newlines.rb +1 -1
  59. data/lib/prism/parse_result.rb +266 -0
  60. data/lib/{yarp → prism}/pattern.rb +14 -14
  61. data/lib/{yarp → prism}/ripper_compat.rb +5 -5
  62. data/lib/{yarp → prism}/serialize.rb +12 -7
  63. data/lib/prism/visitor.rb +470 -0
  64. data/lib/prism.rb +64 -0
  65. data/lib/yarp.rb +2 -614
  66. data/src/diagnostic.c +213 -208
  67. data/src/enc/pm_big5.c +52 -0
  68. data/src/enc/pm_euc_jp.c +58 -0
  69. data/src/enc/{yp_gbk.c → pm_gbk.c} +16 -16
  70. data/src/enc/pm_shift_jis.c +56 -0
  71. data/src/enc/{yp_tables.c → pm_tables.c} +69 -69
  72. data/src/enc/{yp_unicode.c → pm_unicode.c} +40 -40
  73. data/src/enc/pm_windows_31j.c +56 -0
  74. data/src/node.c +1293 -1233
  75. data/src/pack.c +247 -247
  76. data/src/prettyprint.c +1479 -1479
  77. data/src/{yarp.c → prism.c} +5205 -5083
  78. data/src/regexp.c +132 -132
  79. data/src/serialize.c +1121 -1121
  80. data/src/token_type.c +169 -167
  81. data/src/unescape.c +106 -87
  82. data/src/util/pm_buffer.c +103 -0
  83. data/src/util/{yp_char.c → pm_char.c} +72 -72
  84. data/src/util/{yp_constant_pool.c → pm_constant_pool.c} +85 -64
  85. data/src/util/{yp_list.c → pm_list.c} +10 -10
  86. data/src/util/{yp_memchr.c → pm_memchr.c} +6 -4
  87. data/src/util/{yp_newline_list.c → pm_newline_list.c} +21 -21
  88. data/src/util/{yp_state_stack.c → pm_state_stack.c} +4 -4
  89. data/src/util/{yp_string.c → pm_string.c} +38 -38
  90. data/src/util/pm_string_list.c +29 -0
  91. data/src/util/{yp_strncasecmp.c → pm_strncasecmp.c} +1 -1
  92. data/src/util/{yp_strpbrk.c → pm_strpbrk.c} +8 -8
  93. data/yarp.gemspec +68 -59
  94. metadata +70 -61
  95. data/ext/yarp/api_node.c +0 -4728
  96. data/ext/yarp/extension.h +0 -18
  97. data/include/yarp/ast.h +0 -1929
  98. data/include/yarp/defines.h +0 -45
  99. data/include/yarp/diagnostic.h +0 -226
  100. data/include/yarp/node.h +0 -42
  101. data/include/yarp/pack.h +0 -141
  102. data/include/yarp/regexp.h +0 -19
  103. data/include/yarp/unescape.h +0 -44
  104. data/include/yarp/util/yp_buffer.h +0 -51
  105. data/include/yarp/util/yp_memchr.h +0 -14
  106. data/include/yarp/util/yp_state_stack.h +0 -24
  107. data/include/yarp/util/yp_string_list.h +0 -25
  108. data/include/yarp/version.h +0 -4
  109. data/include/yarp.h +0 -82
  110. data/src/enc/yp_big5.c +0 -52
  111. data/src/enc/yp_euc_jp.c +0 -58
  112. data/src/enc/yp_shift_jis.c +0 -56
  113. data/src/enc/yp_windows_31j.c +0 -56
  114. data/src/util/yp_buffer.c +0 -101
  115. data/src/util/yp_string_list.c +0 -29
@@ -1,13 +1,13 @@
1
- #ifndef YARP_PARSER_H
2
- #define YARP_PARSER_H
1
+ #ifndef PRISM_PARSER_H
2
+ #define PRISM_PARSER_H
3
3
 
4
- #include "yarp/ast.h"
5
- #include "yarp/defines.h"
6
- #include "yarp/enc/yp_encoding.h"
7
- #include "yarp/util/yp_constant_pool.h"
8
- #include "yarp/util/yp_list.h"
9
- #include "yarp/util/yp_newline_list.h"
10
- #include "yarp/util/yp_state_stack.h"
4
+ #include "prism/ast.h"
5
+ #include "prism/defines.h"
6
+ #include "prism/enc/pm_encoding.h"
7
+ #include "prism/util/pm_constant_pool.h"
8
+ #include "prism/util/pm_list.h"
9
+ #include "prism/util/pm_newline_list.h"
10
+ #include "prism/util/pm_state_stack.h"
11
11
 
12
12
  #include <stdbool.h>
13
13
 
@@ -15,88 +15,88 @@
15
15
  // the lexer can track. This is used to determine which kind of token to return
16
16
  // based on the context of the parser.
17
17
  typedef enum {
18
- YP_LEX_STATE_BIT_BEG,
19
- YP_LEX_STATE_BIT_END,
20
- YP_LEX_STATE_BIT_ENDARG,
21
- YP_LEX_STATE_BIT_ENDFN,
22
- YP_LEX_STATE_BIT_ARG,
23
- YP_LEX_STATE_BIT_CMDARG,
24
- YP_LEX_STATE_BIT_MID,
25
- YP_LEX_STATE_BIT_FNAME,
26
- YP_LEX_STATE_BIT_DOT,
27
- YP_LEX_STATE_BIT_CLASS,
28
- YP_LEX_STATE_BIT_LABEL,
29
- YP_LEX_STATE_BIT_LABELED,
30
- YP_LEX_STATE_BIT_FITEM
31
- } yp_lex_state_bit_t;
18
+ PM_LEX_STATE_BIT_BEG,
19
+ PM_LEX_STATE_BIT_END,
20
+ PM_LEX_STATE_BIT_ENDARG,
21
+ PM_LEX_STATE_BIT_ENDFN,
22
+ PM_LEX_STATE_BIT_ARG,
23
+ PM_LEX_STATE_BIT_CMDARG,
24
+ PM_LEX_STATE_BIT_MID,
25
+ PM_LEX_STATE_BIT_FNAME,
26
+ PM_LEX_STATE_BIT_DOT,
27
+ PM_LEX_STATE_BIT_CLASS,
28
+ PM_LEX_STATE_BIT_LABEL,
29
+ PM_LEX_STATE_BIT_LABELED,
30
+ PM_LEX_STATE_BIT_FITEM
31
+ } pm_lex_state_bit_t;
32
32
 
33
33
  // This enum combines the various bits from the above enum into individual
34
34
  // values that represent the various states of the lexer.
35
35
  typedef enum {
36
- YP_LEX_STATE_NONE = 0,
37
- YP_LEX_STATE_BEG = (1 << YP_LEX_STATE_BIT_BEG),
38
- YP_LEX_STATE_END = (1 << YP_LEX_STATE_BIT_END),
39
- YP_LEX_STATE_ENDARG = (1 << YP_LEX_STATE_BIT_ENDARG),
40
- YP_LEX_STATE_ENDFN = (1 << YP_LEX_STATE_BIT_ENDFN),
41
- YP_LEX_STATE_ARG = (1 << YP_LEX_STATE_BIT_ARG),
42
- YP_LEX_STATE_CMDARG = (1 << YP_LEX_STATE_BIT_CMDARG),
43
- YP_LEX_STATE_MID = (1 << YP_LEX_STATE_BIT_MID),
44
- YP_LEX_STATE_FNAME = (1 << YP_LEX_STATE_BIT_FNAME),
45
- YP_LEX_STATE_DOT = (1 << YP_LEX_STATE_BIT_DOT),
46
- YP_LEX_STATE_CLASS = (1 << YP_LEX_STATE_BIT_CLASS),
47
- YP_LEX_STATE_LABEL = (1 << YP_LEX_STATE_BIT_LABEL),
48
- YP_LEX_STATE_LABELED = (1 << YP_LEX_STATE_BIT_LABELED),
49
- YP_LEX_STATE_FITEM = (1 << YP_LEX_STATE_BIT_FITEM),
50
- YP_LEX_STATE_BEG_ANY = YP_LEX_STATE_BEG | YP_LEX_STATE_MID | YP_LEX_STATE_CLASS,
51
- YP_LEX_STATE_ARG_ANY = YP_LEX_STATE_ARG | YP_LEX_STATE_CMDARG,
52
- YP_LEX_STATE_END_ANY = YP_LEX_STATE_END | YP_LEX_STATE_ENDARG | YP_LEX_STATE_ENDFN
53
- } yp_lex_state_t;
36
+ PM_LEX_STATE_NONE = 0,
37
+ PM_LEX_STATE_BEG = (1 << PM_LEX_STATE_BIT_BEG),
38
+ PM_LEX_STATE_END = (1 << PM_LEX_STATE_BIT_END),
39
+ PM_LEX_STATE_ENDARG = (1 << PM_LEX_STATE_BIT_ENDARG),
40
+ PM_LEX_STATE_ENDFN = (1 << PM_LEX_STATE_BIT_ENDFN),
41
+ PM_LEX_STATE_ARG = (1 << PM_LEX_STATE_BIT_ARG),
42
+ PM_LEX_STATE_CMDARG = (1 << PM_LEX_STATE_BIT_CMDARG),
43
+ PM_LEX_STATE_MID = (1 << PM_LEX_STATE_BIT_MID),
44
+ PM_LEX_STATE_FNAME = (1 << PM_LEX_STATE_BIT_FNAME),
45
+ PM_LEX_STATE_DOT = (1 << PM_LEX_STATE_BIT_DOT),
46
+ PM_LEX_STATE_CLASS = (1 << PM_LEX_STATE_BIT_CLASS),
47
+ PM_LEX_STATE_LABEL = (1 << PM_LEX_STATE_BIT_LABEL),
48
+ PM_LEX_STATE_LABELED = (1 << PM_LEX_STATE_BIT_LABELED),
49
+ PM_LEX_STATE_FITEM = (1 << PM_LEX_STATE_BIT_FITEM),
50
+ PM_LEX_STATE_BEG_ANY = PM_LEX_STATE_BEG | PM_LEX_STATE_MID | PM_LEX_STATE_CLASS,
51
+ PM_LEX_STATE_ARG_ANY = PM_LEX_STATE_ARG | PM_LEX_STATE_CMDARG,
52
+ PM_LEX_STATE_END_ANY = PM_LEX_STATE_END | PM_LEX_STATE_ENDARG | PM_LEX_STATE_ENDFN
53
+ } pm_lex_state_t;
54
54
 
55
55
  typedef enum {
56
- YP_HEREDOC_QUOTE_NONE,
57
- YP_HEREDOC_QUOTE_SINGLE = '\'',
58
- YP_HEREDOC_QUOTE_DOUBLE = '"',
59
- YP_HEREDOC_QUOTE_BACKTICK = '`',
60
- } yp_heredoc_quote_t;
56
+ PM_HEREDOC_QUOTE_NONE,
57
+ PM_HEREDOC_QUOTE_SINGLE = '\'',
58
+ PM_HEREDOC_QUOTE_DOUBLE = '"',
59
+ PM_HEREDOC_QUOTE_BACKTICK = '`',
60
+ } pm_heredoc_quote_t;
61
61
 
62
62
  typedef enum {
63
- YP_HEREDOC_INDENT_NONE,
64
- YP_HEREDOC_INDENT_DASH,
65
- YP_HEREDOC_INDENT_TILDE,
66
- } yp_heredoc_indent_t;
63
+ PM_HEREDOC_INDENT_NONE,
64
+ PM_HEREDOC_INDENT_DASH,
65
+ PM_HEREDOC_INDENT_TILDE,
66
+ } pm_heredoc_indent_t;
67
67
 
68
68
  // When lexing Ruby source, the lexer has a small amount of state to tell which
69
69
  // kind of token it is currently lexing. For example, when we find the start of
70
70
  // a string, the first token that we return is a TOKEN_STRING_BEGIN token. After
71
- // that the lexer is now in the YP_LEX_STRING mode, and will return tokens that
71
+ // that the lexer is now in the PM_LEX_STRING mode, and will return tokens that
72
72
  // are found as part of a string.
73
- typedef struct yp_lex_mode {
73
+ typedef struct pm_lex_mode {
74
74
  enum {
75
75
  // This state is used when any given token is being lexed.
76
- YP_LEX_DEFAULT,
76
+ PM_LEX_DEFAULT,
77
77
 
78
78
  // This state is used when we're lexing as normal but inside an embedded
79
79
  // expression of a string.
80
- YP_LEX_EMBEXPR,
80
+ PM_LEX_EMBEXPR,
81
81
 
82
82
  // This state is used when we're lexing a variable that is embedded
83
83
  // directly inside of a string with the # shorthand.
84
- YP_LEX_EMBVAR,
84
+ PM_LEX_EMBVAR,
85
85
 
86
86
  // This state is used when you are inside the content of a heredoc.
87
- YP_LEX_HEREDOC,
87
+ PM_LEX_HEREDOC,
88
88
 
89
89
  // This state is used when we are lexing a list of tokens, as in a %w
90
90
  // word list literal or a %i symbol list literal.
91
- YP_LEX_LIST,
91
+ PM_LEX_LIST,
92
92
 
93
93
  // This state is used when a regular expression has been begun and we
94
94
  // are looking for the terminator.
95
- YP_LEX_REGEXP,
95
+ PM_LEX_REGEXP,
96
96
 
97
97
  // This state is used when we are lexing a string or a string-like
98
98
  // token, as in string content with either quote or an xstring.
99
- YP_LEX_STRING
99
+ PM_LEX_STRING
100
100
  } mode;
101
101
 
102
102
  union {
@@ -166,8 +166,8 @@ typedef struct yp_lex_mode {
166
166
  const uint8_t *ident_start;
167
167
  size_t ident_length;
168
168
 
169
- yp_heredoc_quote_t quote;
170
- yp_heredoc_indent_t indent;
169
+ pm_heredoc_quote_t quote;
170
+ pm_heredoc_indent_t indent;
171
171
 
172
172
  // This is the pointer to the character where lexing should resume
173
173
  // once the heredoc has been completely processed.
@@ -176,83 +176,83 @@ typedef struct yp_lex_mode {
176
176
  } as;
177
177
 
178
178
  // The previous lex state so that it knows how to pop.
179
- struct yp_lex_mode *prev;
180
- } yp_lex_mode_t;
179
+ struct pm_lex_mode *prev;
180
+ } pm_lex_mode_t;
181
181
 
182
182
  // We pre-allocate a certain number of lex states in order to avoid having to
183
183
  // call malloc too many times while parsing. You really shouldn't need more than
184
184
  // this because you only really nest deeply when doing string interpolation.
185
- #define YP_LEX_STACK_SIZE 4
185
+ #define PM_LEX_STACK_SIZE 4
186
186
 
187
187
  // A forward declaration since our error handler struct accepts a parser for
188
188
  // each of its function calls.
189
- typedef struct yp_parser yp_parser_t;
189
+ typedef struct pm_parser pm_parser_t;
190
190
 
191
191
  // While parsing, we keep track of a stack of contexts. This is helpful for
192
192
  // error recovery so that we can pop back to a previous context when we hit a
193
193
  // token that is understood by a parent context but not by the current context.
194
194
  typedef enum {
195
- YP_CONTEXT_BEGIN, // a begin statement
196
- YP_CONTEXT_BLOCK_BRACES, // expressions in block arguments using braces
197
- YP_CONTEXT_BLOCK_KEYWORDS, // expressions in block arguments using do..end
198
- YP_CONTEXT_CASE_WHEN, // a case when statements
199
- YP_CONTEXT_CASE_IN, // a case in statements
200
- YP_CONTEXT_CLASS, // a class declaration
201
- YP_CONTEXT_DEF, // a method definition
202
- YP_CONTEXT_DEF_PARAMS, // a method definition's parameters
203
- YP_CONTEXT_DEFAULT_PARAMS, // a method definition's default parameter
204
- YP_CONTEXT_ELSE, // an else clause
205
- YP_CONTEXT_ELSIF, // an elsif clause
206
- YP_CONTEXT_EMBEXPR, // an interpolated expression
207
- YP_CONTEXT_ENSURE, // an ensure statement
208
- YP_CONTEXT_FOR, // a for loop
209
- YP_CONTEXT_IF, // an if statement
210
- YP_CONTEXT_LAMBDA_BRACES, // a lambda expression with braces
211
- YP_CONTEXT_LAMBDA_DO_END, // a lambda expression with do..end
212
- YP_CONTEXT_MAIN, // the top level context
213
- YP_CONTEXT_MODULE, // a module declaration
214
- YP_CONTEXT_PARENS, // a parenthesized expression
215
- YP_CONTEXT_POSTEXE, // an END block
216
- YP_CONTEXT_PREDICATE, // a predicate inside an if/elsif/unless statement
217
- YP_CONTEXT_PREEXE, // a BEGIN block
218
- YP_CONTEXT_RESCUE_ELSE, // a rescue else statement
219
- YP_CONTEXT_RESCUE, // a rescue statement
220
- YP_CONTEXT_SCLASS, // a singleton class definition
221
- YP_CONTEXT_UNLESS, // an unless statement
222
- YP_CONTEXT_UNTIL, // an until statement
223
- YP_CONTEXT_WHILE, // a while statement
224
- } yp_context_t;
195
+ PM_CONTEXT_BEGIN, // a begin statement
196
+ PM_CONTEXT_BLOCK_BRACES, // expressions in block arguments using braces
197
+ PM_CONTEXT_BLOCK_KEYWORDS, // expressions in block arguments using do..end
198
+ PM_CONTEXT_CASE_WHEN, // a case when statements
199
+ PM_CONTEXT_CASE_IN, // a case in statements
200
+ PM_CONTEXT_CLASS, // a class declaration
201
+ PM_CONTEXT_DEF, // a method definition
202
+ PM_CONTEXT_DEF_PARAMS, // a method definition's parameters
203
+ PM_CONTEXT_DEFAULT_PARAMS, // a method definition's default parameter
204
+ PM_CONTEXT_ELSE, // an else clause
205
+ PM_CONTEXT_ELSIF, // an elsif clause
206
+ PM_CONTEXT_EMBEXPR, // an interpolated expression
207
+ PM_CONTEXT_ENSURE, // an ensure statement
208
+ PM_CONTEXT_FOR, // a for loop
209
+ PM_CONTEXT_IF, // an if statement
210
+ PM_CONTEXT_LAMBDA_BRACES, // a lambda expression with braces
211
+ PM_CONTEXT_LAMBDA_DO_END, // a lambda expression with do..end
212
+ PM_CONTEXT_MAIN, // the top level context
213
+ PM_CONTEXT_MODULE, // a module declaration
214
+ PM_CONTEXT_PARENS, // a parenthesized expression
215
+ PM_CONTEXT_POSTEXE, // an END block
216
+ PM_CONTEXT_PREDICATE, // a predicate inside an if/elsif/unless statement
217
+ PM_CONTEXT_PREEXE, // a BEGIN block
218
+ PM_CONTEXT_RESCUE_ELSE, // a rescue else statement
219
+ PM_CONTEXT_RESCUE, // a rescue statement
220
+ PM_CONTEXT_SCLASS, // a singleton class definition
221
+ PM_CONTEXT_UNLESS, // an unless statement
222
+ PM_CONTEXT_UNTIL, // an until statement
223
+ PM_CONTEXT_WHILE, // a while statement
224
+ } pm_context_t;
225
225
 
226
226
  // This is a node in a linked list of contexts.
227
- typedef struct yp_context_node {
228
- yp_context_t context;
229
- struct yp_context_node *prev;
230
- } yp_context_node_t;
227
+ typedef struct pm_context_node {
228
+ pm_context_t context;
229
+ struct pm_context_node *prev;
230
+ } pm_context_node_t;
231
231
 
232
232
  // This is the type of a comment that we've found while parsing.
233
233
  typedef enum {
234
- YP_COMMENT_INLINE,
235
- YP_COMMENT_EMBDOC,
236
- YP_COMMENT___END__
237
- } yp_comment_type_t;
234
+ PM_COMMENT_INLINE,
235
+ PM_COMMENT_EMBDOC,
236
+ PM_COMMENT___END__
237
+ } pm_comment_type_t;
238
238
 
239
239
  // This is a node in the linked list of comments that we've found while parsing.
240
- typedef struct yp_comment {
241
- yp_list_node_t node;
240
+ typedef struct pm_comment {
241
+ pm_list_node_t node;
242
242
  const uint8_t *start;
243
243
  const uint8_t *end;
244
- yp_comment_type_t type;
245
- } yp_comment_t;
244
+ pm_comment_type_t type;
245
+ } pm_comment_t;
246
246
 
247
- // When the encoding that is being used to parse the source is changed by YARP,
247
+ // When the encoding that is being used to parse the source is changed by prism,
248
248
  // we provide the ability here to call out to a user-defined function.
249
- typedef void (*yp_encoding_changed_callback_t)(yp_parser_t *parser);
249
+ typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
250
250
 
251
- // When an encoding is encountered that isn't understood by YARP, we provide
251
+ // When an encoding is encountered that isn't understood by prism, we provide
252
252
  // the ability here to call out to a user-defined function to get an encoding
253
253
  // struct. If the function returns something that isn't NULL, we set that to
254
254
  // our encoding and use it to parse identifiers.
255
- typedef yp_encoding_t *(*yp_encoding_decode_callback_t)(yp_parser_t *parser, const uint8_t *name, size_t width);
255
+ typedef pm_encoding_t *(*pm_encoding_decode_callback_t)(pm_parser_t *parser, const uint8_t *name, size_t width);
256
256
 
257
257
  // When you are lexing through a file, the lexer needs all of the information
258
258
  // that the parser additionally provides (for example, the local table). So if
@@ -268,17 +268,17 @@ typedef struct {
268
268
 
269
269
  // This is the callback that is called when a token is lexed. It is passed
270
270
  // the opaque data pointer, the parser, and the token that was lexed.
271
- void (*callback)(void *data, yp_parser_t *parser, yp_token_t *token);
272
- } yp_lex_callback_t;
271
+ void (*callback)(void *data, pm_parser_t *parser, pm_token_t *token);
272
+ } pm_lex_callback_t;
273
273
 
274
274
  // This struct represents a node in a linked list of scopes. Some scopes can see
275
275
  // into their parent scopes, while others cannot.
276
- typedef struct yp_scope {
276
+ typedef struct pm_scope {
277
277
  // The IDs of the locals in the given scope.
278
- yp_constant_id_list_t locals;
278
+ pm_constant_id_list_t locals;
279
279
 
280
280
  // A pointer to the previous scope in the linked list.
281
- struct yp_scope *previous;
281
+ struct pm_scope *previous;
282
282
 
283
283
  // A boolean indicating whether or not this scope can see into its parent.
284
284
  // If closed is true, then the scope cannot see into its parent.
@@ -293,14 +293,14 @@ typedef struct yp_scope {
293
293
  // This is necessary to determine if child blocks are allowed to use
294
294
  // numbered parameters.
295
295
  bool numbered_params;
296
- } yp_scope_t;
296
+ } pm_scope_t;
297
297
 
298
298
  // This struct represents the overall parser. It contains a reference to the
299
299
  // source file, as well as pointers that indicate where in the source it's
300
300
  // currently parsing. It also contains the most recent and current token that
301
301
  // it's considering.
302
- struct yp_parser {
303
- yp_lex_state_t lex_state; // the current state of the lexer
302
+ struct pm_parser {
303
+ pm_lex_state_t lex_state; // the current state of the lexer
304
304
  int enclosure_nesting; // tracks the current nesting of (), [], and {}
305
305
 
306
306
  // Used to temporarily track the nesting of enclosures to determine if a {
@@ -313,22 +313,22 @@ struct yp_parser {
313
313
 
314
314
  // the stack used to determine if a do keyword belongs to the predicate of a
315
315
  // while, until, or for loop
316
- yp_state_stack_t do_loop_stack;
316
+ pm_state_stack_t do_loop_stack;
317
317
 
318
318
  // the stack used to determine if a do keyword belongs to the beginning of a
319
319
  // block
320
- yp_state_stack_t accepts_block_stack;
320
+ pm_state_stack_t accepts_block_stack;
321
321
 
322
322
  struct {
323
- yp_lex_mode_t *current; // the current mode of the lexer
324
- yp_lex_mode_t stack[YP_LEX_STACK_SIZE]; // the stack of lexer modes
323
+ pm_lex_mode_t *current; // the current mode of the lexer
324
+ pm_lex_mode_t stack[PM_LEX_STACK_SIZE]; // the stack of lexer modes
325
325
  size_t index; // the current index into the lexer mode stack
326
326
  } lex_modes;
327
327
 
328
328
  const uint8_t *start; // the pointer to the start of the source
329
329
  const uint8_t *end; // the pointer to the end of the source
330
- yp_token_t previous; // the previous token we were considering
331
- yp_token_t current; // the current token we're considering
330
+ pm_token_t previous; // the previous token we were considering
331
+ pm_token_t current; // the current token we're considering
332
332
 
333
333
  // This is a special field set on the parser when we need the parser to jump
334
334
  // to a specific location when lexing the next token, as opposed to just
@@ -341,26 +341,27 @@ struct yp_parser {
341
341
  // found on a line then this is NULL.
342
342
  const uint8_t *heredoc_end;
343
343
 
344
- yp_list_t comment_list; // the list of comments that have been found while parsing
345
- yp_list_t warning_list; // the list of warnings that have been found while parsing
346
- yp_list_t error_list; // the list of errors that have been found while parsing
347
- yp_scope_t *current_scope; // the current local scope
344
+ pm_list_t comment_list; // the list of comments that have been found while parsing
345
+ pm_list_t warning_list; // the list of warnings that have been found while parsing
346
+ pm_list_t error_list; // the list of errors that have been found while parsing
347
+ pm_scope_t *current_scope; // the current local scope
348
348
 
349
- yp_context_node_t *current_context; // the current parsing context
349
+ pm_context_node_t *current_context; // the current parsing context
350
350
 
351
351
  // The encoding functions for the current file is attached to the parser as
352
352
  // it's parsing so that it can change with a magic comment.
353
- yp_encoding_t encoding;
353
+ pm_encoding_t encoding;
354
354
 
355
355
  // When the encoding that is being used to parse the source is changed by
356
- // YARP, we provide the ability here to call out to a user-defined function.
357
- yp_encoding_changed_callback_t encoding_changed_callback;
356
+ // prism, we provide the ability here to call out to a user-defined
357
+ // function.
358
+ pm_encoding_changed_callback_t encoding_changed_callback;
358
359
 
359
- // When an encoding is encountered that isn't understood by YARP, we provide
360
- // the ability here to call out to a user-defined function to get an
360
+ // When an encoding is encountered that isn't understood by prism, we
361
+ // provide the ability here to call out to a user-defined function to get an
361
362
  // encoding struct. If the function returns something that isn't NULL, we
362
363
  // set that to our encoding and use it to parse identifiers.
363
- yp_encoding_decode_callback_t encoding_decode_callback;
364
+ pm_encoding_decode_callback_t encoding_decode_callback;
364
365
 
365
366
  // This pointer indicates where a comment must start if it is to be
366
367
  // considered an encoding comment.
@@ -368,24 +369,24 @@ struct yp_parser {
368
369
 
369
370
  // This is an optional callback that can be attached to the parser that will
370
371
  // be called whenever a new token is lexed by the parser.
371
- yp_lex_callback_t *lex_callback;
372
+ pm_lex_callback_t *lex_callback;
372
373
 
373
374
  // This is the path of the file being parsed
374
375
  // We use the filepath when constructing SourceFileNodes
375
- yp_string_t filepath_string;
376
+ pm_string_t filepath_string;
376
377
 
377
378
  // This constant pool keeps all of the constants defined throughout the file
378
379
  // so that we can reference them later.
379
- yp_constant_pool_t constant_pool;
380
+ pm_constant_pool_t constant_pool;
380
381
 
381
382
  // This is the list of newline offsets in the source file.
382
- yp_newline_list_t newline_list;
383
+ pm_newline_list_t newline_list;
383
384
 
384
385
  // We want to add a flag to integer nodes that indicates their base. We only
385
386
  // want to parse these once, but we don't have space on the token itself to
386
387
  // communicate this information. So we store it here and pass it through
387
388
  // when we find tokens that we need it for.
388
- yp_node_flags_t integer_base;
389
+ pm_node_flags_t integer_base;
389
390
 
390
391
  // Whether or not we're at the beginning of a command
391
392
  bool command_start;
@@ -414,4 +415,4 @@ struct yp_parser {
414
415
  bool frozen_string_literal;
415
416
  };
416
417
 
417
- #endif // YARP_PARSER_H
418
+ #endif // PRISM_PARSER_H
@@ -0,0 +1,19 @@
1
+ #ifndef PRISM_REGEXP_H
2
+ #define PRISM_REGEXP_H
3
+
4
+ #include "prism/defines.h"
5
+ #include "prism/parser.h"
6
+ #include "prism/enc/pm_encoding.h"
7
+ #include "prism/util/pm_memchr.h"
8
+ #include "prism/util/pm_string_list.h"
9
+ #include "prism/util/pm_string.h"
10
+
11
+ #include <stdbool.h>
12
+ #include <stddef.h>
13
+ #include <string.h>
14
+
15
+ // Parse a regular expression and extract the names of all of the named capture
16
+ // groups.
17
+ PRISM_EXPORTED_FUNCTION bool pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding);
18
+
19
+ #endif
@@ -0,0 +1,48 @@
1
+ #ifndef PRISM_UNESCAPE_H
2
+ #define PRISM_UNESCAPE_H
3
+
4
+ #include "prism/defines.h"
5
+ #include "prism/diagnostic.h"
6
+ #include "prism/parser.h"
7
+ #include "prism/util/pm_char.h"
8
+ #include "prism/util/pm_list.h"
9
+ #include "prism/util/pm_memchr.h"
10
+ #include "prism/util/pm_string.h"
11
+
12
+ #include <assert.h>
13
+ #include <stdbool.h>
14
+ #include <stdint.h>
15
+ #include <string.h>
16
+
17
+ // The type of unescape we are performing.
18
+ typedef enum {
19
+ // When we're creating a string inside of a list literal like %w, we
20
+ // shouldn't escape anything.
21
+ PM_UNESCAPE_NONE,
22
+
23
+ // When we're unescaping a single-quoted string, we only need to unescape
24
+ // single quotes and backslashes.
25
+ PM_UNESCAPE_MINIMAL,
26
+
27
+ // When we're unescaping a string list, in addition to MINIMAL, we need to
28
+ // unescape whitespace.
29
+ PM_UNESCAPE_WHITESPACE,
30
+
31
+ // When we're unescaping a double-quoted string, we need to unescape all
32
+ // escapes.
33
+ PM_UNESCAPE_ALL,
34
+ } pm_unescape_type_t;
35
+
36
+ // Unescape the contents of the given token into the given string using the given unescape mode.
37
+ PRISM_EXPORTED_FUNCTION void pm_unescape_manipulate_string(pm_parser_t *parser, pm_string_t *string, pm_unescape_type_t unescape_type);
38
+ void pm_unescape_manipulate_char_literal(pm_parser_t *parser, pm_string_t *string, pm_unescape_type_t unescape_type);
39
+
40
+ // Accepts a source string and a type of unescaping and returns the unescaped version.
41
+ // The caller must pm_string_free(result); after calling this function.
42
+ PRISM_EXPORTED_FUNCTION bool pm_unescape_string(const uint8_t *start, size_t length, pm_unescape_type_t unescape_type, pm_string_t *result);
43
+
44
+ // Returns the number of bytes that encompass the first escape sequence in the
45
+ // given string.
46
+ size_t pm_unescape_calculate_difference(pm_parser_t *parser, const uint8_t *value, pm_unescape_type_t unescape_type, bool expect_single_codepoint);
47
+
48
+ #endif
@@ -0,0 +1,51 @@
1
+ #ifndef PRISM_BUFFER_H
2
+ #define PRISM_BUFFER_H
3
+
4
+ #include "prism/defines.h"
5
+
6
+ #include <assert.h>
7
+ #include <stdbool.h>
8
+ #include <stdint.h>
9
+ #include <stdlib.h>
10
+ #include <string.h>
11
+
12
+ // A pm_buffer_t is a simple memory buffer that stores data in a contiguous
13
+ // block of memory. It is used to store the serialized representation of a
14
+ // prism tree.
15
+ typedef struct {
16
+ char *value;
17
+ size_t length;
18
+ size_t capacity;
19
+ } pm_buffer_t;
20
+
21
+ // Return the size of the pm_buffer_t struct.
22
+ PRISM_EXPORTED_FUNCTION size_t pm_buffer_sizeof(void);
23
+
24
+ // Initialize a pm_buffer_t with its default values.
25
+ PRISM_EXPORTED_FUNCTION bool pm_buffer_init(pm_buffer_t *buffer);
26
+
27
+ // Return the value of the buffer.
28
+ PRISM_EXPORTED_FUNCTION char * pm_buffer_value(pm_buffer_t *buffer);
29
+
30
+ // Return the length of the buffer.
31
+ PRISM_EXPORTED_FUNCTION size_t pm_buffer_length(pm_buffer_t *buffer);
32
+
33
+ // Append the given amount of space as zeroes to the buffer.
34
+ void pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length);
35
+
36
+ // Append a string to the buffer.
37
+ void pm_buffer_append_str(pm_buffer_t *buffer, const char *value, size_t length);
38
+
39
+ // Append a list of bytes to the buffer.
40
+ void pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length);
41
+
42
+ // Append a single byte to the buffer.
43
+ void pm_buffer_append_u8(pm_buffer_t *buffer, uint8_t value);
44
+
45
+ // Append a 32-bit unsigned integer to the buffer.
46
+ void pm_buffer_append_u32(pm_buffer_t *buffer, uint32_t value);
47
+
48
+ // Free the memory associated with the buffer.
49
+ PRISM_EXPORTED_FUNCTION void pm_buffer_free(pm_buffer_t *buffer);
50
+
51
+ #endif