jruby-prism-parser 0.23.0.pre.SNAPSHOT-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (110) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +401 -0
  3. data/CODE_OF_CONDUCT.md +76 -0
  4. data/CONTRIBUTING.md +62 -0
  5. data/LICENSE.md +7 -0
  6. data/Makefile +101 -0
  7. data/README.md +98 -0
  8. data/config.yml +2902 -0
  9. data/docs/build_system.md +91 -0
  10. data/docs/configuration.md +64 -0
  11. data/docs/cruby_compilation.md +27 -0
  12. data/docs/design.md +53 -0
  13. data/docs/encoding.md +121 -0
  14. data/docs/fuzzing.md +88 -0
  15. data/docs/heredocs.md +36 -0
  16. data/docs/javascript.md +118 -0
  17. data/docs/local_variable_depth.md +229 -0
  18. data/docs/mapping.md +117 -0
  19. data/docs/parser_translation.md +34 -0
  20. data/docs/parsing_rules.md +19 -0
  21. data/docs/releasing.md +98 -0
  22. data/docs/ripper.md +36 -0
  23. data/docs/ruby_api.md +43 -0
  24. data/docs/ruby_parser_translation.md +19 -0
  25. data/docs/serialization.md +209 -0
  26. data/docs/testing.md +55 -0
  27. data/ext/prism/api_node.c +5098 -0
  28. data/ext/prism/api_pack.c +267 -0
  29. data/ext/prism/extconf.rb +110 -0
  30. data/ext/prism/extension.c +1155 -0
  31. data/ext/prism/extension.h +18 -0
  32. data/include/prism/ast.h +5807 -0
  33. data/include/prism/defines.h +102 -0
  34. data/include/prism/diagnostic.h +339 -0
  35. data/include/prism/encoding.h +265 -0
  36. data/include/prism/node.h +57 -0
  37. data/include/prism/options.h +230 -0
  38. data/include/prism/pack.h +152 -0
  39. data/include/prism/parser.h +732 -0
  40. data/include/prism/prettyprint.h +26 -0
  41. data/include/prism/regexp.h +33 -0
  42. data/include/prism/util/pm_buffer.h +155 -0
  43. data/include/prism/util/pm_char.h +205 -0
  44. data/include/prism/util/pm_constant_pool.h +209 -0
  45. data/include/prism/util/pm_list.h +97 -0
  46. data/include/prism/util/pm_memchr.h +29 -0
  47. data/include/prism/util/pm_newline_list.h +93 -0
  48. data/include/prism/util/pm_state_stack.h +42 -0
  49. data/include/prism/util/pm_string.h +150 -0
  50. data/include/prism/util/pm_string_list.h +44 -0
  51. data/include/prism/util/pm_strncasecmp.h +32 -0
  52. data/include/prism/util/pm_strpbrk.h +46 -0
  53. data/include/prism/version.h +29 -0
  54. data/include/prism.h +289 -0
  55. data/jruby-prism.jar +0 -0
  56. data/lib/prism/compiler.rb +486 -0
  57. data/lib/prism/debug.rb +206 -0
  58. data/lib/prism/desugar_compiler.rb +207 -0
  59. data/lib/prism/dispatcher.rb +2150 -0
  60. data/lib/prism/dot_visitor.rb +4634 -0
  61. data/lib/prism/dsl.rb +785 -0
  62. data/lib/prism/ffi.rb +346 -0
  63. data/lib/prism/lex_compat.rb +908 -0
  64. data/lib/prism/mutation_compiler.rb +753 -0
  65. data/lib/prism/node.rb +17864 -0
  66. data/lib/prism/node_ext.rb +212 -0
  67. data/lib/prism/node_inspector.rb +68 -0
  68. data/lib/prism/pack.rb +224 -0
  69. data/lib/prism/parse_result/comments.rb +177 -0
  70. data/lib/prism/parse_result/newlines.rb +64 -0
  71. data/lib/prism/parse_result.rb +498 -0
  72. data/lib/prism/pattern.rb +250 -0
  73. data/lib/prism/serialize.rb +1354 -0
  74. data/lib/prism/translation/parser/compiler.rb +1838 -0
  75. data/lib/prism/translation/parser/lexer.rb +335 -0
  76. data/lib/prism/translation/parser/rubocop.rb +37 -0
  77. data/lib/prism/translation/parser.rb +178 -0
  78. data/lib/prism/translation/ripper.rb +577 -0
  79. data/lib/prism/translation/ruby_parser.rb +1521 -0
  80. data/lib/prism/translation.rb +11 -0
  81. data/lib/prism/version.rb +3 -0
  82. data/lib/prism/visitor.rb +495 -0
  83. data/lib/prism.rb +99 -0
  84. data/prism.gemspec +135 -0
  85. data/rbi/prism.rbi +7767 -0
  86. data/rbi/prism_static.rbi +207 -0
  87. data/sig/prism.rbs +4773 -0
  88. data/sig/prism_static.rbs +201 -0
  89. data/src/diagnostic.c +400 -0
  90. data/src/encoding.c +5132 -0
  91. data/src/node.c +2786 -0
  92. data/src/options.c +213 -0
  93. data/src/pack.c +493 -0
  94. data/src/prettyprint.c +8881 -0
  95. data/src/prism.c +18406 -0
  96. data/src/regexp.c +638 -0
  97. data/src/serialize.c +1554 -0
  98. data/src/token_type.c +700 -0
  99. data/src/util/pm_buffer.c +190 -0
  100. data/src/util/pm_char.c +318 -0
  101. data/src/util/pm_constant_pool.c +322 -0
  102. data/src/util/pm_list.c +49 -0
  103. data/src/util/pm_memchr.c +35 -0
  104. data/src/util/pm_newline_list.c +84 -0
  105. data/src/util/pm_state_stack.c +25 -0
  106. data/src/util/pm_string.c +203 -0
  107. data/src/util/pm_string_list.c +28 -0
  108. data/src/util/pm_strncasecmp.c +24 -0
  109. data/src/util/pm_strpbrk.c +180 -0
  110. metadata +156 -0
@@ -0,0 +1,57 @@
1
+ /**
2
+ * @file node.h
3
+ *
4
+ * Functions related to nodes in the AST.
5
+ */
6
+ #ifndef PRISM_NODE_H
7
+ #define PRISM_NODE_H
8
+
9
+ #include "prism/defines.h"
10
+ #include "prism/parser.h"
11
+
12
+ /**
13
+ * Append a new node onto the end of the node list.
14
+ *
15
+ * @param list The list to append to.
16
+ * @param node The node to append.
17
+ */
18
+ void pm_node_list_append(pm_node_list_t *list, pm_node_t *node);
19
+
20
+ /**
21
+ * Deallocate a node and all of its children.
22
+ *
23
+ * @param parser The parser that owns the node.
24
+ * @param node The node to deallocate.
25
+ */
26
+ PRISM_EXPORTED_FUNCTION void pm_node_destroy(pm_parser_t *parser, struct pm_node *node);
27
+
28
+ /**
29
+ * This struct stores the information gathered by the pm_node_memsize function.
30
+ * It contains both the memory footprint and additional metadata about the
31
+ * shape of the tree.
32
+ */
33
+ typedef struct {
34
+ /** The total memory footprint of the node and all of its children. */
35
+ size_t memsize;
36
+
37
+ /** The number of children the node has. */
38
+ size_t node_count;
39
+ } pm_memsize_t;
40
+
41
+ /**
42
+ * Calculates the memory footprint of a given node.
43
+ *
44
+ * @param node The node to calculate the memory footprint of.
45
+ * @param memsize The memory footprint of the node and all of its children.
46
+ */
47
+ PRISM_EXPORTED_FUNCTION void pm_node_memsize(pm_node_t *node, pm_memsize_t *memsize);
48
+
49
+ /**
50
+ * Returns a string representation of the given node type.
51
+ *
52
+ * @param node_type The node type to convert to a string.
53
+ * @return A string representation of the given node type.
54
+ */
55
+ PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_type);
56
+
57
+ #endif
@@ -0,0 +1,230 @@
1
+ /**
2
+ * @file options.h
3
+ *
4
+ * The options that can be passed to parsing.
5
+ */
6
+ #ifndef PRISM_OPTIONS_H
7
+ #define PRISM_OPTIONS_H
8
+
9
+ #include "prism/defines.h"
10
+ #include "prism/util/pm_string.h"
11
+
12
+ #include <stdbool.h>
13
+ #include <stddef.h>
14
+ #include <stdint.h>
15
+
16
+ /**
17
+ * A scope of locals surrounding the code that is being parsed.
18
+ */
19
+ typedef struct pm_options_scope {
20
+ /** The number of locals in the scope. */
21
+ size_t locals_count;
22
+
23
+ /** The names of the locals in the scope. */
24
+ pm_string_t *locals;
25
+ } pm_options_scope_t;
26
+
27
+ /**
28
+ * The version of Ruby syntax that we should be parsing with. This is used to
29
+ * allow consumers to specify which behavior they want in case they need to
30
+ * parse in the same way as a specific version of CRuby would have.
31
+ */
32
+ typedef enum {
33
+ /** The current version of prism. */
34
+ PM_OPTIONS_VERSION_LATEST = 0,
35
+
36
+ /** The vendored version of prism in CRuby 3.3.0. */
37
+ PM_OPTIONS_VERSION_CRUBY_3_3_0 = 1
38
+ } pm_options_version_t;
39
+
40
+ /**
41
+ * The options that can be passed to the parser.
42
+ */
43
+ typedef struct {
44
+ /** The name of the file that is currently being parsed. */
45
+ pm_string_t filepath;
46
+
47
+ /**
48
+ * The line within the file that the parse starts on. This value is
49
+ * 1-indexed.
50
+ */
51
+ int32_t line;
52
+
53
+ /**
54
+ * The name of the encoding that the source file is in. Note that this must
55
+ * correspond to a name that can be found with Encoding.find in Ruby.
56
+ */
57
+ pm_string_t encoding;
58
+
59
+ /**
60
+ * The number of scopes surrounding the code that is being parsed.
61
+ */
62
+ size_t scopes_count;
63
+
64
+ /**
65
+ * The scopes surrounding the code that is being parsed. For most parses
66
+ * this will be NULL, but for evals it will be the locals that are in scope
67
+ * surrounding the eval. Scopes are ordered from the outermost scope to the
68
+ * innermost one.
69
+ */
70
+ pm_options_scope_t *scopes;
71
+
72
+ /**
73
+ * The version of prism that we should be parsing with. This is used to
74
+ * allow consumers to specify which behavior they want in case they need to
75
+ * parse exactly as a specific version of CRuby.
76
+ */
77
+ pm_options_version_t version;
78
+
79
+ /** Whether or not the frozen string literal option has been set. */
80
+ bool frozen_string_literal;
81
+ } pm_options_t;
82
+
83
+ /**
84
+ * Set the filepath option on the given options struct.
85
+ *
86
+ * @param options The options struct to set the filepath on.
87
+ * @param filepath The filepath to set.
88
+ */
89
+ PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, const char *filepath);
90
+
91
+ /**
92
+ * Set the line option on the given options struct.
93
+ *
94
+ * @param options The options struct to set the line on.
95
+ * @param line The line to set.
96
+ */
97
+ PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t line);
98
+
99
+ /**
100
+ * Set the encoding option on the given options struct.
101
+ *
102
+ * @param options The options struct to set the encoding on.
103
+ * @param encoding The encoding to set.
104
+ */
105
+ PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, const char *encoding);
106
+
107
+ /**
108
+ * Set the frozen string literal option on the given options struct.
109
+ *
110
+ * @param options The options struct to set the frozen string literal value on.
111
+ * @param frozen_string_literal The frozen string literal value to set.
112
+ */
113
+ PRISM_EXPORTED_FUNCTION void pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal);
114
+
115
+ /**
116
+ * Set the version option on the given options struct by parsing the given
117
+ * string. If the string contains an invalid option, this returns false.
118
+ * Otherwise, it returns true.
119
+ *
120
+ * @param options The options struct to set the version on.
121
+ * @param version The version to set.
122
+ * @param length The length of the version string.
123
+ * @return Whether or not the version was parsed successfully.
124
+ */
125
+ PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const char *version, size_t length);
126
+
127
+ /**
128
+ * Allocate and zero out the scopes array on the given options struct.
129
+ *
130
+ * @param options The options struct to initialize the scopes array on.
131
+ * @param scopes_count The number of scopes to allocate.
132
+ */
133
+ PRISM_EXPORTED_FUNCTION void pm_options_scopes_init(pm_options_t *options, size_t scopes_count);
134
+
135
+ /**
136
+ * Return a pointer to the scope at the given index within the given options.
137
+ *
138
+ * @param options The options struct to get the scope from.
139
+ * @param index The index of the scope to get.
140
+ * @return A pointer to the scope at the given index.
141
+ */
142
+ PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm_options_t *options, size_t index);
143
+
144
+ /**
145
+ * Create a new options scope struct. This will hold a set of locals that are in
146
+ * scope surrounding the code that is being parsed.
147
+ *
148
+ * @param scope The scope struct to initialize.
149
+ * @param locals_count The number of locals to allocate.
150
+ */
151
+ PRISM_EXPORTED_FUNCTION void pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count);
152
+
153
+ /**
154
+ * Return a pointer to the local at the given index within the given scope.
155
+ *
156
+ * @param scope The scope struct to get the local from.
157
+ * @param index The index of the local to get.
158
+ * @return A pointer to the local at the given index.
159
+ */
160
+ PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index);
161
+
162
+ /**
163
+ * Free the internal memory associated with the options.
164
+ *
165
+ * @param options The options struct whose internal memory should be freed.
166
+ */
167
+ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
168
+
169
+ /**
170
+ * Deserialize an options struct from the given binary string. This is used to
171
+ * pass options to the parser from an FFI call so that consumers of the library
172
+ * from an FFI perspective don't have to worry about the structure of our
173
+ * options structs. Since the source of these calls will be from Ruby
174
+ * implementation internals we assume it is from a trusted source.
175
+ *
176
+ * `data` is assumed to be a valid pointer pointing to well-formed data. The
177
+ * layout of this data should be the same every time, and is described below:
178
+ *
179
+ * | # bytes | field |
180
+ * | ------- | -------------------------- |
181
+ * | `4` | the length of the filepath |
182
+ * | ... | the filepath bytes |
183
+ * | `4` | the line number |
184
+ * | `4` | the length of the encoding |
185
+ * | ... | the encoding bytes |
186
+ * | `1` | frozen string literal |
187
+ * | `1` | suppress warnings |
188
+ * | `1` | the version |
189
+ * | `4` | the number of scopes |
190
+ * | ... | the scopes |
191
+ *
192
+ * The version field is an enum, so it should be one of the following values:
193
+ *
194
+ * | value | version |
195
+ * | ----- | ------------------------- |
196
+ * | `0` | use the latest version of prism |
197
+ * | `1` | use the version of prism that is vendored in CRuby 3.3.0 |
198
+ *
199
+ * Each scope is laid out as follows:
200
+ *
201
+ * | # bytes | field |
202
+ * | ------- | -------------------------- |
203
+ * | `4` | the number of locals |
204
+ * | ... | the locals |
205
+ *
206
+ * Each local is laid out as follows:
207
+ *
208
+ * | # bytes | field |
209
+ * | ------- | -------------------------- |
210
+ * | `4` | the length of the local |
211
+ * | ... | the local bytes |
212
+ *
213
+ * Some additional things to note about this layout:
214
+ *
215
+ * * The filepath can have a length of 0, in which case we'll consider it an
216
+ * empty string.
217
+ * * The line number should be 0-indexed.
218
+ * * The encoding can have a length of 0, in which case we'll use the default
219
+ * encoding (UTF-8). If it's not 0, it should correspond to a name of an
220
+ * encoding that can be passed to `Encoding.find` in Ruby.
221
+ * * The frozen string literal and suppress warnings fields are booleans, so
222
+ * their values should be either 0 or 1.
223
+ * * The number of scopes can be 0.
224
+ *
225
+ * @param options The options struct to deserialize into.
226
+ * @param data The binary string to deserialize from.
227
+ */
228
+ void pm_options_read(pm_options_t *options, const char *data);
229
+
230
+ #endif
@@ -0,0 +1,152 @@
1
+ /**
2
+ * @file pack.h
3
+ *
4
+ * A pack template string parser.
5
+ */
6
+ #ifndef PRISM_PACK_H
7
+ #define PRISM_PACK_H
8
+
9
+ #include "prism/defines.h"
10
+
11
+ #include <stdint.h>
12
+ #include <stdlib.h>
13
+
14
+ /** The version of the pack template language that we are parsing. */
15
+ typedef enum pm_pack_version {
16
+ PM_PACK_VERSION_3_2_0
17
+ } pm_pack_version;
18
+
19
+ /** The type of pack template we are parsing. */
20
+ typedef enum pm_pack_variant {
21
+ PM_PACK_VARIANT_PACK,
22
+ PM_PACK_VARIANT_UNPACK
23
+ } pm_pack_variant;
24
+
25
+ /** A directive within the pack template. */
26
+ typedef enum pm_pack_type {
27
+ PM_PACK_SPACE,
28
+ PM_PACK_COMMENT,
29
+ PM_PACK_INTEGER,
30
+ PM_PACK_UTF8,
31
+ PM_PACK_BER,
32
+ PM_PACK_FLOAT,
33
+ PM_PACK_STRING_SPACE_PADDED,
34
+ PM_PACK_STRING_NULL_PADDED,
35
+ PM_PACK_STRING_NULL_TERMINATED,
36
+ PM_PACK_STRING_MSB,
37
+ PM_PACK_STRING_LSB,
38
+ PM_PACK_STRING_HEX_HIGH,
39
+ PM_PACK_STRING_HEX_LOW,
40
+ PM_PACK_STRING_UU,
41
+ PM_PACK_STRING_MIME,
42
+ PM_PACK_STRING_BASE64,
43
+ PM_PACK_STRING_FIXED,
44
+ PM_PACK_STRING_POINTER,
45
+ PM_PACK_MOVE,
46
+ PM_PACK_BACK,
47
+ PM_PACK_NULL,
48
+ PM_PACK_END
49
+ } pm_pack_type;
50
+
51
+ /** The signedness of a pack directive. */
52
+ typedef enum pm_pack_signed {
53
+ PM_PACK_UNSIGNED,
54
+ PM_PACK_SIGNED,
55
+ PM_PACK_SIGNED_NA
56
+ } pm_pack_signed;
57
+
58
+ /** The endianness of a pack directive. */
59
+ typedef enum pm_pack_endian {
60
+ PM_PACK_AGNOSTIC_ENDIAN,
61
+ PM_PACK_LITTLE_ENDIAN, // aka 'VAX', or 'V'
62
+ PM_PACK_BIG_ENDIAN, // aka 'network', or 'N'
63
+ PM_PACK_NATIVE_ENDIAN,
64
+ PM_PACK_ENDIAN_NA
65
+ } pm_pack_endian;
66
+
67
+ /** The size of an integer pack directive. */
68
+ typedef enum pm_pack_size {
69
+ PM_PACK_SIZE_SHORT,
70
+ PM_PACK_SIZE_INT,
71
+ PM_PACK_SIZE_LONG,
72
+ PM_PACK_SIZE_LONG_LONG,
73
+ PM_PACK_SIZE_8,
74
+ PM_PACK_SIZE_16,
75
+ PM_PACK_SIZE_32,
76
+ PM_PACK_SIZE_64,
77
+ PM_PACK_SIZE_P,
78
+ PM_PACK_SIZE_NA
79
+ } pm_pack_size;
80
+
81
+ /** The type of length of a pack directive. */
82
+ typedef enum pm_pack_length_type {
83
+ PM_PACK_LENGTH_FIXED,
84
+ PM_PACK_LENGTH_MAX,
85
+ PM_PACK_LENGTH_RELATIVE, // special case for unpack @*
86
+ PM_PACK_LENGTH_NA
87
+ } pm_pack_length_type;
88
+
89
+ /** The type of encoding for a pack template string. */
90
+ typedef enum pm_pack_encoding {
91
+ PM_PACK_ENCODING_START,
92
+ PM_PACK_ENCODING_ASCII_8BIT,
93
+ PM_PACK_ENCODING_US_ASCII,
94
+ PM_PACK_ENCODING_UTF_8
95
+ } pm_pack_encoding;
96
+
97
+ /** The result of parsing a pack template. */
98
+ typedef enum pm_pack_result {
99
+ PM_PACK_OK,
100
+ PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE,
101
+ PM_PACK_ERROR_UNKNOWN_DIRECTIVE,
102
+ PM_PACK_ERROR_LENGTH_TOO_BIG,
103
+ PM_PACK_ERROR_BANG_NOT_ALLOWED,
104
+ PM_PACK_ERROR_DOUBLE_ENDIAN
105
+ } pm_pack_result;
106
+
107
+ /**
108
+ * Parse a single directive from a pack or unpack format string.
109
+ *
110
+ * @param variant (in) pack or unpack
111
+ * @param format (in, out) the start of the next directive to parse on calling,
112
+ * and advanced beyond the parsed directive on return, or as much of it as
113
+ * was consumed until an error was encountered
114
+ * @param format_end (in) the end of the format string
115
+ * @param type (out) the type of the directive
116
+ * @param signed_type (out) whether the value is signed
117
+ * @param endian (out) the endianness of the value
118
+ * @param size (out) the size of the value
119
+ * @param length_type (out) what kind of length is specified
120
+ * @param length (out) the length of the directive
121
+ * @param encoding (in, out) takes the current encoding of the string which
122
+ * would result from parsing the whole format string, and returns a possibly
123
+ * changed encoding - the encoding should be `PM_PACK_ENCODING_START` when
124
+ * pm_pack_parse is called for the first directive in a format string
125
+ *
126
+ * @return `PM_PACK_OK` on success or `PM_PACK_ERROR_*` on error
127
+ * @note Consult Ruby documentation for the meaning of directives.
128
+ */
129
+ PRISM_EXPORTED_FUNCTION pm_pack_result
130
+ pm_pack_parse(
131
+ pm_pack_variant variant,
132
+ const char **format,
133
+ const char *format_end,
134
+ pm_pack_type *type,
135
+ pm_pack_signed *signed_type,
136
+ pm_pack_endian *endian,
137
+ pm_pack_size *size,
138
+ pm_pack_length_type *length_type,
139
+ uint64_t *length,
140
+ pm_pack_encoding *encoding
141
+ );
142
+
143
+ /**
144
+ * Prism abstracts sizes away from the native system - this converts an abstract
145
+ * size to a native size.
146
+ *
147
+ * @param size The abstract size to convert.
148
+ * @return The native size.
149
+ */
150
+ PRISM_EXPORTED_FUNCTION size_t pm_size_to_native(pm_pack_size size);
151
+
152
+ #endif