jruby-prism-parser 0.23.0.pre.SNAPSHOT-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +401 -0
  3. data/CODE_OF_CONDUCT.md +76 -0
  4. data/CONTRIBUTING.md +62 -0
  5. data/LICENSE.md +7 -0
  6. data/Makefile +101 -0
  7. data/README.md +98 -0
  8. data/config.yml +2902 -0
  9. data/docs/build_system.md +91 -0
  10. data/docs/configuration.md +64 -0
  11. data/docs/cruby_compilation.md +27 -0
  12. data/docs/design.md +53 -0
  13. data/docs/encoding.md +121 -0
  14. data/docs/fuzzing.md +88 -0
  15. data/docs/heredocs.md +36 -0
  16. data/docs/javascript.md +118 -0
  17. data/docs/local_variable_depth.md +229 -0
  18. data/docs/mapping.md +117 -0
  19. data/docs/parser_translation.md +34 -0
  20. data/docs/parsing_rules.md +19 -0
  21. data/docs/releasing.md +98 -0
  22. data/docs/ripper.md +36 -0
  23. data/docs/ruby_api.md +43 -0
  24. data/docs/ruby_parser_translation.md +19 -0
  25. data/docs/serialization.md +209 -0
  26. data/docs/testing.md +55 -0
  27. data/ext/prism/api_node.c +5098 -0
  28. data/ext/prism/api_pack.c +267 -0
  29. data/ext/prism/extconf.rb +110 -0
  30. data/ext/prism/extension.c +1155 -0
  31. data/ext/prism/extension.h +18 -0
  32. data/include/prism/ast.h +5807 -0
  33. data/include/prism/defines.h +102 -0
  34. data/include/prism/diagnostic.h +339 -0
  35. data/include/prism/encoding.h +265 -0
  36. data/include/prism/node.h +57 -0
  37. data/include/prism/options.h +230 -0
  38. data/include/prism/pack.h +152 -0
  39. data/include/prism/parser.h +732 -0
  40. data/include/prism/prettyprint.h +26 -0
  41. data/include/prism/regexp.h +33 -0
  42. data/include/prism/util/pm_buffer.h +155 -0
  43. data/include/prism/util/pm_char.h +205 -0
  44. data/include/prism/util/pm_constant_pool.h +209 -0
  45. data/include/prism/util/pm_list.h +97 -0
  46. data/include/prism/util/pm_memchr.h +29 -0
  47. data/include/prism/util/pm_newline_list.h +93 -0
  48. data/include/prism/util/pm_state_stack.h +42 -0
  49. data/include/prism/util/pm_string.h +150 -0
  50. data/include/prism/util/pm_string_list.h +44 -0
  51. data/include/prism/util/pm_strncasecmp.h +32 -0
  52. data/include/prism/util/pm_strpbrk.h +46 -0
  53. data/include/prism/version.h +29 -0
  54. data/include/prism.h +289 -0
  55. data/jruby-prism.jar +0 -0
  56. data/lib/prism/compiler.rb +486 -0
  57. data/lib/prism/debug.rb +206 -0
  58. data/lib/prism/desugar_compiler.rb +207 -0
  59. data/lib/prism/dispatcher.rb +2150 -0
  60. data/lib/prism/dot_visitor.rb +4634 -0
  61. data/lib/prism/dsl.rb +785 -0
  62. data/lib/prism/ffi.rb +346 -0
  63. data/lib/prism/lex_compat.rb +908 -0
  64. data/lib/prism/mutation_compiler.rb +753 -0
  65. data/lib/prism/node.rb +17864 -0
  66. data/lib/prism/node_ext.rb +212 -0
  67. data/lib/prism/node_inspector.rb +68 -0
  68. data/lib/prism/pack.rb +224 -0
  69. data/lib/prism/parse_result/comments.rb +177 -0
  70. data/lib/prism/parse_result/newlines.rb +64 -0
  71. data/lib/prism/parse_result.rb +498 -0
  72. data/lib/prism/pattern.rb +250 -0
  73. data/lib/prism/serialize.rb +1354 -0
  74. data/lib/prism/translation/parser/compiler.rb +1838 -0
  75. data/lib/prism/translation/parser/lexer.rb +335 -0
  76. data/lib/prism/translation/parser/rubocop.rb +37 -0
  77. data/lib/prism/translation/parser.rb +178 -0
  78. data/lib/prism/translation/ripper.rb +577 -0
  79. data/lib/prism/translation/ruby_parser.rb +1521 -0
  80. data/lib/prism/translation.rb +11 -0
  81. data/lib/prism/version.rb +3 -0
  82. data/lib/prism/visitor.rb +495 -0
  83. data/lib/prism.rb +99 -0
  84. data/prism.gemspec +135 -0
  85. data/rbi/prism.rbi +7767 -0
  86. data/rbi/prism_static.rbi +207 -0
  87. data/sig/prism.rbs +4773 -0
  88. data/sig/prism_static.rbs +201 -0
  89. data/src/diagnostic.c +400 -0
  90. data/src/encoding.c +5132 -0
  91. data/src/node.c +2786 -0
  92. data/src/options.c +213 -0
  93. data/src/pack.c +493 -0
  94. data/src/prettyprint.c +8881 -0
  95. data/src/prism.c +18406 -0
  96. data/src/regexp.c +638 -0
  97. data/src/serialize.c +1554 -0
  98. data/src/token_type.c +700 -0
  99. data/src/util/pm_buffer.c +190 -0
  100. data/src/util/pm_char.c +318 -0
  101. data/src/util/pm_constant_pool.c +322 -0
  102. data/src/util/pm_list.c +49 -0
  103. data/src/util/pm_memchr.c +35 -0
  104. data/src/util/pm_newline_list.c +84 -0
  105. data/src/util/pm_state_stack.c +25 -0
  106. data/src/util/pm_string.c +203 -0
  107. data/src/util/pm_string_list.c +28 -0
  108. data/src/util/pm_strncasecmp.c +24 -0
  109. data/src/util/pm_strpbrk.c +180 -0
  110. metadata +156 -0
@@ -0,0 +1,57 @@
1
+ /**
2
+ * @file node.h
3
+ *
4
+ * Functions related to nodes in the AST.
5
+ */
6
+ #ifndef PRISM_NODE_H
7
+ #define PRISM_NODE_H
8
+
9
+ #include "prism/defines.h"
10
+ #include "prism/parser.h"
11
+
12
+ /**
13
+ * Append a new node onto the end of the node list.
14
+ *
15
+ * @param list The list to append to.
16
+ * @param node The node to append.
17
+ */
18
+ void pm_node_list_append(pm_node_list_t *list, pm_node_t *node);
19
+
20
+ /**
21
+ * Deallocate a node and all of its children.
22
+ *
23
+ * @param parser The parser that owns the node.
24
+ * @param node The node to deallocate.
25
+ */
26
+ PRISM_EXPORTED_FUNCTION void pm_node_destroy(pm_parser_t *parser, struct pm_node *node);
27
+
28
+ /**
29
+ * This struct stores the information gathered by the pm_node_memsize function.
30
+ * It contains both the memory footprint and additional metadata about the
31
+ * shape of the tree.
32
+ */
33
+ typedef struct {
34
+ /** The total memory footprint of the node and all of its children. */
35
+ size_t memsize;
36
+
37
+ /** The number of children the node has. */
38
+ size_t node_count;
39
+ } pm_memsize_t;
40
+
41
+ /**
42
+ * Calculates the memory footprint of a given node.
43
+ *
44
+ * @param node The node to calculate the memory footprint of.
45
+ * @param memsize The memory footprint of the node and all of its children.
46
+ */
47
+ PRISM_EXPORTED_FUNCTION void pm_node_memsize(pm_node_t *node, pm_memsize_t *memsize);
48
+
49
+ /**
50
+ * Returns a string representation of the given node type.
51
+ *
52
+ * @param node_type The node type to convert to a string.
53
+ * @return A string representation of the given node type.
54
+ */
55
+ PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_type);
56
+
57
+ #endif
@@ -0,0 +1,230 @@
1
+ /**
2
+ * @file options.h
3
+ *
4
+ * The options that can be passed to parsing.
5
+ */
6
+ #ifndef PRISM_OPTIONS_H
7
+ #define PRISM_OPTIONS_H
8
+
9
+ #include "prism/defines.h"
10
+ #include "prism/util/pm_string.h"
11
+
12
+ #include <stdbool.h>
13
+ #include <stddef.h>
14
+ #include <stdint.h>
15
+
16
+ /**
17
+ * A scope of locals surrounding the code that is being parsed.
18
+ */
19
+ typedef struct pm_options_scope {
20
+ /** The number of locals in the scope. */
21
+ size_t locals_count;
22
+
23
+ /** The names of the locals in the scope. */
24
+ pm_string_t *locals;
25
+ } pm_options_scope_t;
26
+
27
+ /**
28
+ * The version of Ruby syntax that we should be parsing with. This is used to
29
+ * allow consumers to specify which behavior they want in case they need to
30
+ * parse in the same way as a specific version of CRuby would have.
31
+ */
32
+ typedef enum {
33
+ /** The current version of prism. */
34
+ PM_OPTIONS_VERSION_LATEST = 0,
35
+
36
+ /** The vendored version of prism in CRuby 3.3.0. */
37
+ PM_OPTIONS_VERSION_CRUBY_3_3_0 = 1
38
+ } pm_options_version_t;
39
+
40
+ /**
41
+ * The options that can be passed to the parser.
42
+ */
43
+ typedef struct {
44
+ /** The name of the file that is currently being parsed. */
45
+ pm_string_t filepath;
46
+
47
+ /**
48
+ * The line within the file that the parse starts on. This value is
49
+ * 1-indexed.
50
+ */
51
+ int32_t line;
52
+
53
+ /**
54
+ * The name of the encoding that the source file is in. Note that this must
55
+ * correspond to a name that can be found with Encoding.find in Ruby.
56
+ */
57
+ pm_string_t encoding;
58
+
59
+ /**
60
+ * The number of scopes surrounding the code that is being parsed.
61
+ */
62
+ size_t scopes_count;
63
+
64
+ /**
65
+ * The scopes surrounding the code that is being parsed. For most parses
66
+ * this will be NULL, but for evals it will be the locals that are in scope
67
+ * surrounding the eval. Scopes are ordered from the outermost scope to the
68
+ * innermost one.
69
+ */
70
+ pm_options_scope_t *scopes;
71
+
72
+ /**
73
+ * The version of prism that we should be parsing with. This is used to
74
+ * allow consumers to specify which behavior they want in case they need to
75
+ * parse exactly as a specific version of CRuby.
76
+ */
77
+ pm_options_version_t version;
78
+
79
+ /** Whether or not the frozen string literal option has been set. */
80
+ bool frozen_string_literal;
81
+ } pm_options_t;
82
+
83
+ /**
84
+ * Set the filepath option on the given options struct.
85
+ *
86
+ * @param options The options struct to set the filepath on.
87
+ * @param filepath The filepath to set.
88
+ */
89
+ PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, const char *filepath);
90
+
91
+ /**
92
+ * Set the line option on the given options struct.
93
+ *
94
+ * @param options The options struct to set the line on.
95
+ * @param line The line to set.
96
+ */
97
+ PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t line);
98
+
99
+ /**
100
+ * Set the encoding option on the given options struct.
101
+ *
102
+ * @param options The options struct to set the encoding on.
103
+ * @param encoding The encoding to set.
104
+ */
105
+ PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, const char *encoding);
106
+
107
+ /**
108
+ * Set the frozen string literal option on the given options struct.
109
+ *
110
+ * @param options The options struct to set the frozen string literal value on.
111
+ * @param frozen_string_literal The frozen string literal value to set.
112
+ */
113
+ PRISM_EXPORTED_FUNCTION void pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal);
114
+
115
+ /**
116
+ * Set the version option on the given options struct by parsing the given
117
+ * string. If the string contains an invalid option, this returns false.
118
+ * Otherwise, it returns true.
119
+ *
120
+ * @param options The options struct to set the version on.
121
+ * @param version The version to set.
122
+ * @param length The length of the version string.
123
+ * @return Whether or not the version was parsed successfully.
124
+ */
125
+ PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const char *version, size_t length);
126
+
127
+ /**
128
+ * Allocate and zero out the scopes array on the given options struct.
129
+ *
130
+ * @param options The options struct to initialize the scopes array on.
131
+ * @param scopes_count The number of scopes to allocate.
132
+ */
133
+ PRISM_EXPORTED_FUNCTION void pm_options_scopes_init(pm_options_t *options, size_t scopes_count);
134
+
135
+ /**
136
+ * Return a pointer to the scope at the given index within the given options.
137
+ *
138
+ * @param options The options struct to get the scope from.
139
+ * @param index The index of the scope to get.
140
+ * @return A pointer to the scope at the given index.
141
+ */
142
+ PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm_options_t *options, size_t index);
143
+
144
+ /**
145
+ * Create a new options scope struct. This will hold a set of locals that are in
146
+ * scope surrounding the code that is being parsed.
147
+ *
148
+ * @param scope The scope struct to initialize.
149
+ * @param locals_count The number of locals to allocate.
150
+ */
151
+ PRISM_EXPORTED_FUNCTION void pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count);
152
+
153
+ /**
154
+ * Return a pointer to the local at the given index within the given scope.
155
+ *
156
+ * @param scope The scope struct to get the local from.
157
+ * @param index The index of the local to get.
158
+ * @return A pointer to the local at the given index.
159
+ */
160
+ PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index);
161
+
162
+ /**
163
+ * Free the internal memory associated with the options.
164
+ *
165
+ * @param options The options struct whose internal memory should be freed.
166
+ */
167
+ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
168
+
169
+ /**
170
+ * Deserialize an options struct from the given binary string. This is used to
171
+ * pass options to the parser from an FFI call so that consumers of the library
172
+ * from an FFI perspective don't have to worry about the structure of our
173
+ * options structs. Since the source of these calls will be from Ruby
174
+ * implementation internals we assume it is from a trusted source.
175
+ *
176
+ * `data` is assumed to be a valid pointer pointing to well-formed data. The
177
+ * layout of this data should be the same every time, and is described below:
178
+ *
179
+ * | # bytes | field |
180
+ * | ------- | -------------------------- |
181
+ * | `4` | the length of the filepath |
182
+ * | ... | the filepath bytes |
183
+ * | `4` | the line number |
184
+ * | `4` | the length of the encoding |
185
+ * | ... | the encoding bytes |
186
+ * | `1` | frozen string literal |
187
+ * | `1` | suppress warnings |
188
+ * | `1` | the version |
189
+ * | `4` | the number of scopes |
190
+ * | ... | the scopes |
191
+ *
192
+ * The version field is an enum, so it should be one of the following values:
193
+ *
194
+ * | value | version |
195
+ * | ----- | ------------------------- |
196
+ * | `0` | use the latest version of prism |
197
+ * | `1` | use the version of prism that is vendored in CRuby 3.3.0 |
198
+ *
199
+ * Each scope is laid out as follows:
200
+ *
201
+ * | # bytes | field |
202
+ * | ------- | -------------------------- |
203
+ * | `4` | the number of locals |
204
+ * | ... | the locals |
205
+ *
206
+ * Each local is laid out as follows:
207
+ *
208
+ * | # bytes | field |
209
+ * | ------- | -------------------------- |
210
+ * | `4` | the length of the local |
211
+ * | ... | the local bytes |
212
+ *
213
+ * Some additional things to note about this layout:
214
+ *
215
+ * * The filepath can have a length of 0, in which case we'll consider it an
216
+ * empty string.
217
+ * * The line number should be 0-indexed.
218
+ * * The encoding can have a length of 0, in which case we'll use the default
219
+ * encoding (UTF-8). If it's not 0, it should correspond to a name of an
220
+ * encoding that can be passed to `Encoding.find` in Ruby.
221
+ * * The frozen string literal and suppress warnings fields are booleans, so
222
+ * their values should be either 0 or 1.
223
+ * * The number of scopes can be 0.
224
+ *
225
+ * @param options The options struct to deserialize into.
226
+ * @param data The binary string to deserialize from.
227
+ */
228
+ void pm_options_read(pm_options_t *options, const char *data);
229
+
230
+ #endif
@@ -0,0 +1,152 @@
1
+ /**
2
+ * @file pack.h
3
+ *
4
+ * A pack template string parser.
5
+ */
6
+ #ifndef PRISM_PACK_H
7
+ #define PRISM_PACK_H
8
+
9
+ #include "prism/defines.h"
10
+
11
+ #include <stdint.h>
12
+ #include <stdlib.h>
13
+
14
+ /** The version of the pack template language that we are parsing. */
15
+ typedef enum pm_pack_version {
16
+ PM_PACK_VERSION_3_2_0
17
+ } pm_pack_version;
18
+
19
+ /** The type of pack template we are parsing. */
20
+ typedef enum pm_pack_variant {
21
+ PM_PACK_VARIANT_PACK,
22
+ PM_PACK_VARIANT_UNPACK
23
+ } pm_pack_variant;
24
+
25
+ /** A directive within the pack template. */
26
+ typedef enum pm_pack_type {
27
+ PM_PACK_SPACE,
28
+ PM_PACK_COMMENT,
29
+ PM_PACK_INTEGER,
30
+ PM_PACK_UTF8,
31
+ PM_PACK_BER,
32
+ PM_PACK_FLOAT,
33
+ PM_PACK_STRING_SPACE_PADDED,
34
+ PM_PACK_STRING_NULL_PADDED,
35
+ PM_PACK_STRING_NULL_TERMINATED,
36
+ PM_PACK_STRING_MSB,
37
+ PM_PACK_STRING_LSB,
38
+ PM_PACK_STRING_HEX_HIGH,
39
+ PM_PACK_STRING_HEX_LOW,
40
+ PM_PACK_STRING_UU,
41
+ PM_PACK_STRING_MIME,
42
+ PM_PACK_STRING_BASE64,
43
+ PM_PACK_STRING_FIXED,
44
+ PM_PACK_STRING_POINTER,
45
+ PM_PACK_MOVE,
46
+ PM_PACK_BACK,
47
+ PM_PACK_NULL,
48
+ PM_PACK_END
49
+ } pm_pack_type;
50
+
51
+ /** The signedness of a pack directive. */
52
+ typedef enum pm_pack_signed {
53
+ PM_PACK_UNSIGNED,
54
+ PM_PACK_SIGNED,
55
+ PM_PACK_SIGNED_NA
56
+ } pm_pack_signed;
57
+
58
+ /** The endianness of a pack directive. */
59
+ typedef enum pm_pack_endian {
60
+ PM_PACK_AGNOSTIC_ENDIAN,
61
+ PM_PACK_LITTLE_ENDIAN, // aka 'VAX', or 'V'
62
+ PM_PACK_BIG_ENDIAN, // aka 'network', or 'N'
63
+ PM_PACK_NATIVE_ENDIAN,
64
+ PM_PACK_ENDIAN_NA
65
+ } pm_pack_endian;
66
+
67
+ /** The size of an integer pack directive. */
68
+ typedef enum pm_pack_size {
69
+ PM_PACK_SIZE_SHORT,
70
+ PM_PACK_SIZE_INT,
71
+ PM_PACK_SIZE_LONG,
72
+ PM_PACK_SIZE_LONG_LONG,
73
+ PM_PACK_SIZE_8,
74
+ PM_PACK_SIZE_16,
75
+ PM_PACK_SIZE_32,
76
+ PM_PACK_SIZE_64,
77
+ PM_PACK_SIZE_P,
78
+ PM_PACK_SIZE_NA
79
+ } pm_pack_size;
80
+
81
+ /** The type of length of a pack directive. */
82
+ typedef enum pm_pack_length_type {
83
+ PM_PACK_LENGTH_FIXED,
84
+ PM_PACK_LENGTH_MAX,
85
+ PM_PACK_LENGTH_RELATIVE, // special case for unpack @*
86
+ PM_PACK_LENGTH_NA
87
+ } pm_pack_length_type;
88
+
89
+ /** The type of encoding for a pack template string. */
90
+ typedef enum pm_pack_encoding {
91
+ PM_PACK_ENCODING_START,
92
+ PM_PACK_ENCODING_ASCII_8BIT,
93
+ PM_PACK_ENCODING_US_ASCII,
94
+ PM_PACK_ENCODING_UTF_8
95
+ } pm_pack_encoding;
96
+
97
+ /** The result of parsing a pack template. */
98
+ typedef enum pm_pack_result {
99
+ PM_PACK_OK,
100
+ PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE,
101
+ PM_PACK_ERROR_UNKNOWN_DIRECTIVE,
102
+ PM_PACK_ERROR_LENGTH_TOO_BIG,
103
+ PM_PACK_ERROR_BANG_NOT_ALLOWED,
104
+ PM_PACK_ERROR_DOUBLE_ENDIAN
105
+ } pm_pack_result;
106
+
107
+ /**
108
+ * Parse a single directive from a pack or unpack format string.
109
+ *
110
+ * @param variant (in) pack or unpack
111
+ * @param format (in, out) the start of the next directive to parse on calling,
112
+ * and advanced beyond the parsed directive on return, or as much of it as
113
+ * was consumed until an error was encountered
114
+ * @param format_end (in) the end of the format string
115
+ * @param type (out) the type of the directive
116
+ * @param signed_type (out) whether the value is signed
117
+ * @param endian (out) the endianness of the value
118
+ * @param size (out) the size of the value
119
+ * @param length_type (out) what kind of length is specified
120
+ * @param length (out) the length of the directive
121
+ * @param encoding (in, out) takes the current encoding of the string which
122
+ * would result from parsing the whole format string, and returns a possibly
123
+ * changed encoding - the encoding should be `PM_PACK_ENCODING_START` when
124
+ * pm_pack_parse is called for the first directive in a format string
125
+ *
126
+ * @return `PM_PACK_OK` on success or `PM_PACK_ERROR_*` on error
127
+ * @note Consult Ruby documentation for the meaning of directives.
128
+ */
129
+ PRISM_EXPORTED_FUNCTION pm_pack_result
130
+ pm_pack_parse(
131
+ pm_pack_variant variant,
132
+ const char **format,
133
+ const char *format_end,
134
+ pm_pack_type *type,
135
+ pm_pack_signed *signed_type,
136
+ pm_pack_endian *endian,
137
+ pm_pack_size *size,
138
+ pm_pack_length_type *length_type,
139
+ uint64_t *length,
140
+ pm_pack_encoding *encoding
141
+ );
142
+
143
+ /**
144
+ * Prism abstracts sizes away from the native system - this converts an abstract
145
+ * size to a native size.
146
+ *
147
+ * @param size The abstract size to convert.
148
+ * @return The native size.
149
+ */
150
+ PRISM_EXPORTED_FUNCTION size_t pm_size_to_native(pm_pack_size size);
151
+
152
+ #endif