jruby-prism-parser 0.23.0.pre.SNAPSHOT-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (110) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +401 -0
  3. data/CODE_OF_CONDUCT.md +76 -0
  4. data/CONTRIBUTING.md +62 -0
  5. data/LICENSE.md +7 -0
  6. data/Makefile +101 -0
  7. data/README.md +98 -0
  8. data/config.yml +2902 -0
  9. data/docs/build_system.md +91 -0
  10. data/docs/configuration.md +64 -0
  11. data/docs/cruby_compilation.md +27 -0
  12. data/docs/design.md +53 -0
  13. data/docs/encoding.md +121 -0
  14. data/docs/fuzzing.md +88 -0
  15. data/docs/heredocs.md +36 -0
  16. data/docs/javascript.md +118 -0
  17. data/docs/local_variable_depth.md +229 -0
  18. data/docs/mapping.md +117 -0
  19. data/docs/parser_translation.md +34 -0
  20. data/docs/parsing_rules.md +19 -0
  21. data/docs/releasing.md +98 -0
  22. data/docs/ripper.md +36 -0
  23. data/docs/ruby_api.md +43 -0
  24. data/docs/ruby_parser_translation.md +19 -0
  25. data/docs/serialization.md +209 -0
  26. data/docs/testing.md +55 -0
  27. data/ext/prism/api_node.c +5098 -0
  28. data/ext/prism/api_pack.c +267 -0
  29. data/ext/prism/extconf.rb +110 -0
  30. data/ext/prism/extension.c +1155 -0
  31. data/ext/prism/extension.h +18 -0
  32. data/include/prism/ast.h +5807 -0
  33. data/include/prism/defines.h +102 -0
  34. data/include/prism/diagnostic.h +339 -0
  35. data/include/prism/encoding.h +265 -0
  36. data/include/prism/node.h +57 -0
  37. data/include/prism/options.h +230 -0
  38. data/include/prism/pack.h +152 -0
  39. data/include/prism/parser.h +732 -0
  40. data/include/prism/prettyprint.h +26 -0
  41. data/include/prism/regexp.h +33 -0
  42. data/include/prism/util/pm_buffer.h +155 -0
  43. data/include/prism/util/pm_char.h +205 -0
  44. data/include/prism/util/pm_constant_pool.h +209 -0
  45. data/include/prism/util/pm_list.h +97 -0
  46. data/include/prism/util/pm_memchr.h +29 -0
  47. data/include/prism/util/pm_newline_list.h +93 -0
  48. data/include/prism/util/pm_state_stack.h +42 -0
  49. data/include/prism/util/pm_string.h +150 -0
  50. data/include/prism/util/pm_string_list.h +44 -0
  51. data/include/prism/util/pm_strncasecmp.h +32 -0
  52. data/include/prism/util/pm_strpbrk.h +46 -0
  53. data/include/prism/version.h +29 -0
  54. data/include/prism.h +289 -0
  55. data/jruby-prism.jar +0 -0
  56. data/lib/prism/compiler.rb +486 -0
  57. data/lib/prism/debug.rb +206 -0
  58. data/lib/prism/desugar_compiler.rb +207 -0
  59. data/lib/prism/dispatcher.rb +2150 -0
  60. data/lib/prism/dot_visitor.rb +4634 -0
  61. data/lib/prism/dsl.rb +785 -0
  62. data/lib/prism/ffi.rb +346 -0
  63. data/lib/prism/lex_compat.rb +908 -0
  64. data/lib/prism/mutation_compiler.rb +753 -0
  65. data/lib/prism/node.rb +17864 -0
  66. data/lib/prism/node_ext.rb +212 -0
  67. data/lib/prism/node_inspector.rb +68 -0
  68. data/lib/prism/pack.rb +224 -0
  69. data/lib/prism/parse_result/comments.rb +177 -0
  70. data/lib/prism/parse_result/newlines.rb +64 -0
  71. data/lib/prism/parse_result.rb +498 -0
  72. data/lib/prism/pattern.rb +250 -0
  73. data/lib/prism/serialize.rb +1354 -0
  74. data/lib/prism/translation/parser/compiler.rb +1838 -0
  75. data/lib/prism/translation/parser/lexer.rb +335 -0
  76. data/lib/prism/translation/parser/rubocop.rb +37 -0
  77. data/lib/prism/translation/parser.rb +178 -0
  78. data/lib/prism/translation/ripper.rb +577 -0
  79. data/lib/prism/translation/ruby_parser.rb +1521 -0
  80. data/lib/prism/translation.rb +11 -0
  81. data/lib/prism/version.rb +3 -0
  82. data/lib/prism/visitor.rb +495 -0
  83. data/lib/prism.rb +99 -0
  84. data/prism.gemspec +135 -0
  85. data/rbi/prism.rbi +7767 -0
  86. data/rbi/prism_static.rbi +207 -0
  87. data/sig/prism.rbs +4773 -0
  88. data/sig/prism_static.rbs +201 -0
  89. data/src/diagnostic.c +400 -0
  90. data/src/encoding.c +5132 -0
  91. data/src/node.c +2786 -0
  92. data/src/options.c +213 -0
  93. data/src/pack.c +493 -0
  94. data/src/prettyprint.c +8881 -0
  95. data/src/prism.c +18406 -0
  96. data/src/regexp.c +638 -0
  97. data/src/serialize.c +1554 -0
  98. data/src/token_type.c +700 -0
  99. data/src/util/pm_buffer.c +190 -0
  100. data/src/util/pm_char.c +318 -0
  101. data/src/util/pm_constant_pool.c +322 -0
  102. data/src/util/pm_list.c +49 -0
  103. data/src/util/pm_memchr.c +35 -0
  104. data/src/util/pm_newline_list.c +84 -0
  105. data/src/util/pm_state_stack.c +25 -0
  106. data/src/util/pm_string.c +203 -0
  107. data/src/util/pm_string_list.c +28 -0
  108. data/src/util/pm_strncasecmp.c +24 -0
  109. data/src/util/pm_strpbrk.c +180 -0
  110. metadata +156 -0
@@ -0,0 +1,267 @@
1
+ #include "prism/extension.h"
2
+
3
+ static VALUE rb_cPrism;
4
+ static VALUE rb_cPrismPack;
5
+ static VALUE rb_cPrismPackDirective;
6
+ static VALUE rb_cPrismPackFormat;
7
+
8
+ static VALUE v3_2_0_symbol;
9
+ static VALUE pack_symbol;
10
+ static VALUE unpack_symbol;
11
+
/*
 * Conversions between uint64_t and Ruby Integer objects. The Ruby conversion
 * macro is selected by matching the width of uint64_t against the width of
 * `long long` or `long`. If neither matches, compilation stops here with an
 * explicit diagnostic instead of failing later on an undefined macro.
 */
#if SIZEOF_UINT64_T == SIZEOF_LONG_LONG
#   define UINT64T2NUM(x) ULL2NUM(x)
#   define NUM2UINT64T(x) ((uint64_t) NUM2ULL(x))
#elif SIZEOF_UINT64_T == SIZEOF_LONG
#   define UINT64T2NUM(x) ULONG2NUM(x)
#   define NUM2UINT64T(x) ((uint64_t) NUM2ULONG(x))
#else
#   error No uint64_t conversion
#endif
21
+
22
+ static VALUE
23
+ pack_type_to_symbol(pm_pack_type type) {
24
+ switch (type) {
25
+ case PM_PACK_SPACE:
26
+ return ID2SYM(rb_intern("SPACE"));
27
+ case PM_PACK_COMMENT:
28
+ return ID2SYM(rb_intern("COMMENT"));
29
+ case PM_PACK_INTEGER:
30
+ return ID2SYM(rb_intern("INTEGER"));
31
+ case PM_PACK_UTF8:
32
+ return ID2SYM(rb_intern("UTF8"));
33
+ case PM_PACK_BER:
34
+ return ID2SYM(rb_intern("BER"));
35
+ case PM_PACK_FLOAT:
36
+ return ID2SYM(rb_intern("FLOAT"));
37
+ case PM_PACK_STRING_SPACE_PADDED:
38
+ return ID2SYM(rb_intern("STRING_SPACE_PADDED"));
39
+ case PM_PACK_STRING_NULL_PADDED:
40
+ return ID2SYM(rb_intern("STRING_NULL_PADDED"));
41
+ case PM_PACK_STRING_NULL_TERMINATED:
42
+ return ID2SYM(rb_intern("STRING_NULL_TERMINATED"));
43
+ case PM_PACK_STRING_MSB:
44
+ return ID2SYM(rb_intern("STRING_MSB"));
45
+ case PM_PACK_STRING_LSB:
46
+ return ID2SYM(rb_intern("STRING_LSB"));
47
+ case PM_PACK_STRING_HEX_HIGH:
48
+ return ID2SYM(rb_intern("STRING_HEX_HIGH"));
49
+ case PM_PACK_STRING_HEX_LOW:
50
+ return ID2SYM(rb_intern("STRING_HEX_LOW"));
51
+ case PM_PACK_STRING_UU:
52
+ return ID2SYM(rb_intern("STRING_UU"));
53
+ case PM_PACK_STRING_MIME:
54
+ return ID2SYM(rb_intern("STRING_MIME"));
55
+ case PM_PACK_STRING_BASE64:
56
+ return ID2SYM(rb_intern("STRING_BASE64"));
57
+ case PM_PACK_STRING_FIXED:
58
+ return ID2SYM(rb_intern("STRING_FIXED"));
59
+ case PM_PACK_STRING_POINTER:
60
+ return ID2SYM(rb_intern("STRING_POINTER"));
61
+ case PM_PACK_MOVE:
62
+ return ID2SYM(rb_intern("MOVE"));
63
+ case PM_PACK_BACK:
64
+ return ID2SYM(rb_intern("BACK"));
65
+ case PM_PACK_NULL:
66
+ return ID2SYM(rb_intern("NULL"));
67
+ default:
68
+ return Qnil;
69
+ }
70
+ }
71
+
72
+ static VALUE
73
+ pack_signed_to_symbol(pm_pack_signed signed_type) {
74
+ switch (signed_type) {
75
+ case PM_PACK_UNSIGNED:
76
+ return ID2SYM(rb_intern("UNSIGNED"));
77
+ case PM_PACK_SIGNED:
78
+ return ID2SYM(rb_intern("SIGNED"));
79
+ case PM_PACK_SIGNED_NA:
80
+ return ID2SYM(rb_intern("SIGNED_NA"));
81
+ default:
82
+ return Qnil;
83
+ }
84
+ }
85
+
86
+ static VALUE
87
+ pack_endian_to_symbol(pm_pack_endian endian) {
88
+ switch (endian) {
89
+ case PM_PACK_AGNOSTIC_ENDIAN:
90
+ return ID2SYM(rb_intern("AGNOSTIC_ENDIAN"));
91
+ case PM_PACK_LITTLE_ENDIAN:
92
+ return ID2SYM(rb_intern("LITTLE_ENDIAN"));
93
+ case PM_PACK_BIG_ENDIAN:
94
+ return ID2SYM(rb_intern("BIG_ENDIAN"));
95
+ case PM_PACK_NATIVE_ENDIAN:
96
+ return ID2SYM(rb_intern("NATIVE_ENDIAN"));
97
+ case PM_PACK_ENDIAN_NA:
98
+ return ID2SYM(rb_intern("ENDIAN_NA"));
99
+ default:
100
+ return Qnil;
101
+ }
102
+ }
103
+
104
+ static VALUE
105
+ pack_size_to_symbol(pm_pack_size size) {
106
+ switch (size) {
107
+ case PM_PACK_SIZE_SHORT:
108
+ return ID2SYM(rb_intern("SIZE_SHORT"));
109
+ case PM_PACK_SIZE_INT:
110
+ return ID2SYM(rb_intern("SIZE_INT"));
111
+ case PM_PACK_SIZE_LONG:
112
+ return ID2SYM(rb_intern("SIZE_LONG"));
113
+ case PM_PACK_SIZE_LONG_LONG:
114
+ return ID2SYM(rb_intern("SIZE_LONG_LONG"));
115
+ case PM_PACK_SIZE_8:
116
+ return ID2SYM(rb_intern("SIZE_8"));
117
+ case PM_PACK_SIZE_16:
118
+ return ID2SYM(rb_intern("SIZE_16"));
119
+ case PM_PACK_SIZE_32:
120
+ return ID2SYM(rb_intern("SIZE_32"));
121
+ case PM_PACK_SIZE_64:
122
+ return ID2SYM(rb_intern("SIZE_64"));
123
+ case PM_PACK_SIZE_P:
124
+ return ID2SYM(rb_intern("SIZE_P"));
125
+ case PM_PACK_SIZE_NA:
126
+ return ID2SYM(rb_intern("SIZE_NA"));
127
+ default:
128
+ return Qnil;
129
+ }
130
+ }
131
+
132
+ static VALUE
133
+ pack_length_type_to_symbol(pm_pack_length_type length_type) {
134
+ switch (length_type) {
135
+ case PM_PACK_LENGTH_FIXED:
136
+ return ID2SYM(rb_intern("LENGTH_FIXED"));
137
+ case PM_PACK_LENGTH_MAX:
138
+ return ID2SYM(rb_intern("LENGTH_MAX"));
139
+ case PM_PACK_LENGTH_RELATIVE:
140
+ return ID2SYM(rb_intern("LENGTH_RELATIVE"));
141
+ case PM_PACK_LENGTH_NA:
142
+ return ID2SYM(rb_intern("LENGTH_NA"));
143
+ default:
144
+ return Qnil;
145
+ }
146
+ }
147
+
148
+ static VALUE
149
+ pack_encoding_to_ruby(pm_pack_encoding encoding) {
150
+ int index;
151
+ switch (encoding) {
152
+ case PM_PACK_ENCODING_ASCII_8BIT:
153
+ index = rb_ascii8bit_encindex();
154
+ break;
155
+ case PM_PACK_ENCODING_US_ASCII:
156
+ index = rb_usascii_encindex();
157
+ break;
158
+ case PM_PACK_ENCODING_UTF_8:
159
+ index = rb_utf8_encindex();
160
+ break;
161
+ default:
162
+ return Qnil;
163
+ }
164
+ return rb_enc_from_encoding(rb_enc_from_index(index));
165
+ }
166
+
167
+ /**
168
+ * call-seq:
169
+ * Pack::parse(version, variant, source) -> Format
170
+ *
171
+ * Parse the given source and return a format object.
172
+ */
173
+ static VALUE
174
+ pack_parse(VALUE self, VALUE version_symbol, VALUE variant_symbol, VALUE format_string) {
175
+ if (version_symbol != v3_2_0_symbol) {
176
+ rb_raise(rb_eArgError, "invalid version");
177
+ }
178
+
179
+ pm_pack_variant variant;
180
+ if (variant_symbol == pack_symbol) {
181
+ variant = PM_PACK_VARIANT_PACK;
182
+ } else if (variant_symbol == unpack_symbol) {
183
+ variant = PM_PACK_VARIANT_UNPACK;
184
+ } else {
185
+ rb_raise(rb_eArgError, "invalid variant");
186
+ }
187
+
188
+ StringValue(format_string);
189
+
190
+ const char *format = RSTRING_PTR(format_string);
191
+ const char *format_end = format + RSTRING_LEN(format_string);
192
+ pm_pack_encoding encoding = PM_PACK_ENCODING_START;
193
+
194
+ VALUE directives_array = rb_ary_new();
195
+
196
+ while (format < format_end) {
197
+ pm_pack_type type;
198
+ pm_pack_signed signed_type;
199
+ pm_pack_endian endian;
200
+ pm_pack_size size;
201
+ pm_pack_length_type length_type;
202
+ uint64_t length;
203
+
204
+ const char *directive_start = format;
205
+
206
+ pm_pack_result parse_result = pm_pack_parse(variant, &format, format_end, &type, &signed_type, &endian,
207
+ &size, &length_type, &length, &encoding);
208
+
209
+ const char *directive_end = format;
210
+
211
+ switch (parse_result) {
212
+ case PM_PACK_OK:
213
+ break;
214
+ case PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE:
215
+ rb_raise(rb_eArgError, "unsupported directive");
216
+ case PM_PACK_ERROR_UNKNOWN_DIRECTIVE:
217
+ rb_raise(rb_eArgError, "unsupported directive");
218
+ case PM_PACK_ERROR_LENGTH_TOO_BIG:
219
+ rb_raise(rb_eRangeError, "pack length too big");
220
+ case PM_PACK_ERROR_BANG_NOT_ALLOWED:
221
+ rb_raise(rb_eRangeError, "bang not allowed");
222
+ case PM_PACK_ERROR_DOUBLE_ENDIAN:
223
+ rb_raise(rb_eRangeError, "double endian");
224
+ default:
225
+ rb_bug("parse result");
226
+ }
227
+
228
+ if (type == PM_PACK_END) {
229
+ break;
230
+ }
231
+
232
+ VALUE directive_args[9] = {
233
+ version_symbol,
234
+ variant_symbol,
235
+ rb_usascii_str_new(directive_start, directive_end - directive_start),
236
+ pack_type_to_symbol(type),
237
+ pack_signed_to_symbol(signed_type),
238
+ pack_endian_to_symbol(endian),
239
+ pack_size_to_symbol(size),
240
+ pack_length_type_to_symbol(length_type),
241
+ UINT64T2NUM(length)
242
+ };
243
+
244
+ rb_ary_push(directives_array, rb_class_new_instance(9, directive_args, rb_cPrismPackDirective));
245
+ }
246
+
247
+ VALUE format_args[2];
248
+ format_args[0] = directives_array;
249
+ format_args[1] = pack_encoding_to_ruby(encoding);
250
+ return rb_class_new_instance(2, format_args, rb_cPrismPackFormat);
251
+ }
252
+
253
+ /**
254
+ * The function that gets called when Ruby initializes the prism extension.
255
+ */
256
+ void
257
+ Init_prism_pack(void) {
258
+ rb_cPrism = rb_define_module("Prism");
259
+ rb_cPrismPack = rb_define_module_under(rb_cPrism, "Pack");
260
+ rb_cPrismPackDirective = rb_define_class_under(rb_cPrismPack, "Directive", rb_cObject);
261
+ rb_cPrismPackFormat = rb_define_class_under(rb_cPrismPack, "Format", rb_cObject);
262
+ rb_define_singleton_method(rb_cPrismPack, "parse", pack_parse, 3);
263
+
264
+ v3_2_0_symbol = ID2SYM(rb_intern("v3_2_0"));
265
+ pack_symbol = ID2SYM(rb_intern("pack"));
266
+ unpack_symbol = ID2SYM(rb_intern("unpack"));
267
+ }
@@ -0,0 +1,110 @@
1
+ # frozen_string_literal: true
2
+
# When `--help` is passed, print the usage text and stop right away. The flag
# is removed from ARGV as part of the check.
if ARGV.delete("--help")
  print(<<~TEXT)
    USAGE: ruby #{$PROGRAM_NAME} [options]

    Flags that are always valid:

    --enable-debug-mode-build
    Enable debug mode build.
    You may also set the PRISM_DEBUG_MODE_BUILD environment variable.

    --help
    Display this message.

    Environment variables used:

    PRISM_DEBUG_MODE_BUILD
    Equivalent to `--enable-debug-mode-build` when set, even if nil or blank.

  TEXT
  exit!(0)
end
24
+
# When this gem is being built from a git source, the templated files may not
# have been generated yet, so run the templating script if they are missing.
# In normal packaging the templated files ship with the gem, which makes this
# a no-op.
def generate_templates
  project_root = File.expand_path("../..", __dir__)

  Dir.chdir(project_root) do
    templates_missing = !File.exist?("include/prism/ast.h")
    system("templates/template.rb", exception: true) if templates_missing && Dir.exist?(".git")
  end
end
35
+
# Runs `make` for the given target from the root directory of the project.
# Note that this uses the `Makefile` of the overall project, not the
# `Makefile` that is being generated by this script.
def make(target)
  project_root = File.expand_path("../..", __dir__)
  Dir.chdir(project_root) { system("make", target, exception: true) }
end
44
+
45
+ require "rbconfig"
46
+
# On non-CRuby we only need the shared library since we'll interface with it
# through FFI, so we'll build only that and not the C extension. We also avoid
# `require "mkmf"` as that prepends the LLVM toolchain to PATH on TruffleRuby,
# but we want to use the native toolchain here since libprism is run natively.
if RUBY_ENGINE != "ruby"
  require "fileutils"

  generate_templates

  # `make` builds inside the project root, so the artifacts are located
  # relative to this file rather than relative to the current directory.
  project_root = File.expand_path("../..", __dir__)
  lib_file = "build/libprism.#{RbConfig::CONFIG["SOEXT"]}"
  make(lib_file)

  libdir = RbConfig::CONFIG["libdir"]
  FileUtils.cp(File.join(project_root, lib_file), libdir)
  FileUtils.cp(File.join(project_root, "jruby-prism.jar"), File.join(libdir, "jruby-prism.jar"))

  # Write a Makefile whose targets do nothing, since no C extension is built
  # on this engine.
  File.write("Makefile", "all install clean:\n\t@#{RbConfig::CONFIG["NULLCMD"]}\n")
  return
end
61
+
62
+ require "mkmf"
63
+
# Make sure the header for the prism library can be found. Templates have to
# be generated before find_header runs.
generate_templates
prism_include_dir = File.expand_path("../../include", __dir__)
raise "prism.h is required" unless find_header("prism.h", prism_include_dir)

# Make sure the header for the C extension can be found as well. The parent
# directory is searched explicitly because the source files consistently refer
# to `prism/extension.h`, which lines up with the mirroring in CRuby.
extension_parent_dir = File.expand_path("..", __dir__)
raise "prism/extension.h is required" unless find_header("prism/extension.h", extension_parent_dir)

# Passing `--enable-debug-mode-build` to this script, or defining the
# `PRISM_DEBUG_MODE_BUILD` environment variable, builds with the
# `PRISM_DEBUG_MODE_BUILD` macro defined. This causes parse functions to
# duplicate their input so that they have clearly set bounds, which is useful
# for finding bugs that cause the parser to read off the end of the input.
debug_mode_build = enable_config("debug-mode-build", ENV["PRISM_DEBUG_MODE_BUILD"] || false)
append_cflags("-DPRISM_DEBUG_MODE_BUILD") if debug_mode_build

# By default, all symbols are hidden in the shared library.
append_cflags("-fvisibility=hidden")

# The extension links against the libprism.a archive produced by the project's
# `Makefile`. Build it when it is missing, then add it to `mkmf`'s list of
# local libraries.
archive_target = "build/libprism.a"
archive_path = File.expand_path("../../#{archive_target}", __dir__)
make(archive_target) unless File.exist?(archive_path)
$LOCAL_LIBS << " #{archive_path}"

# Create the `Makefile` that is used to configure and build the C extension.
create_makefile("prism/prism")

# With the extension's `Makefile` in place, append one extra rule so that the
# extension is rebuilt whenever the archive is updated.
File.open("Makefile", "a") do |makefile|
  makefile.write("\n# Automatically rebuild the extension if libprism.a changed\n$(TARGET_SO): $(LOCAL_LIBS)\n")
end