prism 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (95) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +172 -0
  3. data/CODE_OF_CONDUCT.md +76 -0
  4. data/CONTRIBUTING.md +62 -0
  5. data/LICENSE.md +7 -0
  6. data/Makefile +84 -0
  7. data/README.md +89 -0
  8. data/config.yml +2481 -0
  9. data/docs/build_system.md +74 -0
  10. data/docs/building.md +22 -0
  11. data/docs/configuration.md +60 -0
  12. data/docs/design.md +53 -0
  13. data/docs/encoding.md +117 -0
  14. data/docs/fuzzing.md +93 -0
  15. data/docs/heredocs.md +36 -0
  16. data/docs/mapping.md +117 -0
  17. data/docs/ripper.md +36 -0
  18. data/docs/ruby_api.md +25 -0
  19. data/docs/serialization.md +181 -0
  20. data/docs/testing.md +55 -0
  21. data/ext/prism/api_node.c +4725 -0
  22. data/ext/prism/api_pack.c +256 -0
  23. data/ext/prism/extconf.rb +136 -0
  24. data/ext/prism/extension.c +626 -0
  25. data/ext/prism/extension.h +18 -0
  26. data/include/prism/ast.h +1932 -0
  27. data/include/prism/defines.h +45 -0
  28. data/include/prism/diagnostic.h +231 -0
  29. data/include/prism/enc/pm_encoding.h +95 -0
  30. data/include/prism/node.h +41 -0
  31. data/include/prism/pack.h +141 -0
  32. data/include/prism/parser.h +418 -0
  33. data/include/prism/regexp.h +19 -0
  34. data/include/prism/unescape.h +48 -0
  35. data/include/prism/util/pm_buffer.h +51 -0
  36. data/include/prism/util/pm_char.h +91 -0
  37. data/include/prism/util/pm_constant_pool.h +78 -0
  38. data/include/prism/util/pm_list.h +67 -0
  39. data/include/prism/util/pm_memchr.h +14 -0
  40. data/include/prism/util/pm_newline_list.h +61 -0
  41. data/include/prism/util/pm_state_stack.h +24 -0
  42. data/include/prism/util/pm_string.h +61 -0
  43. data/include/prism/util/pm_string_list.h +25 -0
  44. data/include/prism/util/pm_strpbrk.h +29 -0
  45. data/include/prism/version.h +4 -0
  46. data/include/prism.h +82 -0
  47. data/lib/prism/compiler.rb +465 -0
  48. data/lib/prism/debug.rb +157 -0
  49. data/lib/prism/desugar_compiler.rb +206 -0
  50. data/lib/prism/dispatcher.rb +2051 -0
  51. data/lib/prism/dsl.rb +750 -0
  52. data/lib/prism/ffi.rb +251 -0
  53. data/lib/prism/lex_compat.rb +838 -0
  54. data/lib/prism/mutation_compiler.rb +718 -0
  55. data/lib/prism/node.rb +14540 -0
  56. data/lib/prism/node_ext.rb +55 -0
  57. data/lib/prism/node_inspector.rb +68 -0
  58. data/lib/prism/pack.rb +185 -0
  59. data/lib/prism/parse_result/comments.rb +172 -0
  60. data/lib/prism/parse_result/newlines.rb +60 -0
  61. data/lib/prism/parse_result.rb +266 -0
  62. data/lib/prism/pattern.rb +239 -0
  63. data/lib/prism/ripper_compat.rb +174 -0
  64. data/lib/prism/serialize.rb +662 -0
  65. data/lib/prism/visitor.rb +470 -0
  66. data/lib/prism.rb +64 -0
  67. data/prism.gemspec +113 -0
  68. data/src/diagnostic.c +287 -0
  69. data/src/enc/pm_big5.c +52 -0
  70. data/src/enc/pm_euc_jp.c +58 -0
  71. data/src/enc/pm_gbk.c +61 -0
  72. data/src/enc/pm_shift_jis.c +56 -0
  73. data/src/enc/pm_tables.c +507 -0
  74. data/src/enc/pm_unicode.c +2324 -0
  75. data/src/enc/pm_windows_31j.c +56 -0
  76. data/src/node.c +2633 -0
  77. data/src/pack.c +493 -0
  78. data/src/prettyprint.c +2136 -0
  79. data/src/prism.c +14587 -0
  80. data/src/regexp.c +580 -0
  81. data/src/serialize.c +1899 -0
  82. data/src/token_type.c +349 -0
  83. data/src/unescape.c +637 -0
  84. data/src/util/pm_buffer.c +103 -0
  85. data/src/util/pm_char.c +272 -0
  86. data/src/util/pm_constant_pool.c +252 -0
  87. data/src/util/pm_list.c +41 -0
  88. data/src/util/pm_memchr.c +33 -0
  89. data/src/util/pm_newline_list.c +134 -0
  90. data/src/util/pm_state_stack.c +19 -0
  91. data/src/util/pm_string.c +200 -0
  92. data/src/util/pm_string_list.c +29 -0
  93. data/src/util/pm_strncasecmp.c +17 -0
  94. data/src/util/pm_strpbrk.c +66 -0
  95. metadata +138 -0
@@ -0,0 +1,256 @@
1
+ #include "prism/extension.h"
2
+
3
+ static VALUE rb_cPrism;
4
+ static VALUE rb_cPrismPack;
5
+ static VALUE rb_cPrismPackDirective;
6
+ static VALUE rb_cPrismPackFormat;
7
+
8
+ static VALUE v3_2_0_symbol;
9
+ static VALUE pack_symbol;
10
+ static VALUE unpack_symbol;
11
+
12
+ #if SIZEOF_UINT64_T == SIZEOF_LONG_LONG
13
+ # define UINT64T2NUM(x) ULL2NUM(x)
14
+ # define NUM2UINT64T(x) (uint64_t)NUM2ULL(x)
15
+ #elif SIZEOF_UINT64_T == SIZEOF_LONG
16
+ # define UINT64T2NUM(x) ULONG2NUM(x)
17
+ # define NUM2UINT64T(x) (uint64_t)NUM2ULONG(x)
18
+ #else
19
+ // error No uint64_t conversion
20
+ #endif
21
+
22
+ static VALUE
23
+ pack_type_to_symbol(pm_pack_type type) {
24
+ switch (type) {
25
+ case PM_PACK_SPACE:
26
+ return ID2SYM(rb_intern("SPACE"));
27
+ case PM_PACK_COMMENT:
28
+ return ID2SYM(rb_intern("COMMENT"));
29
+ case PM_PACK_INTEGER:
30
+ return ID2SYM(rb_intern("INTEGER"));
31
+ case PM_PACK_UTF8:
32
+ return ID2SYM(rb_intern("UTF8"));
33
+ case PM_PACK_BER:
34
+ return ID2SYM(rb_intern("BER"));
35
+ case PM_PACK_FLOAT:
36
+ return ID2SYM(rb_intern("FLOAT"));
37
+ case PM_PACK_STRING_SPACE_PADDED:
38
+ return ID2SYM(rb_intern("STRING_SPACE_PADDED"));
39
+ case PM_PACK_STRING_NULL_PADDED:
40
+ return ID2SYM(rb_intern("STRING_NULL_PADDED"));
41
+ case PM_PACK_STRING_NULL_TERMINATED:
42
+ return ID2SYM(rb_intern("STRING_NULL_TERMINATED"));
43
+ case PM_PACK_STRING_MSB:
44
+ return ID2SYM(rb_intern("STRING_MSB"));
45
+ case PM_PACK_STRING_LSB:
46
+ return ID2SYM(rb_intern("STRING_LSB"));
47
+ case PM_PACK_STRING_HEX_HIGH:
48
+ return ID2SYM(rb_intern("STRING_HEX_HIGH"));
49
+ case PM_PACK_STRING_HEX_LOW:
50
+ return ID2SYM(rb_intern("STRING_HEX_LOW"));
51
+ case PM_PACK_STRING_UU:
52
+ return ID2SYM(rb_intern("STRING_UU"));
53
+ case PM_PACK_STRING_MIME:
54
+ return ID2SYM(rb_intern("STRING_MIME"));
55
+ case PM_PACK_STRING_BASE64:
56
+ return ID2SYM(rb_intern("STRING_BASE64"));
57
+ case PM_PACK_STRING_FIXED:
58
+ return ID2SYM(rb_intern("STRING_FIXED"));
59
+ case PM_PACK_STRING_POINTER:
60
+ return ID2SYM(rb_intern("STRING_POINTER"));
61
+ case PM_PACK_MOVE:
62
+ return ID2SYM(rb_intern("MOVE"));
63
+ case PM_PACK_BACK:
64
+ return ID2SYM(rb_intern("BACK"));
65
+ case PM_PACK_NULL:
66
+ return ID2SYM(rb_intern("NULL"));
67
+ default:
68
+ return Qnil;
69
+ }
70
+ }
71
+
72
+ static VALUE
73
+ pack_signed_to_symbol(pm_pack_signed signed_type) {
74
+ switch (signed_type) {
75
+ case PM_PACK_UNSIGNED:
76
+ return ID2SYM(rb_intern("UNSIGNED"));
77
+ case PM_PACK_SIGNED:
78
+ return ID2SYM(rb_intern("SIGNED"));
79
+ case PM_PACK_SIGNED_NA:
80
+ return ID2SYM(rb_intern("SIGNED_NA"));
81
+ default:
82
+ return Qnil;
83
+ }
84
+ }
85
+
86
+ static VALUE
87
+ pack_endian_to_symbol(pm_pack_endian endian) {
88
+ switch (endian) {
89
+ case PM_PACK_AGNOSTIC_ENDIAN:
90
+ return ID2SYM(rb_intern("AGNOSTIC_ENDIAN"));
91
+ case PM_PACK_LITTLE_ENDIAN:
92
+ return ID2SYM(rb_intern("LITTLE_ENDIAN"));
93
+ case PM_PACK_BIG_ENDIAN:
94
+ return ID2SYM(rb_intern("BIG_ENDIAN"));
95
+ case PM_PACK_NATIVE_ENDIAN:
96
+ return ID2SYM(rb_intern("NATIVE_ENDIAN"));
97
+ case PM_PACK_ENDIAN_NA:
98
+ return ID2SYM(rb_intern("ENDIAN_NA"));
99
+ default:
100
+ return Qnil;
101
+ }
102
+ }
103
+
104
+ static VALUE
105
+ pack_size_to_symbol(pm_pack_size size) {
106
+ switch (size) {
107
+ case PM_PACK_SIZE_SHORT:
108
+ return ID2SYM(rb_intern("SIZE_SHORT"));
109
+ case PM_PACK_SIZE_INT:
110
+ return ID2SYM(rb_intern("SIZE_INT"));
111
+ case PM_PACK_SIZE_LONG:
112
+ return ID2SYM(rb_intern("SIZE_LONG"));
113
+ case PM_PACK_SIZE_LONG_LONG:
114
+ return ID2SYM(rb_intern("SIZE_LONG_LONG"));
115
+ case PM_PACK_SIZE_8:
116
+ return ID2SYM(rb_intern("SIZE_8"));
117
+ case PM_PACK_SIZE_16:
118
+ return ID2SYM(rb_intern("SIZE_16"));
119
+ case PM_PACK_SIZE_32:
120
+ return ID2SYM(rb_intern("SIZE_32"));
121
+ case PM_PACK_SIZE_64:
122
+ return ID2SYM(rb_intern("SIZE_64"));
123
+ case PM_PACK_SIZE_P:
124
+ return ID2SYM(rb_intern("SIZE_P"));
125
+ case PM_PACK_SIZE_NA:
126
+ return ID2SYM(rb_intern("SIZE_NA"));
127
+ default:
128
+ return Qnil;
129
+ }
130
+ }
131
+
132
+ static VALUE
133
+ pack_length_type_to_symbol(pm_pack_length_type length_type) {
134
+ switch (length_type) {
135
+ case PM_PACK_LENGTH_FIXED:
136
+ return ID2SYM(rb_intern("LENGTH_FIXED"));
137
+ case PM_PACK_LENGTH_MAX:
138
+ return ID2SYM(rb_intern("LENGTH_MAX"));
139
+ case PM_PACK_LENGTH_RELATIVE:
140
+ return ID2SYM(rb_intern("LENGTH_RELATIVE"));
141
+ case PM_PACK_LENGTH_NA:
142
+ return ID2SYM(rb_intern("LENGTH_NA"));
143
+ default:
144
+ return Qnil;
145
+ }
146
+ }
147
+
148
+ static VALUE
149
+ pack_encoding_to_ruby(pm_pack_encoding encoding) {
150
+ int index;
151
+ switch (encoding) {
152
+ case PM_PACK_ENCODING_ASCII_8BIT:
153
+ index = rb_ascii8bit_encindex();
154
+ break;
155
+ case PM_PACK_ENCODING_US_ASCII:
156
+ index = rb_usascii_encindex();
157
+ break;
158
+ case PM_PACK_ENCODING_UTF_8:
159
+ index = rb_utf8_encindex();
160
+ break;
161
+ default:
162
+ return Qnil;
163
+ }
164
+ return rb_enc_from_encoding(rb_enc_from_index(index));
165
+ }
166
+
167
+ static VALUE
168
+ pack_parse(VALUE self, VALUE version_symbol, VALUE variant_symbol, VALUE format_string) {
169
+ if (version_symbol != v3_2_0_symbol) {
170
+ rb_raise(rb_eArgError, "invalid version");
171
+ }
172
+
173
+ pm_pack_variant variant;
174
+ if (variant_symbol == pack_symbol) {
175
+ variant = PM_PACK_VARIANT_PACK;
176
+ } else if (variant_symbol == unpack_symbol) {
177
+ variant = PM_PACK_VARIANT_UNPACK;
178
+ } else {
179
+ rb_raise(rb_eArgError, "invalid variant");
180
+ }
181
+
182
+ StringValue(format_string);
183
+
184
+ const char *format = RSTRING_PTR(format_string);
185
+ const char *format_end = format + RSTRING_LEN(format_string);
186
+ pm_pack_encoding encoding = PM_PACK_ENCODING_START;
187
+
188
+ VALUE directives_array = rb_ary_new();
189
+
190
+ while (format < format_end) {
191
+ pm_pack_type type;
192
+ pm_pack_signed signed_type;
193
+ pm_pack_endian endian;
194
+ pm_pack_size size;
195
+ pm_pack_length_type length_type;
196
+ uint64_t length;
197
+
198
+ const char *directive_start = format;
199
+
200
+ pm_pack_result parse_result = pm_pack_parse(variant, &format, format_end, &type, &signed_type, &endian,
201
+ &size, &length_type, &length, &encoding);
202
+
203
+ const char *directive_end = format;
204
+
205
+ switch (parse_result) {
206
+ case PM_PACK_OK:
207
+ break;
208
+ case PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE:
209
+ rb_raise(rb_eArgError, "unsupported directive");
210
+ case PM_PACK_ERROR_UNKNOWN_DIRECTIVE:
211
+ rb_raise(rb_eArgError, "unsupported directive");
212
+ case PM_PACK_ERROR_LENGTH_TOO_BIG:
213
+ rb_raise(rb_eRangeError, "pack length too big");
214
+ case PM_PACK_ERROR_BANG_NOT_ALLOWED:
215
+ rb_raise(rb_eRangeError, "bang not allowed");
216
+ case PM_PACK_ERROR_DOUBLE_ENDIAN:
217
+ rb_raise(rb_eRangeError, "double endian");
218
+ default:
219
+ rb_bug("parse result");
220
+ }
221
+
222
+ if (type == PM_PACK_END) {
223
+ break;
224
+ }
225
+
226
+ VALUE directive_args[9] = { version_symbol,
227
+ variant_symbol,
228
+ rb_usascii_str_new(directive_start, directive_end - directive_start),
229
+ pack_type_to_symbol(type),
230
+ pack_signed_to_symbol(signed_type),
231
+ pack_endian_to_symbol(endian),
232
+ pack_size_to_symbol(size),
233
+ pack_length_type_to_symbol(length_type),
234
+ UINT64T2NUM(length) };
235
+
236
+ rb_ary_push(directives_array, rb_class_new_instance(9, directive_args, rb_cPrismPackDirective));
237
+ }
238
+
239
+ VALUE format_args[2];
240
+ format_args[0] = directives_array;
241
+ format_args[1] = pack_encoding_to_ruby(encoding);
242
+ return rb_class_new_instance(2, format_args, rb_cPrismPackFormat);
243
+ }
244
+
245
+ void
246
+ Init_prism_pack(void) {
247
+ rb_cPrism = rb_define_module("Prism");
248
+ rb_cPrismPack = rb_define_module_under(rb_cPrism, "Pack");
249
+ rb_cPrismPackDirective = rb_define_class_under(rb_cPrismPack, "Directive", rb_cObject);
250
+ rb_cPrismPackFormat = rb_define_class_under(rb_cPrismPack, "Format", rb_cObject);
251
+ rb_define_singleton_method(rb_cPrismPack, "parse", pack_parse, 3);
252
+
253
+ v3_2_0_symbol = ID2SYM(rb_intern("v3_2_0"));
254
+ pack_symbol = ID2SYM(rb_intern("pack"));
255
+ unpack_symbol = ID2SYM(rb_intern("unpack"));
256
+ }
@@ -0,0 +1,136 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rbconfig"
4
+
5
module Prism
  # Build-time configuration for the prism native extension: builds
  # librubyparser through the project's Makefile and, on CRuby, generates the
  # extension Makefile with mkmf.
  module ExtConf
    class << self
      # Entry point of the build configuration.
      #
      # On engines other than CRuby only the shared library is built and a
      # no-op Makefile is written. On CRuby the C extension is configured with
      # mkmf and, when linking statically, a rule is appended so the extension
      # is rebuilt when the static archive changes.
      def configure
        unless RUBY_ENGINE == "ruby"
          # On non-CRuby we only need the shared library, so build only that and not the C extension.
          # We also avoid `require "mkmf"` as that prepends the LLVM toolchain to PATH on TruffleRuby,
          # but we want to use the native toolchain here since librubyparser is run natively.
          build_shared_rubyparser
          File.write("Makefile", "all install clean:\n\t@#{RbConfig::CONFIG["NULLCMD"]}\n")
          return
        end

        require "mkmf"
        configure_c_extension
        configure_rubyparser

        create_makefile("prism/prism")

        if static_link?
          File.open('Makefile', 'a') do |mf|
            mf.puts
            mf.puts '# Automatically rebuild the extension if librubyparser.a changed'
            mf.puts '$(TARGET_SO): $(LOCAL_LIBS)'
          end
        end
      end

      # Adds the compiler flags for the C extension; the debug define is only
      # added for debug mode builds.
      def configure_c_extension
        append_cflags("-DPRISM_DEBUG_MODE_BUILD") if debug_mode_build?
        append_cflags("-fvisibility=hidden")
      end

      # Makes librubyparser and the prism headers available to the extension,
      # building the library first when it is not already in the build
      # directory.
      #
      # Raises RuntimeError when the shared library cannot be linked against
      # or a required header cannot be found.
      def configure_rubyparser
        if static_link?
          static_archive_path = File.join(build_dir, "librubyparser.a")
          build_static_rubyparser unless File.exist?(static_archive_path)
          $LOCAL_LIBS << " #{static_archive_path}"
        else
          shared_library_path = File.join(build_dir, "librubyparser.#{RbConfig::CONFIG["SOEXT"]}")
          build_shared_rubyparser unless File.exist?(shared_library_path)
          unless find_library("rubyparser", "pm_parser_init", build_dir)
            raise "could not link against #{File.basename(shared_library_path)}"
          end
        end

        find_header("prism.h", include_dir) or raise "prism.h is required"

        # Explicitly look for the extension header in the parent directory
        # because we want to consistently look for prism/extension.h in our
        # source files to line up with our mirroring in CRuby.
        find_header("prism/extension.h", File.join(__dir__, "..")) or raise "prism/extension.h is required"
      end

      # Builds the shared librubyparser for this platform's SOEXT.
      def build_shared_rubyparser
        build_target_rubyparser "build/librubyparser.#{RbConfig::CONFIG["SOEXT"]}"
      end

      # Builds the static librubyparser archive.
      def build_static_rubyparser
        build_target_rubyparser "build/librubyparser.a"
      end

      # Runs `make <target>` from the project root. Both commands are run with
      # `exception: true`, so a failing command raises.
      def build_target_rubyparser(target)
        Dir.chdir(root_dir) do
          if !File.exist?("include/prism/ast.h") && Dir.exist?(".git")
            # this block only exists to support building the gem from a "git" source,
            # normally we package up the configure and other files in the gem itself
            system("templates/template.rb", exception: true)
          end
          system("make", target, exception: true)
        end
      end

      # Absolute path two directories above the directory holding this file.
      def root_dir
        File.expand_path("../..", __dir__)
      end

      # Directory holding the public prism headers.
      def include_dir
        File.join(root_dir, "include")
      end

      # Directory the librubyparser artifacts are built into.
      def build_dir
        File.join(root_dir, "build")
      end

      # Prints the usage text to standard output.
      def print_help
        print(<<~TEXT)
          USAGE: ruby #{$PROGRAM_NAME} [options]

          Flags that are always valid:

          --enable-static
          --disable-static
          Enable or disable static linking against librubyparser.
          The default is to statically link.

          --enable-debug-mode-build
          Enable debug mode build.
          You may also set the PRISM_DEBUG_MODE_BUILD environment variable.

          --help
          Display this message.

          Environment variables used:

          PRISM_DEBUG_MODE_BUILD
          Equivalent to `--enable-debug-mode-build` when set, even if nil or blank.

        TEXT
      end

      # Whether to link librubyparser statically; defaults to true unless
      # overridden with --enable-static / --disable-static.
      def static_link?
        enable_config("static", true)
      end

      # Whether this is a debug mode build; the default comes from the
      # PRISM_DEBUG_MODE_BUILD environment variable and can be overridden with
      # --enable-debug-mode-build.
      def debug_mode_build?
        enable_config("debug-mode-build", ENV["PRISM_DEBUG_MODE_BUILD"] || false)
      end
    end
  end
end
130
+
131
# Script entry point. `ARGV.delete` removes the "--help" flag from ARGV when
# present; in that case the usage text is printed and the process exits
# immediately with status 0. Otherwise the build is configured.
if ARGV.delete("--help")
  Prism::ExtConf.print_help
  exit!(0)
else
  Prism::ExtConf.configure
end