yarp 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/CODE_OF_CONDUCT.md +76 -0
  3. data/CONTRIBUTING.md +51 -0
  4. data/LICENSE.md +7 -0
  5. data/Makefile.in +79 -0
  6. data/README.md +86 -0
  7. data/config.h.in +25 -0
  8. data/config.yml +2147 -0
  9. data/configure +4487 -0
  10. data/docs/build_system.md +85 -0
  11. data/docs/building.md +26 -0
  12. data/docs/configuration.md +56 -0
  13. data/docs/design.md +53 -0
  14. data/docs/encoding.md +116 -0
  15. data/docs/extension.md +20 -0
  16. data/docs/fuzzing.md +93 -0
  17. data/docs/heredocs.md +36 -0
  18. data/docs/mapping.md +117 -0
  19. data/docs/ripper.md +36 -0
  20. data/docs/serialization.md +130 -0
  21. data/docs/testing.md +55 -0
  22. data/ext/yarp/api_node.c +3680 -0
  23. data/ext/yarp/api_pack.c +256 -0
  24. data/ext/yarp/extconf.rb +131 -0
  25. data/ext/yarp/extension.c +547 -0
  26. data/ext/yarp/extension.h +18 -0
  27. data/include/yarp/ast.h +1412 -0
  28. data/include/yarp/defines.h +54 -0
  29. data/include/yarp/diagnostic.h +24 -0
  30. data/include/yarp/enc/yp_encoding.h +94 -0
  31. data/include/yarp/node.h +36 -0
  32. data/include/yarp/pack.h +141 -0
  33. data/include/yarp/parser.h +389 -0
  34. data/include/yarp/regexp.h +19 -0
  35. data/include/yarp/unescape.h +42 -0
  36. data/include/yarp/util/yp_buffer.h +39 -0
  37. data/include/yarp/util/yp_char.h +75 -0
  38. data/include/yarp/util/yp_constant_pool.h +64 -0
  39. data/include/yarp/util/yp_list.h +67 -0
  40. data/include/yarp/util/yp_memchr.h +14 -0
  41. data/include/yarp/util/yp_newline_list.h +54 -0
  42. data/include/yarp/util/yp_state_stack.h +24 -0
  43. data/include/yarp/util/yp_string.h +57 -0
  44. data/include/yarp/util/yp_string_list.h +28 -0
  45. data/include/yarp/util/yp_strpbrk.h +29 -0
  46. data/include/yarp/version.h +5 -0
  47. data/include/yarp.h +69 -0
  48. data/lib/yarp/lex_compat.rb +759 -0
  49. data/lib/yarp/node.rb +7428 -0
  50. data/lib/yarp/pack.rb +185 -0
  51. data/lib/yarp/ripper_compat.rb +174 -0
  52. data/lib/yarp/serialize.rb +389 -0
  53. data/lib/yarp.rb +330 -0
  54. data/src/diagnostic.c +25 -0
  55. data/src/enc/yp_big5.c +79 -0
  56. data/src/enc/yp_euc_jp.c +85 -0
  57. data/src/enc/yp_gbk.c +88 -0
  58. data/src/enc/yp_shift_jis.c +83 -0
  59. data/src/enc/yp_tables.c +509 -0
  60. data/src/enc/yp_unicode.c +2320 -0
  61. data/src/enc/yp_windows_31j.c +83 -0
  62. data/src/node.c +2011 -0
  63. data/src/pack.c +493 -0
  64. data/src/prettyprint.c +1782 -0
  65. data/src/regexp.c +580 -0
  66. data/src/serialize.c +1576 -0
  67. data/src/token_type.c +347 -0
  68. data/src/unescape.c +576 -0
  69. data/src/util/yp_buffer.c +78 -0
  70. data/src/util/yp_char.c +229 -0
  71. data/src/util/yp_constant_pool.c +147 -0
  72. data/src/util/yp_list.c +50 -0
  73. data/src/util/yp_memchr.c +31 -0
  74. data/src/util/yp_newline_list.c +119 -0
  75. data/src/util/yp_state_stack.c +25 -0
  76. data/src/util/yp_string.c +207 -0
  77. data/src/util/yp_string_list.c +32 -0
  78. data/src/util/yp_strncasecmp.c +20 -0
  79. data/src/util/yp_strpbrk.c +66 -0
  80. data/src/yarp.c +13211 -0
  81. data/yarp.gemspec +100 -0
  82. metadata +125 -0
@@ -0,0 +1,256 @@
1
+ #include "yarp/extension.h"
2
+
3
+ static VALUE rb_cYARP;
4
+ static VALUE rb_cYARPPack;
5
+ static VALUE rb_cYARPPackDirective;
6
+ static VALUE rb_cYARPPackFormat;
7
+
8
+ static VALUE v3_2_0_symbol;
9
+ static VALUE pack_symbol;
10
+ static VALUE unpack_symbol;
11
+
12
+ #if SIZEOF_UINT64_T == SIZEOF_LONG_LONG
13
+ # define UINT64T2NUM(x) ULL2NUM(x)
14
+ # define NUM2UINT64T(x) (uint64_t)NUM2ULL(x)
15
+ #elif SIZEOF_UINT64_T == SIZEOF_LONG
16
+ # define UINT64T2NUM(x) ULONG2NUM(x)
17
+ # define NUM2UINT64T(x) (uint64_t)NUM2ULONG(x)
18
+ #else
19
+ // error No uint64_t conversion
20
+ #endif
21
+
22
+ static VALUE
23
+ pack_type_to_symbol(yp_pack_type type) {
24
+ switch (type) {
25
+ case YP_PACK_SPACE:
26
+ return ID2SYM(rb_intern("SPACE"));
27
+ case YP_PACK_COMMENT:
28
+ return ID2SYM(rb_intern("COMMENT"));
29
+ case YP_PACK_INTEGER:
30
+ return ID2SYM(rb_intern("INTEGER"));
31
+ case YP_PACK_UTF8:
32
+ return ID2SYM(rb_intern("UTF8"));
33
+ case YP_PACK_BER:
34
+ return ID2SYM(rb_intern("BER"));
35
+ case YP_PACK_FLOAT:
36
+ return ID2SYM(rb_intern("FLOAT"));
37
+ case YP_PACK_STRING_SPACE_PADDED:
38
+ return ID2SYM(rb_intern("STRING_SPACE_PADDED"));
39
+ case YP_PACK_STRING_NULL_PADDED:
40
+ return ID2SYM(rb_intern("STRING_NULL_PADDED"));
41
+ case YP_PACK_STRING_NULL_TERMINATED:
42
+ return ID2SYM(rb_intern("STRING_NULL_TERMINATED"));
43
+ case YP_PACK_STRING_MSB:
44
+ return ID2SYM(rb_intern("STRING_MSB"));
45
+ case YP_PACK_STRING_LSB:
46
+ return ID2SYM(rb_intern("STRING_LSB"));
47
+ case YP_PACK_STRING_HEX_HIGH:
48
+ return ID2SYM(rb_intern("STRING_HEX_HIGH"));
49
+ case YP_PACK_STRING_HEX_LOW:
50
+ return ID2SYM(rb_intern("STRING_HEX_LOW"));
51
+ case YP_PACK_STRING_UU:
52
+ return ID2SYM(rb_intern("STRING_UU"));
53
+ case YP_PACK_STRING_MIME:
54
+ return ID2SYM(rb_intern("STRING_MIME"));
55
+ case YP_PACK_STRING_BASE64:
56
+ return ID2SYM(rb_intern("STRING_BASE64"));
57
+ case YP_PACK_STRING_FIXED:
58
+ return ID2SYM(rb_intern("STRING_FIXED"));
59
+ case YP_PACK_STRING_POINTER:
60
+ return ID2SYM(rb_intern("STRING_POINTER"));
61
+ case YP_PACK_MOVE:
62
+ return ID2SYM(rb_intern("MOVE"));
63
+ case YP_PACK_BACK:
64
+ return ID2SYM(rb_intern("BACK"));
65
+ case YP_PACK_NULL:
66
+ return ID2SYM(rb_intern("NULL"));
67
+ default:
68
+ return Qnil;
69
+ }
70
+ }
71
+
72
+ static VALUE
73
+ pack_signed_to_symbol(yp_pack_signed signed_type) {
74
+ switch (signed_type) {
75
+ case YP_PACK_UNSIGNED:
76
+ return ID2SYM(rb_intern("UNSIGNED"));
77
+ case YP_PACK_SIGNED:
78
+ return ID2SYM(rb_intern("SIGNED"));
79
+ case YP_PACK_SIGNED_NA:
80
+ return ID2SYM(rb_intern("SIGNED_NA"));
81
+ default:
82
+ return Qnil;
83
+ }
84
+ }
85
+
86
+ static VALUE
87
+ pack_endian_to_symbol(yp_pack_endian endian) {
88
+ switch (endian) {
89
+ case YP_PACK_AGNOSTIC_ENDIAN:
90
+ return ID2SYM(rb_intern("AGNOSTIC_ENDIAN"));
91
+ case YP_PACK_LITTLE_ENDIAN:
92
+ return ID2SYM(rb_intern("LITTLE_ENDIAN"));
93
+ case YP_PACK_BIG_ENDIAN:
94
+ return ID2SYM(rb_intern("BIG_ENDIAN"));
95
+ case YP_PACK_NATIVE_ENDIAN:
96
+ return ID2SYM(rb_intern("NATIVE_ENDIAN"));
97
+ case YP_PACK_ENDIAN_NA:
98
+ return ID2SYM(rb_intern("ENDIAN_NA"));
99
+ default:
100
+ return Qnil;
101
+ }
102
+ }
103
+
104
+ static VALUE
105
+ pack_size_to_symbol(yp_pack_size size) {
106
+ switch (size) {
107
+ case YP_PACK_SIZE_SHORT:
108
+ return ID2SYM(rb_intern("SIZE_SHORT"));
109
+ case YP_PACK_SIZE_INT:
110
+ return ID2SYM(rb_intern("SIZE_INT"));
111
+ case YP_PACK_SIZE_LONG:
112
+ return ID2SYM(rb_intern("SIZE_LONG"));
113
+ case YP_PACK_SIZE_LONG_LONG:
114
+ return ID2SYM(rb_intern("SIZE_LONG_LONG"));
115
+ case YP_PACK_SIZE_8:
116
+ return ID2SYM(rb_intern("SIZE_8"));
117
+ case YP_PACK_SIZE_16:
118
+ return ID2SYM(rb_intern("SIZE_16"));
119
+ case YP_PACK_SIZE_32:
120
+ return ID2SYM(rb_intern("SIZE_32"));
121
+ case YP_PACK_SIZE_64:
122
+ return ID2SYM(rb_intern("SIZE_64"));
123
+ case YP_PACK_SIZE_P:
124
+ return ID2SYM(rb_intern("SIZE_P"));
125
+ case YP_PACK_SIZE_NA:
126
+ return ID2SYM(rb_intern("SIZE_NA"));
127
+ default:
128
+ return Qnil;
129
+ }
130
+ }
131
+
132
+ static VALUE
133
+ pack_length_type_to_symbol(yp_pack_length_type length_type) {
134
+ switch (length_type) {
135
+ case YP_PACK_LENGTH_FIXED:
136
+ return ID2SYM(rb_intern("LENGTH_FIXED"));
137
+ case YP_PACK_LENGTH_MAX:
138
+ return ID2SYM(rb_intern("LENGTH_MAX"));
139
+ case YP_PACK_LENGTH_RELATIVE:
140
+ return ID2SYM(rb_intern("LENGTH_RELATIVE"));
141
+ case YP_PACK_LENGTH_NA:
142
+ return ID2SYM(rb_intern("LENGTH_NA"));
143
+ default:
144
+ return Qnil;
145
+ }
146
+ }
147
+
148
+ static VALUE
149
+ pack_encoding_to_ruby(yp_pack_encoding encoding) {
150
+ int index;
151
+ switch (encoding) {
152
+ case YP_PACK_ENCODING_ASCII_8BIT:
153
+ index = rb_ascii8bit_encindex();
154
+ break;
155
+ case YP_PACK_ENCODING_US_ASCII:
156
+ index = rb_usascii_encindex();
157
+ break;
158
+ case YP_PACK_ENCODING_UTF_8:
159
+ index = rb_utf8_encindex();
160
+ break;
161
+ default:
162
+ return Qnil;
163
+ }
164
+ return rb_enc_from_encoding(rb_enc_from_index(index));
165
+ }
166
+
167
+ static VALUE
168
+ pack_parse(VALUE self, VALUE version_symbol, VALUE variant_symbol, VALUE format_string) {
169
+ if (version_symbol != v3_2_0_symbol) {
170
+ rb_raise(rb_eArgError, "invalid version");
171
+ }
172
+
173
+ yp_pack_variant variant;
174
+ if (variant_symbol == pack_symbol) {
175
+ variant = YP_PACK_VARIANT_PACK;
176
+ } else if (variant_symbol == unpack_symbol) {
177
+ variant = YP_PACK_VARIANT_UNPACK;
178
+ } else {
179
+ rb_raise(rb_eArgError, "invalid variant");
180
+ }
181
+
182
+ StringValue(format_string);
183
+
184
+ const char *format = RSTRING_PTR(format_string);
185
+ const char *format_end = format + RSTRING_LEN(format_string);
186
+ yp_pack_encoding encoding = YP_PACK_ENCODING_START;
187
+
188
+ VALUE directives_array = rb_ary_new();
189
+
190
+ while (format < format_end) {
191
+ yp_pack_type type;
192
+ yp_pack_signed signed_type;
193
+ yp_pack_endian endian;
194
+ yp_pack_size size;
195
+ yp_pack_length_type length_type;
196
+ uint64_t length;
197
+
198
+ const char *directive_start = format;
199
+
200
+ yp_pack_result parse_result = yp_pack_parse(variant, &format, format_end, &type, &signed_type, &endian,
201
+ &size, &length_type, &length, &encoding);
202
+
203
+ const char *directive_end = format;
204
+
205
+ switch (parse_result) {
206
+ case YP_PACK_OK:
207
+ break;
208
+ case YP_PACK_ERROR_UNSUPPORTED_DIRECTIVE:
209
+ rb_raise(rb_eArgError, "unsupported directive");
210
+ case YP_PACK_ERROR_UNKNOWN_DIRECTIVE:
211
+ rb_raise(rb_eArgError, "unsupported directive");
212
+ case YP_PACK_ERROR_LENGTH_TOO_BIG:
213
+ rb_raise(rb_eRangeError, "pack length too big");
214
+ case YP_PACK_ERROR_BANG_NOT_ALLOWED:
215
+ rb_raise(rb_eRangeError, "bang not allowed");
216
+ case YP_PACK_ERROR_DOUBLE_ENDIAN:
217
+ rb_raise(rb_eRangeError, "double endian");
218
+ default:
219
+ rb_bug("parse result");
220
+ }
221
+
222
+ if (type == YP_PACK_END) {
223
+ break;
224
+ }
225
+
226
+ VALUE directive_args[9] = { version_symbol,
227
+ variant_symbol,
228
+ rb_usascii_str_new(directive_start, directive_end - directive_start),
229
+ pack_type_to_symbol(type),
230
+ pack_signed_to_symbol(signed_type),
231
+ pack_endian_to_symbol(endian),
232
+ pack_size_to_symbol(size),
233
+ pack_length_type_to_symbol(length_type),
234
+ UINT64T2NUM(length) };
235
+
236
+ rb_ary_push(directives_array, rb_class_new_instance(9, directive_args, rb_cYARPPackDirective));
237
+ }
238
+
239
+ VALUE format_args[2];
240
+ format_args[0] = directives_array;
241
+ format_args[1] = pack_encoding_to_ruby(encoding);
242
+ return rb_class_new_instance(2, format_args, rb_cYARPPackFormat);
243
+ }
244
+
245
+ void
246
+ Init_yarp_pack(void) {
247
+ rb_cYARP = rb_define_module("YARP");
248
+ rb_cYARPPack = rb_define_module_under(rb_cYARP, "Pack");
249
+ rb_cYARPPackDirective = rb_define_class_under(rb_cYARPPack, "Directive", rb_cObject);
250
+ rb_cYARPPackFormat = rb_define_class_under(rb_cYARPPack, "Format", rb_cObject);
251
+ rb_define_singleton_method(rb_cYARPPack, "parse", pack_parse, 3);
252
+
253
+ v3_2_0_symbol = ID2SYM(rb_intern("v3_2_0"));
254
+ pack_symbol = ID2SYM(rb_intern("pack"));
255
+ unpack_symbol = ID2SYM(rb_intern("unpack"));
256
+ }
@@ -0,0 +1,131 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "mkmf"
4
+ require "rbconfig"
5
+ require "rake"
6
+
7
module Yarp
  # Build configuration for the YARP C extension. It sets compiler flags,
  # makes sure librubyparser exists (building it when the expected file is
  # missing), locates the headers, and writes the Makefile through mkmf.
  module ExtConf
    class << self
      # Configure the extension and librubyparser, then generate the Makefile.
      # For static builds an extra Makefile rule is appended so the extension
      # is relinked whenever the static archive changes.
      def configure
        configure_c_extension
        configure_rubyparser

        create_makefile("yarp/yarp")

        if static_link?
          File.open('Makefile', 'a') do |mf|
            mf.puts
            mf.puts '# Automatically rebuild the extension if librubyparser.a changed'
            mf.puts '$(TARGET_SO): $(LOCAL_LIBS)'
          end
        end
      end

      # Add the compiler flags used for the extension itself.
      def configure_c_extension
        append_cflags("-DYARP_DEBUG_MODE_BUILD") if debug_mode_build?
        append_cflags("-fvisibility=hidden")
      end

      # Make librubyparser available to the extension (static archive or
      # shared library, depending on static_link?) and locate the headers.
      # Raises if the shared library cannot be linked or a header is missing.
      def configure_rubyparser
        if static_link?
          static_archive_path = File.join(build_dir, "librubyparser.a")
          build_static_rubyparser unless File.exist?(static_archive_path)
          $LOCAL_LIBS << " #{static_archive_path}"
        else
          # The existence check must look for the exact file that
          # build_shared_rubyparser produces, otherwise the library is rebuilt
          # on every run on platforms where DLEXT and SOEXT differ.
          build_shared_rubyparser unless File.exist?(shared_library_path)
          unless find_library("rubyparser", "yp_parser_init", build_dir)
            raise "could not link against #{shared_library_name}"
          end
        end

        find_header("yarp.h", include_dir) or raise "yarp.h is required"

        # Explicitly look for the extension header in the parent directory
        # because we want to consistently look for yarp/extension.h in our
        # source files to line up with our mirroring in CRuby.
        find_header("yarp/extension.h", File.join(__dir__, "..")) or raise "yarp/extension.h is required"
      end

      # Build the shared librubyparser through the project's make target.
      def build_shared_rubyparser
        build_target_rubyparser "build/#{shared_library_name}"
      end

      # Build the static librubyparser archive through the project's make target.
      def build_static_rubyparser
        build_target_rubyparser "build/librubyparser.a"
      end

      # Run configure and `make <target>` from the project root.
      def build_target_rubyparser(target)
        Dir.chdir(root_dir) do
          if !File.exist?("configure") && Dir.exist?(".git")
            # this block only exists to support building the gem from a "git" source,
            # normally we package up the configure and other files in the gem itself
            Rake.sh("autoconf")
            Rake.sh("autoheader")
            Rake.sh("templates/template.rb")
          end
          Rake.sh("sh", "configure") # explicit "sh" for Windows where shebangs are not supported
          Rake.sh("make", target)
        end
      end

      # File name of the shared librubyparser, using the platform's shared
      # library extension (SOEXT).
      def shared_library_name
        "librubyparser.#{RbConfig::CONFIG["SOEXT"]}"
      end

      # Absolute path at which the shared librubyparser is expected after a build.
      def shared_library_path
        File.join(build_dir, shared_library_name)
      end

      # Project root: two directories above the one containing this file.
      def root_dir
        File.expand_path("../..", __dir__)
      end

      # Directory searched for yarp.h.
      def include_dir
        File.join(root_dir, "include")
      end

      # Directory in which librubyparser is looked up.
      def build_dir
        File.join(root_dir, "build")
      end

      # Print the usage text for this script to standard output.
      def print_help
        print(<<~TEXT)
          USAGE: ruby #{$PROGRAM_NAME} [options]

            Flags that are always valid:

                --enable-static
                --disable-static
                    Enable or disable static linking against librubyparser.
                    The default is to statically link.

                --enable-debug-mode-build
                    Enable debug mode build.
                    You may also set the YARP_DEBUG_MODE_BUILD environment variable.

                --help
                    Display this message.

            Environment variables used:

                YARP_DEBUG_MODE_BUILD
                    Equivalent to `--enable-debug-mode-build` when set, even if nil or blank.

        TEXT
      end

      # Whether to link librubyparser statically (the default).
      def static_link?
        enable_config("static", true)
      end

      # Whether to compile with YARP_DEBUG_MODE_BUILD defined. The default
      # comes from the YARP_DEBUG_MODE_BUILD environment variable.
      def debug_mode_build?
        enable_config("debug-mode-build", ENV["YARP_DEBUG_MODE_BUILD"] || false)
      end
    end
  end
end
125
+
126
# Script entry point. With --help, print the usage text and exit right away
# with status 0; otherwise run the full configuration.
help_requested = arg_config("--help")

if help_requested
  Yarp::ExtConf.print_help
  exit!(0)
end

Yarp::ExtConf.configure