yarp 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/CODE_OF_CONDUCT.md +76 -0
  3. data/CONTRIBUTING.md +51 -0
  4. data/LICENSE.md +7 -0
  5. data/Makefile.in +79 -0
  6. data/README.md +86 -0
  7. data/config.h.in +25 -0
  8. data/config.yml +2147 -0
  9. data/configure +4487 -0
  10. data/docs/build_system.md +85 -0
  11. data/docs/building.md +26 -0
  12. data/docs/configuration.md +56 -0
  13. data/docs/design.md +53 -0
  14. data/docs/encoding.md +116 -0
  15. data/docs/extension.md +20 -0
  16. data/docs/fuzzing.md +93 -0
  17. data/docs/heredocs.md +36 -0
  18. data/docs/mapping.md +117 -0
  19. data/docs/ripper.md +36 -0
  20. data/docs/serialization.md +130 -0
  21. data/docs/testing.md +55 -0
  22. data/ext/yarp/api_node.c +3680 -0
  23. data/ext/yarp/api_pack.c +256 -0
  24. data/ext/yarp/extconf.rb +131 -0
  25. data/ext/yarp/extension.c +547 -0
  26. data/ext/yarp/extension.h +18 -0
  27. data/include/yarp/ast.h +1412 -0
  28. data/include/yarp/defines.h +54 -0
  29. data/include/yarp/diagnostic.h +24 -0
  30. data/include/yarp/enc/yp_encoding.h +94 -0
  31. data/include/yarp/node.h +36 -0
  32. data/include/yarp/pack.h +141 -0
  33. data/include/yarp/parser.h +389 -0
  34. data/include/yarp/regexp.h +19 -0
  35. data/include/yarp/unescape.h +42 -0
  36. data/include/yarp/util/yp_buffer.h +39 -0
  37. data/include/yarp/util/yp_char.h +75 -0
  38. data/include/yarp/util/yp_constant_pool.h +64 -0
  39. data/include/yarp/util/yp_list.h +67 -0
  40. data/include/yarp/util/yp_memchr.h +14 -0
  41. data/include/yarp/util/yp_newline_list.h +54 -0
  42. data/include/yarp/util/yp_state_stack.h +24 -0
  43. data/include/yarp/util/yp_string.h +57 -0
  44. data/include/yarp/util/yp_string_list.h +28 -0
  45. data/include/yarp/util/yp_strpbrk.h +29 -0
  46. data/include/yarp/version.h +5 -0
  47. data/include/yarp.h +69 -0
  48. data/lib/yarp/lex_compat.rb +759 -0
  49. data/lib/yarp/node.rb +7428 -0
  50. data/lib/yarp/pack.rb +185 -0
  51. data/lib/yarp/ripper_compat.rb +174 -0
  52. data/lib/yarp/serialize.rb +389 -0
  53. data/lib/yarp.rb +330 -0
  54. data/src/diagnostic.c +25 -0
  55. data/src/enc/yp_big5.c +79 -0
  56. data/src/enc/yp_euc_jp.c +85 -0
  57. data/src/enc/yp_gbk.c +88 -0
  58. data/src/enc/yp_shift_jis.c +83 -0
  59. data/src/enc/yp_tables.c +509 -0
  60. data/src/enc/yp_unicode.c +2320 -0
  61. data/src/enc/yp_windows_31j.c +83 -0
  62. data/src/node.c +2011 -0
  63. data/src/pack.c +493 -0
  64. data/src/prettyprint.c +1782 -0
  65. data/src/regexp.c +580 -0
  66. data/src/serialize.c +1576 -0
  67. data/src/token_type.c +347 -0
  68. data/src/unescape.c +576 -0
  69. data/src/util/yp_buffer.c +78 -0
  70. data/src/util/yp_char.c +229 -0
  71. data/src/util/yp_constant_pool.c +147 -0
  72. data/src/util/yp_list.c +50 -0
  73. data/src/util/yp_memchr.c +31 -0
  74. data/src/util/yp_newline_list.c +119 -0
  75. data/src/util/yp_state_stack.c +25 -0
  76. data/src/util/yp_string.c +207 -0
  77. data/src/util/yp_string_list.c +32 -0
  78. data/src/util/yp_strncasecmp.c +20 -0
  79. data/src/util/yp_strpbrk.c +66 -0
  80. data/src/yarp.c +13211 -0
  81. data/yarp.gemspec +100 -0
  82. metadata +125 -0
@@ -0,0 +1,256 @@
1
+ #include "yarp/extension.h"
2
+
3
+ static VALUE rb_cYARP;
4
+ static VALUE rb_cYARPPack;
5
+ static VALUE rb_cYARPPackDirective;
6
+ static VALUE rb_cYARPPackFormat;
7
+
8
+ static VALUE v3_2_0_symbol;
9
+ static VALUE pack_symbol;
10
+ static VALUE unpack_symbol;
11
+
12
+ #if SIZEOF_UINT64_T == SIZEOF_LONG_LONG
13
+ # define UINT64T2NUM(x) ULL2NUM(x)
14
+ # define NUM2UINT64T(x) (uint64_t)NUM2ULL(x)
15
+ #elif SIZEOF_UINT64_T == SIZEOF_LONG
16
+ # define UINT64T2NUM(x) ULONG2NUM(x)
17
+ # define NUM2UINT64T(x) (uint64_t)NUM2ULONG(x)
18
+ #else
19
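+ // error No uint64_t conversion -- NOTE(review): this is a plain comment, not an #error directive, so on a platform matching neither branch the macros stay undefined and the build only fails at the first use of UINT64T2NUM; confirm this is intended.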
20
+ #endif
21
+
22
+ static VALUE
23
+ pack_type_to_symbol(yp_pack_type type) {
24
+ switch (type) {
25
+ case YP_PACK_SPACE:
26
+ return ID2SYM(rb_intern("SPACE"));
27
+ case YP_PACK_COMMENT:
28
+ return ID2SYM(rb_intern("COMMENT"));
29
+ case YP_PACK_INTEGER:
30
+ return ID2SYM(rb_intern("INTEGER"));
31
+ case YP_PACK_UTF8:
32
+ return ID2SYM(rb_intern("UTF8"));
33
+ case YP_PACK_BER:
34
+ return ID2SYM(rb_intern("BER"));
35
+ case YP_PACK_FLOAT:
36
+ return ID2SYM(rb_intern("FLOAT"));
37
+ case YP_PACK_STRING_SPACE_PADDED:
38
+ return ID2SYM(rb_intern("STRING_SPACE_PADDED"));
39
+ case YP_PACK_STRING_NULL_PADDED:
40
+ return ID2SYM(rb_intern("STRING_NULL_PADDED"));
41
+ case YP_PACK_STRING_NULL_TERMINATED:
42
+ return ID2SYM(rb_intern("STRING_NULL_TERMINATED"));
43
+ case YP_PACK_STRING_MSB:
44
+ return ID2SYM(rb_intern("STRING_MSB"));
45
+ case YP_PACK_STRING_LSB:
46
+ return ID2SYM(rb_intern("STRING_LSB"));
47
+ case YP_PACK_STRING_HEX_HIGH:
48
+ return ID2SYM(rb_intern("STRING_HEX_HIGH"));
49
+ case YP_PACK_STRING_HEX_LOW:
50
+ return ID2SYM(rb_intern("STRING_HEX_LOW"));
51
+ case YP_PACK_STRING_UU:
52
+ return ID2SYM(rb_intern("STRING_UU"));
53
+ case YP_PACK_STRING_MIME:
54
+ return ID2SYM(rb_intern("STRING_MIME"));
55
+ case YP_PACK_STRING_BASE64:
56
+ return ID2SYM(rb_intern("STRING_BASE64"));
57
+ case YP_PACK_STRING_FIXED:
58
+ return ID2SYM(rb_intern("STRING_FIXED"));
59
+ case YP_PACK_STRING_POINTER:
60
+ return ID2SYM(rb_intern("STRING_POINTER"));
61
+ case YP_PACK_MOVE:
62
+ return ID2SYM(rb_intern("MOVE"));
63
+ case YP_PACK_BACK:
64
+ return ID2SYM(rb_intern("BACK"));
65
+ case YP_PACK_NULL:
66
+ return ID2SYM(rb_intern("NULL"));
67
+ default:
68
+ return Qnil;
69
+ }
70
+ }
71
+
72
+ static VALUE
73
+ pack_signed_to_symbol(yp_pack_signed signed_type) {
74
+ switch (signed_type) {
75
+ case YP_PACK_UNSIGNED:
76
+ return ID2SYM(rb_intern("UNSIGNED"));
77
+ case YP_PACK_SIGNED:
78
+ return ID2SYM(rb_intern("SIGNED"));
79
+ case YP_PACK_SIGNED_NA:
80
+ return ID2SYM(rb_intern("SIGNED_NA"));
81
+ default:
82
+ return Qnil;
83
+ }
84
+ }
85
+
86
+ static VALUE
87
+ pack_endian_to_symbol(yp_pack_endian endian) {
88
+ switch (endian) {
89
+ case YP_PACK_AGNOSTIC_ENDIAN:
90
+ return ID2SYM(rb_intern("AGNOSTIC_ENDIAN"));
91
+ case YP_PACK_LITTLE_ENDIAN:
92
+ return ID2SYM(rb_intern("LITTLE_ENDIAN"));
93
+ case YP_PACK_BIG_ENDIAN:
94
+ return ID2SYM(rb_intern("BIG_ENDIAN"));
95
+ case YP_PACK_NATIVE_ENDIAN:
96
+ return ID2SYM(rb_intern("NATIVE_ENDIAN"));
97
+ case YP_PACK_ENDIAN_NA:
98
+ return ID2SYM(rb_intern("ENDIAN_NA"));
99
+ default:
100
+ return Qnil;
101
+ }
102
+ }
103
+
104
+ static VALUE
105
+ pack_size_to_symbol(yp_pack_size size) {
106
+ switch (size) {
107
+ case YP_PACK_SIZE_SHORT:
108
+ return ID2SYM(rb_intern("SIZE_SHORT"));
109
+ case YP_PACK_SIZE_INT:
110
+ return ID2SYM(rb_intern("SIZE_INT"));
111
+ case YP_PACK_SIZE_LONG:
112
+ return ID2SYM(rb_intern("SIZE_LONG"));
113
+ case YP_PACK_SIZE_LONG_LONG:
114
+ return ID2SYM(rb_intern("SIZE_LONG_LONG"));
115
+ case YP_PACK_SIZE_8:
116
+ return ID2SYM(rb_intern("SIZE_8"));
117
+ case YP_PACK_SIZE_16:
118
+ return ID2SYM(rb_intern("SIZE_16"));
119
+ case YP_PACK_SIZE_32:
120
+ return ID2SYM(rb_intern("SIZE_32"));
121
+ case YP_PACK_SIZE_64:
122
+ return ID2SYM(rb_intern("SIZE_64"));
123
+ case YP_PACK_SIZE_P:
124
+ return ID2SYM(rb_intern("SIZE_P"));
125
+ case YP_PACK_SIZE_NA:
126
+ return ID2SYM(rb_intern("SIZE_NA"));
127
+ default:
128
+ return Qnil;
129
+ }
130
+ }
131
+
132
+ static VALUE
133
+ pack_length_type_to_symbol(yp_pack_length_type length_type) {
134
+ switch (length_type) {
135
+ case YP_PACK_LENGTH_FIXED:
136
+ return ID2SYM(rb_intern("LENGTH_FIXED"));
137
+ case YP_PACK_LENGTH_MAX:
138
+ return ID2SYM(rb_intern("LENGTH_MAX"));
139
+ case YP_PACK_LENGTH_RELATIVE:
140
+ return ID2SYM(rb_intern("LENGTH_RELATIVE"));
141
+ case YP_PACK_LENGTH_NA:
142
+ return ID2SYM(rb_intern("LENGTH_NA"));
143
+ default:
144
+ return Qnil;
145
+ }
146
+ }
147
+
148
+ static VALUE
149
+ pack_encoding_to_ruby(yp_pack_encoding encoding) {
150
+ int index;
151
+ switch (encoding) {
152
+ case YP_PACK_ENCODING_ASCII_8BIT:
153
+ index = rb_ascii8bit_encindex();
154
+ break;
155
+ case YP_PACK_ENCODING_US_ASCII:
156
+ index = rb_usascii_encindex();
157
+ break;
158
+ case YP_PACK_ENCODING_UTF_8:
159
+ index = rb_utf8_encindex();
160
+ break;
161
+ default:
162
+ return Qnil;
163
+ }
164
+ return rb_enc_from_encoding(rb_enc_from_index(index));
165
+ }
166
+
167
+ static VALUE
168
+ pack_parse(VALUE self, VALUE version_symbol, VALUE variant_symbol, VALUE format_string) {
169
+ if (version_symbol != v3_2_0_symbol) {
170
+ rb_raise(rb_eArgError, "invalid version");
171
+ }
172
+
173
+ yp_pack_variant variant;
174
+ if (variant_symbol == pack_symbol) {
175
+ variant = YP_PACK_VARIANT_PACK;
176
+ } else if (variant_symbol == unpack_symbol) {
177
+ variant = YP_PACK_VARIANT_UNPACK;
178
+ } else {
179
+ rb_raise(rb_eArgError, "invalid variant");
180
+ }
181
+
182
+ StringValue(format_string);
183
+
184
+ const char *format = RSTRING_PTR(format_string);
185
+ const char *format_end = format + RSTRING_LEN(format_string);
186
+ yp_pack_encoding encoding = YP_PACK_ENCODING_START;
187
+
188
+ VALUE directives_array = rb_ary_new();
189
+
190
+ while (format < format_end) {
191
+ yp_pack_type type;
192
+ yp_pack_signed signed_type;
193
+ yp_pack_endian endian;
194
+ yp_pack_size size;
195
+ yp_pack_length_type length_type;
196
+ uint64_t length;
197
+
198
+ const char *directive_start = format;
199
+
200
+ yp_pack_result parse_result = yp_pack_parse(variant, &format, format_end, &type, &signed_type, &endian,
201
+ &size, &length_type, &length, &encoding);
202
+
203
+ const char *directive_end = format;
204
+
205
+ switch (parse_result) {
206
+ case YP_PACK_OK:
207
+ break;
208
+ case YP_PACK_ERROR_UNSUPPORTED_DIRECTIVE:
209
+ rb_raise(rb_eArgError, "unsupported directive");
210
+ case YP_PACK_ERROR_UNKNOWN_DIRECTIVE:
211
+ rb_raise(rb_eArgError, "unsupported directive");
212
+ case YP_PACK_ERROR_LENGTH_TOO_BIG:
213
+ rb_raise(rb_eRangeError, "pack length too big");
214
+ case YP_PACK_ERROR_BANG_NOT_ALLOWED:
215
+ rb_raise(rb_eRangeError, "bang not allowed");
216
+ case YP_PACK_ERROR_DOUBLE_ENDIAN:
217
+ rb_raise(rb_eRangeError, "double endian");
218
+ default:
219
+ rb_bug("parse result");
220
+ }
221
+
222
+ if (type == YP_PACK_END) {
223
+ break;
224
+ }
225
+
226
+ VALUE directive_args[9] = { version_symbol,
227
+ variant_symbol,
228
+ rb_usascii_str_new(directive_start, directive_end - directive_start),
229
+ pack_type_to_symbol(type),
230
+ pack_signed_to_symbol(signed_type),
231
+ pack_endian_to_symbol(endian),
232
+ pack_size_to_symbol(size),
233
+ pack_length_type_to_symbol(length_type),
234
+ UINT64T2NUM(length) };
235
+
236
+ rb_ary_push(directives_array, rb_class_new_instance(9, directive_args, rb_cYARPPackDirective));
237
+ }
238
+
239
+ VALUE format_args[2];
240
+ format_args[0] = directives_array;
241
+ format_args[1] = pack_encoding_to_ruby(encoding);
242
+ return rb_class_new_instance(2, format_args, rb_cYARPPackFormat);
243
+ }
244
+
245
+ void
246
+ Init_yarp_pack(void) {
247
+ rb_cYARP = rb_define_module("YARP");
248
+ rb_cYARPPack = rb_define_module_under(rb_cYARP, "Pack");
249
+ rb_cYARPPackDirective = rb_define_class_under(rb_cYARPPack, "Directive", rb_cObject);
250
+ rb_cYARPPackFormat = rb_define_class_under(rb_cYARPPack, "Format", rb_cObject);
251
+ rb_define_singleton_method(rb_cYARPPack, "parse", pack_parse, 3);
252
+
253
+ v3_2_0_symbol = ID2SYM(rb_intern("v3_2_0"));
254
+ pack_symbol = ID2SYM(rb_intern("pack"));
255
+ unpack_symbol = ID2SYM(rb_intern("unpack"));
256
+ }
@@ -0,0 +1,131 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "mkmf"
4
+ require "rbconfig"
5
+ require "rake"
6
+
7
module Yarp
  # Build-time configuration for the YARP C extension. It makes sure
  # librubyparser has been built (statically or as a shared library), wires it
  # into mkmf, and generates the extension Makefile.
  module ExtConf
    class << self
      # Entry point: configures compiler flags and librubyparser, then writes
      # the Makefile. When linking statically, a rule is appended so the
      # extension is rebuilt whenever the static archive changes.
      def configure
        configure_c_extension
        configure_rubyparser

        create_makefile("yarp/yarp")

        return unless static_link?

        File.open('Makefile', 'a') do |mf|
          mf.puts
          mf.puts '# Automatically rebuild the extension if librubyparser.a changed'
          mf.puts '$(TARGET_SO): $(LOCAL_LIBS)'
        end
      end

      # Adds the compiler flags used for the extension itself.
      def configure_c_extension
        append_cflags("-DYARP_DEBUG_MODE_BUILD") if debug_mode_build?
        append_cflags("-fvisibility=hidden")
      end

      # Builds librubyparser if it is missing from the build directory,
      # registers it with mkmf, and checks that the required headers exist.
      #
      # @raise [RuntimeError] when the shared library cannot be linked or a
      #   required header is missing
      def configure_rubyparser
        if static_link?
          static_archive_path = File.join(build_dir, "librubyparser.a")
          build_static_rubyparser unless File.exist?(static_archive_path)
          $LOCAL_LIBS << " #{static_archive_path}"
        else
          # Use the same file name that build_shared_rubyparser produces;
          # checking a different extension would make this test always fail
          # on platforms where DLEXT and SOEXT differ.
          shared_library_path = File.join(build_dir, shared_library_name)
          build_shared_rubyparser unless File.exist?(shared_library_path)
          unless find_library("rubyparser", "yp_parser_init", build_dir)
            raise "could not link against #{File.basename(shared_library_path)}"
          end
        end

        find_header("yarp.h", include_dir) or raise "yarp.h is required"

        # Explicitly look for the extension header in the parent directory
        # because we want to consistently look for yarp/extension.h in our
        # source files to line up with our mirroring in CRuby.
        find_header("yarp/extension.h", File.join(__dir__, "..")) or raise "yarp/extension.h is required"
      end

      # File name of the shared librubyparser for this platform.
      #
      # @return [String]
      def shared_library_name
        "librubyparser.#{RbConfig::CONFIG["SOEXT"]}"
      end

      # Builds the shared librubyparser via the project's make target.
      def build_shared_rubyparser
        build_target_rubyparser "build/#{shared_library_name}"
      end

      # Builds the static librubyparser archive via the project's make target.
      def build_static_rubyparser
        build_target_rubyparser "build/librubyparser.a"
      end

      # Runs configure and `make <target>` from the project root.
      #
      # @param target [String] make target, relative to the project root
      def build_target_rubyparser(target)
        Dir.chdir(root_dir) do
          if !File.exist?("configure") && Dir.exist?(".git")
            # this block only exists to support building the gem from a "git" source,
            # normally we package up the configure and other files in the gem itself
            Rake.sh("autoconf")
            Rake.sh("autoheader")
            Rake.sh("templates/template.rb")
          end
          Rake.sh("sh", "configure") # explicit "sh" for Windows where shebangs are not supported
          Rake.sh("make", target)
        end
      end

      # @return [String] project root, two directories above this file
      def root_dir
        File.expand_path("../..", __dir__)
      end

      # @return [String] directory holding the public headers
      def include_dir
        File.join(root_dir, "include")
      end

      # @return [String] directory where librubyparser is built
      def build_dir
        File.join(root_dir, "build")
      end

      # Prints the usage text for this script to standard output.
      def print_help
        print(<<~TEXT)
          USAGE: ruby #{$PROGRAM_NAME} [options]

          Flags that are always valid:

          --enable-static
          --disable-static
          Enable or disable static linking against librubyparser.
          The default is to statically link.

          --enable-debug-mode-build
          Enable debug mode build.
          You may also use set YARP_DEBUG_MODE_BUILD environment variable.

          --help
          Display this message.

          Environment variables used:

          YARP_DEBUG_MODE_BUILD
          Equivalent to `--enable-debug-mode-build` when set, even if nil or blank.

        TEXT
      end

      # Whether to link librubyparser statically (the default), as controlled
      # by --enable-static / --disable-static.
      def static_link?
        enable_config("static", true)
      end

      # Whether a debug-mode build was requested through
      # --enable-debug-mode-build or the YARP_DEBUG_MODE_BUILD variable.
      def debug_mode_build?
        enable_config("debug-mode-build", ENV["YARP_DEBUG_MODE_BUILD"] || false)
      end
    end
  end
end
125
+
126
# Script entry point: with --help, print the usage text and leave immediately
# with status 0; otherwise run the full configuration.
if arg_config("--help")
  Yarp::ExtConf.print_help
  exit!(0)
else
  Yarp::ExtConf.configure
end