kanayago 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. checksums.yaml +7 -0
  2. data/.rubocop.yml +15 -0
  3. data/.rubocop_todo.yml +23 -0
  4. data/LICENSE.txt +21 -0
  5. data/README.md +79 -0
  6. data/Rakefile +182 -0
  7. data/ext/kanayago/ccan/check_type/check_type.h +63 -0
  8. data/ext/kanayago/ccan/container_of/container_of.h +142 -0
  9. data/ext/kanayago/ccan/list/list.h +791 -0
  10. data/ext/kanayago/ccan/str/str.h +17 -0
  11. data/ext/kanayago/constant.h +53 -0
  12. data/ext/kanayago/extconf.rb +21 -0
  13. data/ext/kanayago/id.h +347 -0
  14. data/ext/kanayago/id_table.h +39 -0
  15. data/ext/kanayago/internal/array.h +151 -0
  16. data/ext/kanayago/internal/basic_operators.h +64 -0
  17. data/ext/kanayago/internal/bignum.h +244 -0
  18. data/ext/kanayago/internal/bits.h +568 -0
  19. data/ext/kanayago/internal/compile.h +34 -0
  20. data/ext/kanayago/internal/compilers.h +107 -0
  21. data/ext/kanayago/internal/complex.h +29 -0
  22. data/ext/kanayago/internal/encoding.h +36 -0
  23. data/ext/kanayago/internal/error.h +218 -0
  24. data/ext/kanayago/internal/fixnum.h +184 -0
  25. data/ext/kanayago/internal/gc.h +322 -0
  26. data/ext/kanayago/internal/hash.h +191 -0
  27. data/ext/kanayago/internal/imemo.h +261 -0
  28. data/ext/kanayago/internal/io.h +140 -0
  29. data/ext/kanayago/internal/numeric.h +274 -0
  30. data/ext/kanayago/internal/parse.h +117 -0
  31. data/ext/kanayago/internal/rational.h +71 -0
  32. data/ext/kanayago/internal/re.h +28 -0
  33. data/ext/kanayago/internal/ruby_parser.h +125 -0
  34. data/ext/kanayago/internal/sanitizers.h +297 -0
  35. data/ext/kanayago/internal/serial.h +23 -0
  36. data/ext/kanayago/internal/static_assert.h +16 -0
  37. data/ext/kanayago/internal/string.h +186 -0
  38. data/ext/kanayago/internal/symbol.h +45 -0
  39. data/ext/kanayago/internal/thread.h +79 -0
  40. data/ext/kanayago/internal/variable.h +72 -0
  41. data/ext/kanayago/internal/vm.h +137 -0
  42. data/ext/kanayago/internal/warnings.h +16 -0
  43. data/ext/kanayago/internal.h +108 -0
  44. data/ext/kanayago/kanayago.c +420 -0
  45. data/ext/kanayago/kanayago.h +21 -0
  46. data/ext/kanayago/lex.c +302 -0
  47. data/ext/kanayago/method.h +255 -0
  48. data/ext/kanayago/node.c +440 -0
  49. data/ext/kanayago/node.h +111 -0
  50. data/ext/kanayago/node_name.inc +224 -0
  51. data/ext/kanayago/parse.c +26931 -0
  52. data/ext/kanayago/parse.h +244 -0
  53. data/ext/kanayago/parse.tmp.y +16145 -0
  54. data/ext/kanayago/parser_bits.h +564 -0
  55. data/ext/kanayago/parser_node.h +32 -0
  56. data/ext/kanayago/parser_st.c +164 -0
  57. data/ext/kanayago/parser_st.h +162 -0
  58. data/ext/kanayago/parser_value.h +106 -0
  59. data/ext/kanayago/probes.h +4 -0
  60. data/ext/kanayago/ruby_assert.h +14 -0
  61. data/ext/kanayago/ruby_atomic.h +23 -0
  62. data/ext/kanayago/ruby_parser.c +1165 -0
  63. data/ext/kanayago/rubyparser.h +1391 -0
  64. data/ext/kanayago/shape.h +234 -0
  65. data/ext/kanayago/st.c +2339 -0
  66. data/ext/kanayago/symbol.h +123 -0
  67. data/ext/kanayago/thread_pthread.h +168 -0
  68. data/ext/kanayago/universal_parser.c +230 -0
  69. data/ext/kanayago/vm_core.h +2215 -0
  70. data/ext/kanayago/vm_opts.h +67 -0
  71. data/lib/kanayago/version.rb +5 -0
  72. data/lib/kanayago.rb +11 -0
  73. data/sig/kanayago.rbs +4 -0
  74. metadata +116 -0
@@ -0,0 +1,1165 @@
1
+ /* This is a wrapper for parse.y */
2
+
3
+ #include "internal/parse.h"
4
+ #include "internal/re.h"
5
+ #include "internal/ruby_parser.h"
6
+
7
+ #include "node.h"
8
+ #include "rubyparser.h"
9
+ #include "internal/error.h"
10
+
11
+ #ifdef UNIVERSAL_PARSER
12
+
13
+ #include "internal.h"
14
+ #include "internal/array.h"
15
+ #include "internal/bignum.h"
16
+ #include "internal/compile.h"
17
+ #include "internal/complex.h"
18
+ #include "internal/encoding.h"
19
+ #include "internal/gc.h"
20
+ #include "internal/hash.h"
21
+ #include "internal/io.h"
22
+ #include "internal/rational.h"
23
+ #include "internal/re.h"
24
+ #include "internal/string.h"
25
+ #include "internal/symbol.h"
26
+ #include "internal/thread.h"
27
+
28
+ #include "ruby/ractor.h"
29
+ #include "ruby/ruby.h"
30
+ #include "ruby/util.h"
31
+ #include "internal.h"
32
+ #include "vm_core.h"
33
+ #include "symbol.h"
34
+
35
+ #define parser_encoding const void
36
+
37
+ static int
38
+ is_ascii_string2(VALUE str)
39
+ {
40
+ return is_ascii_string(str);
41
+ }
42
+
43
+ RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 6, 0)
44
+ static VALUE
45
+ syntax_error_append(VALUE exc, VALUE file, int line, int column,
46
+ parser_encoding *enc, const char *fmt, va_list args)
47
+ {
48
+ return rb_syntax_error_append(exc, file, line, column, enc, fmt, args);
49
+ }
50
+
51
+ static int
52
+ local_defined(ID id, const void *p)
53
+ {
54
+ return rb_local_defined(id, (const rb_iseq_t *)p);
55
+ }
56
+
57
+ static int
58
+ dvar_defined(ID id, const void *p)
59
+ {
60
+ return rb_dvar_defined(id, (const rb_iseq_t *)p);
61
+ }
62
+
63
+ static int
64
+ is_usascii_enc(parser_encoding *enc)
65
+ {
66
+ return rb_is_usascii_enc(enc);
67
+ }
68
+
69
+ static int
70
+ is_local_id2(ID id)
71
+ {
72
+ return is_local_id(id);
73
+ }
74
+
75
+ static int
76
+ is_attrset_id2(ID id)
77
+ {
78
+ return is_attrset_id(id);
79
+ }
80
+
81
+ static int
82
+ is_notop_id2(ID id)
83
+ {
84
+ return is_notop_id(id);
85
+ }
86
+
87
+ static VALUE
88
+ enc_str_new(const char *ptr, long len, parser_encoding *enc)
89
+ {
90
+ return rb_enc_str_new(ptr, len, enc);
91
+ }
92
+
93
+ static int
94
+ enc_isalnum(OnigCodePoint c, parser_encoding *enc)
95
+ {
96
+ return rb_enc_isalnum(c, enc);
97
+ }
98
+
99
+ static int
100
+ enc_precise_mbclen(const char *p, const char *e, parser_encoding *enc)
101
+ {
102
+ return rb_enc_precise_mbclen(p, e, enc);
103
+ }
104
+
105
/* Function form of the MBCLEN_CHARFOUND_P() macro; installed as the
 * `mbclen_charfound_p` callback. */
static int
mbclen_charfound_p(int len)
{
    const int found = MBCLEN_CHARFOUND_P(len);

    return found;
}
110
+
111
/* Function form of the MBCLEN_CHARFOUND_LEN() macro; installed as the
 * `mbclen_charfound_len` callback. */
static int
mbclen_charfound_len(int len)
{
    const int char_len = MBCLEN_CHARFOUND_LEN(len);

    return char_len;
}
116
+
117
+ static const char *
118
+ enc_name(parser_encoding *enc)
119
+ {
120
+ return rb_enc_name(enc);
121
+ }
122
+
123
+ static char *
124
+ enc_prev_char(const char *s, const char *p, const char *e, parser_encoding *enc)
125
+ {
126
+ return rb_enc_prev_char(s, p, e, enc);
127
+ }
128
+
129
+ static parser_encoding *
130
+ enc_get(VALUE obj)
131
+ {
132
+ return rb_enc_get(obj);
133
+ }
134
+
135
+ static int
136
+ enc_asciicompat(parser_encoding *enc)
137
+ {
138
+ return rb_enc_asciicompat(enc);
139
+ }
140
+
141
+ static parser_encoding *
142
+ utf8_encoding(void)
143
+ {
144
+ return rb_utf8_encoding();
145
+ }
146
+
147
+ static VALUE
148
+ enc_associate(VALUE obj, parser_encoding *enc)
149
+ {
150
+ return rb_enc_associate(obj, enc);
151
+ }
152
+
153
+ static parser_encoding *
154
+ ascii8bit_encoding(void)
155
+ {
156
+ return rb_ascii8bit_encoding();
157
+ }
158
+
159
+ static int
160
+ enc_codelen(int c, parser_encoding *enc)
161
+ {
162
+ return rb_enc_codelen(c, enc);
163
+ }
164
+
165
+ static int
166
+ enc_mbcput(unsigned int c, void *buf, parser_encoding *enc)
167
+ {
168
+ return rb_enc_mbcput(c, buf, enc);
169
+ }
170
+
171
+ static parser_encoding *
172
+ enc_from_index(int idx)
173
+ {
174
+ return rb_enc_from_index(idx);
175
+ }
176
+
177
+ static int
178
+ enc_isspace(OnigCodePoint c, parser_encoding *enc)
179
+ {
180
+ return rb_enc_isspace(c, enc);
181
+ }
182
+
183
+ static ID
184
+ intern3(const char *name, long len, parser_encoding *enc)
185
+ {
186
+ return rb_intern3(name, len, enc);
187
+ }
188
+
189
+ static parser_encoding *
190
+ usascii_encoding(void)
191
+ {
192
+ return rb_usascii_encoding();
193
+ }
194
+
195
+ static int
196
+ enc_symname_type(const char *name, long len, parser_encoding *enc, unsigned int allowed_attrset)
197
+ {
198
+ return rb_enc_symname_type(name, len, enc, allowed_attrset);
199
+ }
200
+
201
+ typedef struct {
202
+ struct parser_params *parser;
203
+ rb_encoding *enc;
204
+ NODE *succ_block;
205
+ const rb_code_location_t *loc;
206
+ } reg_named_capture_assign_t;
207
+
208
+ static int
209
+ reg_named_capture_assign_iter(const OnigUChar *name, const OnigUChar *name_end,
210
+ int back_num, int *back_refs, OnigRegex regex, void *arg0)
211
+ {
212
+ reg_named_capture_assign_t *arg = (reg_named_capture_assign_t*)arg0;
213
+ struct parser_params* p = arg->parser;
214
+ rb_encoding *enc = arg->enc;
215
+ const rb_code_location_t *loc = arg->loc;
216
+ long len = name_end - name;
217
+ const char *s = (const char *)name;
218
+
219
+ return rb_reg_named_capture_assign_iter_impl(p, s, len, enc, &arg->succ_block, loc);
220
+ }
221
+
222
+ static NODE *
223
+ reg_named_capture_assign(struct parser_params* p, VALUE regexp, const rb_code_location_t *loc)
224
+ {
225
+ reg_named_capture_assign_t arg;
226
+
227
+ arg.parser = p;
228
+ arg.enc = rb_enc_get(regexp);
229
+ arg.succ_block = 0;
230
+ arg.loc = loc;
231
+ onig_foreach_name(RREGEXP_PTR(regexp), reg_named_capture_assign_iter, &arg);
232
+
233
+ if (!arg.succ_block) return 0;
234
+ return RNODE_BLOCK(arg.succ_block)->nd_next;
235
+ }
236
+
237
+ static int
238
+ rtest(VALUE obj)
239
+ {
240
+ return (int)RB_TEST(obj);
241
+ }
242
+
243
+ static int
244
+ nil_p(VALUE obj)
245
+ {
246
+ return (int)NIL_P(obj);
247
+ }
248
+
249
+ static VALUE
250
+ syntax_error_new(void)
251
+ {
252
+ return rb_class_new_instance(0, 0, rb_eSyntaxError);
253
+ }
254
+
255
/* memmove() of `n` elements of `t` bytes each.  The byte count comes from
 * rbimpl_size_mul_or_raise(t, n).  Installed as the `rb_memmove` callback. */
static void *
memmove2(void *dest, const void *src, size_t t, size_t n)
{
    const size_t nbytes = rbimpl_size_mul_or_raise(t, n);

    return memmove(dest, src, nbytes);
}
260
+
261
/* ruby_nonempty_memcpy() of `n` elements of `t` bytes each.  The byte count
 * comes from rbimpl_size_mul_or_raise(t, n).  Installed as the
 * `nonempty_memcpy` callback. */
static void *
nonempty_memcpy(void *dest, const void *src, size_t t, size_t n)
{
    const size_t nbytes = rbimpl_size_mul_or_raise(t, n);

    return ruby_nonempty_memcpy(dest, src, nbytes);
}
266
+
267
+ static VALUE
268
+ ruby_verbose2(void)
269
+ {
270
+ return ruby_verbose;
271
+ }
272
+
273
/* Forwards to rb_errno_ptr(); installed as the `errno_ptr` callback. */
static int *
rb_errno_ptr2(void)
{
    int *errno_slot = rb_errno_ptr();

    return errno_slot;
}
278
+
279
/* Allocates one element of `elemsiz` bytes through ruby_xcalloc();
 * installed as the `zalloc` callback. */
static void *
zalloc(size_t elemsiz)
{
    void *block = ruby_xcalloc(1, elemsiz);

    return block;
}
284
+
285
+ static void
286
+ gc_guard(VALUE obj)
287
+ {
288
+ RB_GC_GUARD(obj);
289
+ }
290
+
291
+ static VALUE
292
+ arg_error(void)
293
+ {
294
+ return rb_eArgError;
295
+ }
296
+
297
+ static VALUE
298
+ static_id2sym(ID id)
299
+ {
300
+ return (((VALUE)(id)<<RUBY_SPECIAL_SHIFT)|SYMBOL_FLAG);
301
+ }
302
+
303
+ static long
304
+ str_coderange_scan_restartable(const char *s, const char *e, parser_encoding *enc, int *cr)
305
+ {
306
+ return rb_str_coderange_scan_restartable(s, e, enc, cr);
307
+ }
308
+
309
+ static int
310
+ enc_mbminlen(parser_encoding *enc)
311
+ {
312
+ return rb_enc_mbminlen(enc);
313
+ }
314
+
315
+ static bool
316
+ enc_isascii(OnigCodePoint c, parser_encoding *enc)
317
+ {
318
+ return rb_enc_isascii(c, enc);
319
+ }
320
+
321
+ static OnigCodePoint
322
+ enc_mbc_to_codepoint(const char *p, const char *e, parser_encoding *enc)
323
+ {
324
+ const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
325
+ const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
326
+
327
+ return ONIGENC_MBC_TO_CODE((rb_encoding *)enc, up, ue);
328
+ }
329
+
330
+ extern VALUE rb_eArgError;
331
+
332
+ // Add for Kanayago
333
/* Kanayago addition: forwards to rb_xmalloc_mul_add(); installed as the
 * `xmalloc_mul_add` callback. */
static void *
xmalloc_mul_add(size_t x, size_t y, size_t z)
{
    void *block = rb_xmalloc_mul_add(x, y, z);

    return block;
}
338
+
339
+ static VALUE
340
+ suppress_tracing(VALUE (*func)(VALUE), VALUE arg)
341
+ {
342
+ return rb_suppress_tracing(func, arg);
343
+ }
344
+
345
+ static ID
346
+ make_temporary_id(size_t n)
347
+ {
348
+ return rb_make_temporary_id(n);
349
+ }
350
+
351
/* Kanayago addition: forwards to rb_stderr_tty_p(); installed as the
 * `stderr_tty_p` callback. */
static int
stderr_tty_p(void)
{
    const int is_tty = rb_stderr_tty_p();

    return is_tty;
}
356
+
357
+ static VALUE
358
+ reg_compile(VALUE str, int options, const char *sourcefile, int sourceline)
359
+ {
360
+ return rb_reg_compile(str, options, sourcefile, sourceline);
361
+ }
362
+
363
+ static VALUE
364
+ reg_check_preprocess(VALUE val)
365
+ {
366
+ return rb_reg_check_preprocess(val);
367
+ }
368
+ // End of Add for Kanayago
369
+
370
+ static const rb_parser_config_t rb_global_parser_config = {
371
+ .malloc = ruby_xmalloc,
372
+ .calloc = ruby_xcalloc,
373
+ .realloc = ruby_xrealloc,
374
+ .free = ruby_xfree,
375
+ .alloc_n = ruby_xmalloc2,
376
+ .alloc = ruby_xmalloc,
377
+ .realloc_n = ruby_xrealloc2,
378
+ .zalloc = zalloc,
379
+ .rb_memmove = memmove2,
380
+ .nonempty_memcpy = nonempty_memcpy,
381
+ .xmalloc_mul_add = xmalloc_mul_add, // use xmalloc_mul_add for Kanayago
382
+
383
+ .compile_callback = suppress_tracing, // use suppress_tracing for Kanayago
384
+ .reg_named_capture_assign = reg_named_capture_assign,
385
+
386
+ .attr_get = rb_attr_get,
387
+
388
+ .ary_new = rb_ary_new,
389
+ .ary_push = rb_ary_push,
390
+ .ary_new_from_args = rb_ary_new_from_args,
391
+ .ary_unshift = rb_ary_unshift,
392
+
393
+ .make_temporary_id = make_temporary_id, // use make_temporary_id for Kanayago
394
+ .is_local_id = is_local_id2,
395
+ .is_attrset_id = is_attrset_id2,
396
+ .is_global_name_punct = is_global_name_punct,
397
+ .id_type = id_type,
398
+ .id_attrset = rb_id_attrset,
399
+ .intern = rb_intern,
400
+ .intern2 = rb_intern2,
401
+ .intern3 = intern3,
402
+ .intern_str = rb_intern_str,
403
+ .is_notop_id = is_notop_id2,
404
+ .enc_symname_type = enc_symname_type,
405
+ .id2name = rb_id2name,
406
+ .id2str = rb_id2str,
407
+ .id2sym = rb_id2sym,
408
+ .sym2id = rb_sym2id,
409
+
410
+ .str_catf = rb_str_catf,
411
+ .str_cat_cstr = rb_str_cat_cstr,
412
+ .str_modify = rb_str_modify,
413
+ .str_set_len = rb_str_set_len,
414
+ .str_cat = rb_str_cat,
415
+ .str_resize = rb_str_resize,
416
+ .str_new = rb_str_new,
417
+ .str_new_cstr = rb_str_new_cstr,
418
+ .str_to_interned_str = rb_str_to_interned_str,
419
+ .is_ascii_string = is_ascii_string2,
420
+ .enc_str_new = enc_str_new,
421
+ .str_vcatf = rb_str_vcatf,
422
+ .rb_sprintf = rb_sprintf,
423
+ .rstring_ptr = RSTRING_PTR,
424
+ .rstring_end = RSTRING_END,
425
+ .rstring_len = RSTRING_LEN,
426
+ .obj_as_string = rb_obj_as_string,
427
+
428
+ .int2num = rb_int2num_inline,
429
+
430
+ .stderr_tty_p = stderr_tty_p, //use stderr_tty_p for Kanayago
431
+ .write_error_str = rb_write_error_str,
432
+ .io_write = rb_io_write,
433
+ .io_flush = rb_io_flush,
434
+ .io_puts = rb_io_puts,
435
+
436
+ .debug_output_stdout = rb_ractor_stdout,
437
+ .debug_output_stderr = rb_ractor_stderr,
438
+
439
+ .is_usascii_enc = is_usascii_enc,
440
+ .enc_isalnum = enc_isalnum,
441
+ .enc_precise_mbclen = enc_precise_mbclen,
442
+ .mbclen_charfound_p = mbclen_charfound_p,
443
+ .mbclen_charfound_len = mbclen_charfound_len,
444
+ .enc_name = enc_name,
445
+ .enc_prev_char = enc_prev_char,
446
+ .enc_get = enc_get,
447
+ .enc_asciicompat = enc_asciicompat,
448
+ .utf8_encoding = utf8_encoding,
449
+ .enc_associate = enc_associate,
450
+ .ascii8bit_encoding = ascii8bit_encoding,
451
+ .enc_codelen = enc_codelen,
452
+ .enc_mbcput = enc_mbcput,
453
+ .enc_find_index = rb_enc_find_index,
454
+ .enc_from_index = enc_from_index,
455
+ .enc_isspace = enc_isspace,
456
+ .enc_coderange_7bit = ENC_CODERANGE_7BIT,
457
+ .enc_coderange_unknown = ENC_CODERANGE_UNKNOWN,
458
+ .usascii_encoding = usascii_encoding,
459
+ .enc_mbminlen = enc_mbminlen,
460
+ .enc_isascii = enc_isascii,
461
+ .enc_mbc_to_codepoint = enc_mbc_to_codepoint,
462
+
463
+ .local_defined = local_defined,
464
+ .dvar_defined = dvar_defined,
465
+
466
+ .syntax_error_append = syntax_error_append,
467
+ .raise = rb_raise,
468
+ .syntax_error_new = syntax_error_new,
469
+
470
+ .errinfo = rb_errinfo,
471
+ .set_errinfo = rb_set_errinfo,
472
+ .exc_raise = rb_exc_raise,
473
+ .make_exception = rb_make_exception,
474
+
475
+ .sized_xfree = ruby_sized_xfree,
476
+ .sized_realloc_n = ruby_sized_realloc_n,
477
+ .gc_guard = gc_guard,
478
+ .gc_mark = rb_gc_mark,
479
+
480
+ .reg_compile = reg_compile, // use reg_compile for Kanayago
481
+ .reg_check_preprocess = reg_check_preprocess, // use reg_check_preprocess for Kanayago
482
+ .memcicmp = rb_memcicmp,
483
+
484
+ .compile_warn = rb_compile_warn,
485
+ .compile_warning = rb_compile_warning,
486
+ .bug = rb_bug,
487
+ .fatal = rb_fatal,
488
+ .verbose = ruby_verbose2,
489
+ .errno_ptr = rb_errno_ptr2,
490
+
491
+ .make_backtrace = rb_make_backtrace,
492
+
493
+ .scan_hex = ruby_scan_hex,
494
+ .scan_oct = ruby_scan_oct,
495
+ .scan_digits = ruby_scan_digits,
496
+ .strtod = ruby_strtod,
497
+
498
+ .rtest = rtest,
499
+ .nil_p = nil_p,
500
+ .qnil = Qnil,
501
+ .qfalse = Qfalse,
502
+ .eArgError = arg_error,
503
+ .long2int = rb_long2int,
504
+
505
+ /* For Ripper */
506
+ .static_id2sym = static_id2sym,
507
+ .str_coderange_scan_restartable = str_coderange_scan_restartable,
508
+ };
509
+ #endif
510
+
511
+ static void
512
+ parser_mark(void *ptr)
513
+ {
514
+ struct ruby_parser *parser = (struct ruby_parser*)ptr;
515
+ rb_ruby_parser_mark(parser->parser_params);
516
+
517
+ switch (parser->type) {
518
+ case lex_type_str:
519
+ rb_gc_mark(parser->data.lex_str.str);
520
+ break;
521
+ case lex_type_io:
522
+ rb_gc_mark(parser->data.lex_io.file);
523
+ break;
524
+ case lex_type_array:
525
+ rb_gc_mark(parser->data.lex_array.ary);
526
+ break;
527
+ case lex_type_generic:
528
+ /* noop. Caller of rb_parser_compile_generic should mark the objects. */
529
+ break;
530
+ }
531
+ }
532
+
533
+ static void
534
+ parser_free(void *ptr)
535
+ {
536
+ struct ruby_parser *parser = (struct ruby_parser*)ptr;
537
+ rb_ruby_parser_free(parser->parser_params);
538
+ xfree(parser);
539
+ }
540
+
541
+ static size_t
542
+ parser_memsize(const void *ptr)
543
+ {
544
+ struct ruby_parser *parser = (struct ruby_parser*)ptr;
545
+ return rb_ruby_parser_memsize(parser->parser_params);
546
+ }
547
+
548
+ // Not static const for Kanayago
549
+ const rb_data_type_t ruby_parser_data_type = {
550
+ "parser",
551
+ {
552
+ parser_mark,
553
+ parser_free,
554
+ parser_memsize,
555
+ },
556
+ 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
557
+ };
558
+
559
+ #ifdef UNIVERSAL_PARSER
560
+ const rb_parser_config_t *
561
+ rb_ruby_parser_config(void)
562
+ {
563
+ return &rb_global_parser_config;
564
+ }
565
+
566
+ rb_parser_t *
567
+ rb_parser_params_new(void)
568
+ {
569
+ return rb_ruby_parser_new(&rb_global_parser_config);
570
+ }
571
+ #else
572
+ rb_parser_t *
573
+ rb_parser_params_new(void)
574
+ {
575
+ return rb_ruby_parser_new();
576
+ }
577
+ #endif /* UNIVERSAL_PARSER */
578
+
579
+ VALUE
580
+ rb_parser_new(void)
581
+ {
582
+ struct ruby_parser *parser;
583
+ rb_parser_t *parser_params;
584
+
585
+ /*
586
+ * Create parser_params ahead of vparser because
587
+ * rb_ruby_parser_new can run GC so if create vparser
588
+ * first, parser_mark tries to mark not initialized parser_params.
589
+ */
590
+ parser_params = rb_parser_params_new();
591
+ VALUE vparser = TypedData_Make_Struct(0, struct ruby_parser,
592
+ &ruby_parser_data_type, parser);
593
+ parser->parser_params = parser_params;
594
+
595
+ return vparser;
596
+ }
597
+
598
+ void
599
+ rb_parser_set_options(VALUE vparser, int print, int loop, int chomp, int split)
600
+ {
601
+ struct ruby_parser *parser;
602
+
603
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
604
+ rb_ruby_parser_set_options(parser->parser_params, print, loop, chomp, split);
605
+ }
606
+
607
+ VALUE
608
+ rb_parser_set_context(VALUE vparser, const struct rb_iseq_struct *base, int main)
609
+ {
610
+ struct ruby_parser *parser;
611
+
612
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
613
+ rb_ruby_parser_set_context(parser->parser_params, base, main);
614
+ return vparser;
615
+ }
616
+
617
+ void
618
+ rb_parser_set_script_lines(VALUE vparser)
619
+ {
620
+ struct ruby_parser *parser;
621
+
622
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
623
+ rb_ruby_parser_set_script_lines(parser->parser_params);
624
+ }
625
+
626
+ void
627
+ rb_parser_error_tolerant(VALUE vparser)
628
+ {
629
+ struct ruby_parser *parser;
630
+
631
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
632
+ rb_ruby_parser_error_tolerant(parser->parser_params);
633
+ }
634
+
635
+ void
636
+ rb_parser_keep_tokens(VALUE vparser)
637
+ {
638
+ struct ruby_parser *parser;
639
+
640
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
641
+ rb_ruby_parser_keep_tokens(parser->parser_params);
642
+ }
643
+
644
+ rb_parser_string_t *
645
+ rb_parser_lex_get_str(struct parser_params *p, struct lex_pointer_string *ptr_str)
646
+ {
647
+ char *beg, *end, *start;
648
+ long len;
649
+ VALUE s = ptr_str->str;
650
+
651
+ beg = RSTRING_PTR(s);
652
+ len = RSTRING_LEN(s);
653
+ start = beg;
654
+ if (ptr_str->ptr) {
655
+ if (len == ptr_str->ptr) return 0;
656
+ beg += ptr_str->ptr;
657
+ len -= ptr_str->ptr;
658
+ }
659
+ end = memchr(beg, '\n', len);
660
+ if (end) len = ++end - beg;
661
+ ptr_str->ptr += len;
662
+ return rb_str_to_parser_string(p, rb_str_subseq(s, beg - start, len));
663
+ }
664
+
665
+ static rb_parser_string_t *
666
+ lex_get_str(struct parser_params *p, rb_parser_input_data input, int line_count)
667
+ {
668
+ return rb_parser_lex_get_str(p, (struct lex_pointer_string *)input);
669
+ }
670
+
671
+ static void parser_aset_script_lines_for(VALUE path, rb_parser_ary_t *lines);
672
+
673
+ static rb_ast_t*
674
+ parser_compile(rb_parser_t *p, rb_parser_lex_gets_func *gets, VALUE fname, rb_parser_input_data input, int line)
675
+ {
676
+ rb_ast_t *ast = rb_parser_compile(p, gets, fname, input, line);
677
+ parser_aset_script_lines_for(fname, ast->body.script_lines);
678
+ return ast;
679
+ }
680
+
681
+ static rb_ast_t*
682
+ parser_compile_string0(struct ruby_parser *parser, VALUE fname, VALUE s, int line)
683
+ {
684
+ VALUE str = rb_str_new_frozen(s);
685
+
686
+ parser->type = lex_type_str;
687
+ parser->data.lex_str.str = str;
688
+ parser->data.lex_str.ptr = 0;
689
+
690
+ return parser_compile(parser->parser_params, lex_get_str, fname, (rb_parser_input_data)&parser->data, line);
691
+ }
692
+
693
+ static rb_encoding *
694
+ must_be_ascii_compatible(VALUE s)
695
+ {
696
+ rb_encoding *enc = rb_enc_get(s);
697
+ if (!rb_enc_asciicompat(enc)) {
698
+ rb_raise(rb_eArgError, "invalid source encoding");
699
+ }
700
+ return enc;
701
+ }
702
+
703
+ static rb_ast_t*
704
+ parser_compile_string_path(struct ruby_parser *parser, VALUE f, VALUE s, int line)
705
+ {
706
+ must_be_ascii_compatible(s);
707
+ return parser_compile_string0(parser, f, s, line);
708
+ }
709
+
710
+ static rb_ast_t*
711
+ parser_compile_string(struct ruby_parser *parser, const char *f, VALUE s, int line)
712
+ {
713
+ return parser_compile_string_path(parser, rb_filesystem_str_new_cstr(f), s, line);
714
+ }
715
+
716
+ VALUE rb_io_gets_internal(VALUE io);
717
+
718
+ static rb_parser_string_t *
719
+ lex_io_gets(struct parser_params *p, rb_parser_input_data input, int line_count)
720
+ {
721
+ VALUE io = (VALUE)input;
722
+ VALUE line = rb_io_gets_internal(io);
723
+ if (NIL_P(line)) return 0;
724
+ return rb_str_to_parser_string(p, line);
725
+ }
726
+
727
+ static rb_parser_string_t *
728
+ lex_gets_array(struct parser_params *p, rb_parser_input_data data, int index)
729
+ {
730
+ VALUE array = (VALUE)data;
731
+ VALUE str = rb_ary_entry(array, index);
732
+ if (!NIL_P(str)) {
733
+ StringValue(str);
734
+ if (!rb_enc_asciicompat(rb_enc_get(str))) {
735
+ rb_raise(rb_eArgError, "invalid source encoding");
736
+ }
737
+ return rb_str_to_parser_string(p, str);
738
+ }
739
+ else {
740
+ return 0;
741
+ }
742
+ }
743
+
744
+ static rb_ast_t*
745
+ parser_compile_file_path(struct ruby_parser *parser, VALUE fname, VALUE file, int start)
746
+ {
747
+ parser->type = lex_type_io;
748
+ parser->data.lex_io.file = file;
749
+
750
+ return parser_compile(parser->parser_params, lex_io_gets, fname, (rb_parser_input_data)file, start);
751
+ }
752
+
753
+ static rb_ast_t*
754
+ parser_compile_array(struct ruby_parser *parser, VALUE fname, VALUE array, int start)
755
+ {
756
+ parser->type = lex_type_array;
757
+ parser->data.lex_array.ary = array;
758
+
759
+ return parser_compile(parser->parser_params, lex_gets_array, fname, (rb_parser_input_data)array, start);
760
+ }
761
+
762
+ static rb_ast_t*
763
+ parser_compile_generic(struct ruby_parser *parser, rb_parser_lex_gets_func *lex_gets, VALUE fname, VALUE input, int start)
764
+ {
765
+ parser->type = lex_type_generic;
766
+
767
+ return parser_compile(parser->parser_params, lex_gets, fname, (rb_parser_input_data)input, start);
768
+ }
769
+
770
+ static void
771
+ ast_free(void *ptr)
772
+ {
773
+ rb_ast_t *ast = (rb_ast_t *)ptr;
774
+ rb_ast_free(ast);
775
+ }
776
+
777
+ // Not static const for Kanayago
778
+ const rb_data_type_t ast_data_type = {
779
+ "AST",
780
+ {
781
+ NULL,
782
+ ast_free,
783
+ NULL, // No dsize() because this object does not appear in ObjectSpace.
784
+ },
785
+ 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
786
+ };
787
+
788
+ static VALUE
789
+ ast_alloc(void)
790
+ {
791
+ return TypedData_Wrap_Struct(0, &ast_data_type, NULL);
792
+ }
793
+
794
+ VALUE
795
+ rb_parser_compile_file_path(VALUE vparser, VALUE fname, VALUE file, int start)
796
+ {
797
+ struct ruby_parser *parser;
798
+ VALUE ast_value = ast_alloc();
799
+
800
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
801
+ DATA_PTR(ast_value) = parser_compile_file_path(parser, fname, file, start);
802
+ RB_GC_GUARD(vparser);
803
+
804
+ return ast_value;
805
+ }
806
+
807
+ VALUE
808
+ rb_parser_compile_array(VALUE vparser, VALUE fname, VALUE array, int start)
809
+ {
810
+ struct ruby_parser *parser;
811
+ VALUE ast_value = ast_alloc();
812
+
813
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
814
+ DATA_PTR(ast_value) = parser_compile_array(parser, fname, array, start);
815
+ RB_GC_GUARD(vparser);
816
+
817
+ return ast_value;
818
+ }
819
+
820
+ VALUE
821
+ rb_parser_compile_generic(VALUE vparser, rb_parser_lex_gets_func *lex_gets, VALUE fname, VALUE input, int start)
822
+ {
823
+ struct ruby_parser *parser;
824
+ VALUE ast_value = ast_alloc();
825
+
826
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
827
+ DATA_PTR(ast_value) = parser_compile_generic(parser, lex_gets, fname, input, start);
828
+ RB_GC_GUARD(vparser);
829
+
830
+ return ast_value;
831
+ }
832
+
833
+ VALUE
834
+ rb_parser_compile_string(VALUE vparser, const char *f, VALUE s, int line)
835
+ {
836
+ struct ruby_parser *parser;
837
+ VALUE ast_value = ast_alloc();
838
+
839
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
840
+ DATA_PTR(ast_value) = parser_compile_string(parser, f, s, line);
841
+ RB_GC_GUARD(vparser);
842
+
843
+ return ast_value;
844
+ }
845
+
846
+ VALUE
847
+ rb_parser_compile_string_path(VALUE vparser, VALUE f, VALUE s, int line)
848
+ {
849
+ struct ruby_parser *parser;
850
+ VALUE ast_value = ast_alloc();
851
+
852
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
853
+ DATA_PTR(ast_value) = parser_compile_string_path(parser, f, s, line);
854
+ RB_GC_GUARD(vparser);
855
+
856
+ return ast_value;
857
+ }
858
+
859
+ VALUE
860
+ rb_parser_encoding(VALUE vparser)
861
+ {
862
+ struct ruby_parser *parser;
863
+
864
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
865
+ return rb_enc_from_encoding(rb_ruby_parser_encoding(parser->parser_params));
866
+ }
867
+
868
+ VALUE
869
+ rb_parser_end_seen_p(VALUE vparser)
870
+ {
871
+ struct ruby_parser *parser;
872
+
873
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
874
+ return RBOOL(rb_ruby_parser_end_seen_p(parser->parser_params));
875
+ }
876
+
877
+ VALUE
878
+ rb_parser_set_yydebug(VALUE vparser, VALUE flag)
879
+ {
880
+ struct ruby_parser *parser;
881
+
882
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
883
+ rb_ruby_parser_set_yydebug(parser->parser_params, RTEST(flag));
884
+ return flag;
885
+ }
886
+
887
+ void
888
+ rb_set_script_lines_for(VALUE vparser, VALUE path)
889
+ {
890
+ struct ruby_parser *parser;
891
+ VALUE hash;
892
+ ID script_lines;
893
+ CONST_ID(script_lines, "SCRIPT_LINES__");
894
+ if (!rb_const_defined_at(rb_cObject, script_lines)) return;
895
+ hash = rb_const_get_at(rb_cObject, script_lines);
896
+ if (RB_TYPE_P(hash, T_HASH)) {
897
+ rb_hash_aset(hash, path, Qtrue);
898
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
899
+ rb_ruby_parser_set_script_lines(parser->parser_params);
900
+ }
901
+ }
902
+
903
+ VALUE
904
+ rb_parser_build_script_lines_from(rb_parser_ary_t *lines)
905
+ {
906
+ int i;
907
+ if (!lines) return Qnil;
908
+ if (lines->data_type != PARSER_ARY_DATA_SCRIPT_LINE) {
909
+ rb_bug("unexpected rb_parser_ary_data_type (%d) for script lines", lines->data_type);
910
+ }
911
+ VALUE script_lines = rb_ary_new_capa(lines->len);
912
+ for (i = 0; i < lines->len; i++) {
913
+ rb_parser_string_t *str = (rb_parser_string_t *)lines->data[i];
914
+ rb_ary_push(script_lines, rb_enc_str_new(str->ptr, str->len, str->enc));
915
+ }
916
+ return script_lines;
917
+ }
918
+
919
+ VALUE
920
+ rb_str_new_parser_string(rb_parser_string_t *str)
921
+ {
922
+ VALUE string = rb_enc_literal_str(str->ptr, str->len, str->enc);
923
+ rb_enc_str_coderange(string);
924
+ return string;
925
+ }
926
+
927
+ VALUE
928
+ rb_str_new_mutable_parser_string(rb_parser_string_t *str)
929
+ {
930
+ return rb_enc_str_new(str->ptr, str->len, str->enc);
931
+ }
932
+
933
+ static VALUE
934
+ negative_numeric(VALUE val)
935
+ {
936
+ if (FIXNUM_P(val)) {
937
+ return LONG2FIX(-FIX2LONG(val));
938
+ }
939
+ if (SPECIAL_CONST_P(val)) {
940
+ #if USE_FLONUM
941
+ if (FLONUM_P(val)) {
942
+ return DBL2NUM(-RFLOAT_VALUE(val));
943
+ }
944
+ #endif
945
+ goto unknown;
946
+ }
947
+ switch (BUILTIN_TYPE(val)) {
948
+ case T_BIGNUM:
949
+ BIGNUM_NEGATE(val);
950
+ val = rb_big_norm(val);
951
+ break;
952
+ case T_RATIONAL:
953
+ RATIONAL_SET_NUM(val, negative_numeric(RRATIONAL(val)->num));
954
+ break;
955
+ case T_COMPLEX:
956
+ RCOMPLEX_SET_REAL(val, negative_numeric(RCOMPLEX(val)->real));
957
+ RCOMPLEX_SET_IMAG(val, negative_numeric(RCOMPLEX(val)->imag));
958
+ break;
959
+ case T_FLOAT:
960
+ val = DBL2NUM(-RFLOAT_VALUE(val));
961
+ break;
962
+ unknown:
963
+ default:
964
+ rb_bug("unknown literal type (%s) passed to negative_numeric",
965
+ rb_builtin_class_name(val));
966
+ break;
967
+ }
968
+ return val;
969
+ }
970
+
971
+ static VALUE
972
+ integer_value(const char *val, int base)
973
+ {
974
+ return rb_cstr_to_inum(val, base, FALSE);
975
+ }
976
+
977
+ static VALUE
978
+ rational_value(const char *node_val, int base, int seen_point)
979
+ {
980
+ VALUE lit;
981
+ char* val = strdup(node_val);
982
+ if (seen_point > 0) {
983
+ int len = (int)(strlen(val));
984
+ char *point = &val[seen_point];
985
+ size_t fraclen = len-seen_point-1;
986
+ memmove(point, point+1, fraclen+1);
987
+
988
+ lit = rb_rational_new(integer_value(val, base), rb_int_positive_pow(10, fraclen));
989
+ }
990
+ else {
991
+ lit = rb_rational_raw1(integer_value(val, base));
992
+ }
993
+
994
+ free(val);
995
+
996
+ return lit;
997
+ }
998
+
999
+ VALUE
1000
+ rb_node_integer_literal_val(const NODE *n)
1001
+ {
1002
+ const rb_node_integer_t *node = RNODE_INTEGER(n);
1003
+ VALUE val = integer_value(node->val, node->base);
1004
+ if (node->minus) {
1005
+ val = negative_numeric(val);
1006
+ }
1007
+ return val;
1008
+ }
1009
+
1010
+ VALUE
1011
+ rb_node_float_literal_val(const NODE *n)
1012
+ {
1013
+ const rb_node_float_t *node = RNODE_FLOAT(n);
1014
+ double d = strtod(node->val, 0);
1015
+ if (node->minus) {
1016
+ d = -d;
1017
+ }
1018
+ VALUE val = DBL2NUM(d);
1019
+ return val;
1020
+ }
1021
+
1022
+ VALUE
1023
+ rb_node_rational_literal_val(const NODE *n)
1024
+ {
1025
+ VALUE lit;
1026
+ const rb_node_rational_t *node = RNODE_RATIONAL(n);
1027
+
1028
+ lit = rational_value(node->val, node->base, node->seen_point);
1029
+
1030
+ if (node->minus) {
1031
+ lit = negative_numeric(lit);
1032
+ }
1033
+
1034
+ return lit;
1035
+ }
1036
+
1037
+ VALUE
1038
+ rb_node_imaginary_literal_val(const NODE *n)
1039
+ {
1040
+ VALUE lit;
1041
+ const rb_node_imaginary_t *node = RNODE_IMAGINARY(n);
1042
+
1043
+ enum rb_numeric_type type = node->type;
1044
+
1045
+ switch (type) {
1046
+ case integer_literal:
1047
+ lit = integer_value(node->val, node->base);
1048
+ break;
1049
+ case float_literal:{
1050
+ double d = strtod(node->val, 0);
1051
+ lit = DBL2NUM(d);
1052
+ break;
1053
+ }
1054
+ case rational_literal:
1055
+ lit = rational_value(node->val, node->base, node->seen_point);
1056
+ break;
1057
+ default:
1058
+ rb_bug("unreachable");
1059
+ }
1060
+
1061
+ lit = rb_complex_raw(INT2FIX(0), lit);
1062
+
1063
+ if (node->minus) {
1064
+ lit = negative_numeric(lit);
1065
+ }
1066
+ return lit;
1067
+ }
1068
+
1069
+ VALUE
1070
+ rb_node_str_string_val(const NODE *node)
1071
+ {
1072
+ rb_parser_string_t *str = RNODE_STR(node)->string;
1073
+ return rb_str_new_parser_string(str);
1074
+ }
1075
+
1076
+ VALUE
1077
+ rb_node_sym_string_val(const NODE *node)
1078
+ {
1079
+ rb_parser_string_t *str = RNODE_SYM(node)->string;
1080
+ return ID2SYM(rb_intern3(str->ptr, str->len, str->enc));
1081
+ }
1082
+
1083
+ VALUE
1084
+ rb_node_dstr_string_val(const NODE *node)
1085
+ {
1086
+ rb_parser_string_t *str = RNODE_DSTR(node)->string;
1087
+ return str ? rb_str_new_parser_string(str) : Qnil;
1088
+ }
1089
+
1090
+ VALUE
1091
+ rb_node_dregx_string_val(const NODE *node)
1092
+ {
1093
+ rb_parser_string_t *str = RNODE_DREGX(node)->string;
1094
+ return rb_str_new_parser_string(str);
1095
+ }
1096
+
1097
+ VALUE
1098
+ rb_node_regx_string_val(const NODE *node)
1099
+ {
1100
+ rb_node_regx_t *node_reg = RNODE_REGX(node);
1101
+ rb_parser_string_t *string = node_reg->string;
1102
+ VALUE str = rb_enc_str_new(string->ptr, string->len, string->enc);
1103
+
1104
+ return rb_reg_compile(str, node_reg->options, NULL, 0);
1105
+ }
1106
+
1107
+ VALUE
1108
+ rb_node_line_lineno_val(const NODE *node)
1109
+ {
1110
+ return INT2FIX(node->nd_loc.beg_pos.lineno);
1111
+ }
1112
+
1113
+ VALUE
1114
+ rb_node_file_path_val(const NODE *node)
1115
+ {
1116
+ return rb_str_new_parser_string(RNODE_FILE(node)->path);
1117
+ }
1118
+
1119
+ VALUE
1120
+ rb_node_encoding_val(const NODE *node)
1121
+ {
1122
+ return rb_enc_from_encoding(RNODE_ENCODING(node)->enc);
1123
+ }
1124
+
1125
+ static void
1126
+ parser_aset_script_lines_for(VALUE path, rb_parser_ary_t *lines)
1127
+ {
1128
+ VALUE hash, script_lines;
1129
+ ID script_lines_id;
1130
+ if (NIL_P(path) || !lines) return;
1131
+ CONST_ID(script_lines_id, "SCRIPT_LINES__");
1132
+ if (!rb_const_defined_at(rb_cObject, script_lines_id)) return;
1133
+ hash = rb_const_get_at(rb_cObject, script_lines_id);
1134
+ if (!RB_TYPE_P(hash, T_HASH)) return;
1135
+ if (rb_hash_lookup(hash, path) == Qnil) return;
1136
+ script_lines = rb_parser_build_script_lines_from(lines);
1137
+ rb_hash_aset(hash, path, script_lines);
1138
+ }
1139
+
1140
+ VALUE
1141
+ rb_ruby_ast_new(const NODE *const root)
1142
+ {
1143
+ rb_ast_t *ast;
1144
+ VALUE ast_value = TypedData_Make_Struct(0, rb_ast_t, &ast_data_type, ast);
1145
+ #ifdef UNIVERSAL_PARSER
1146
+ ast->config = &rb_global_parser_config;
1147
+ #endif
1148
+ ast->body = (rb_ast_body_t){
1149
+ .root = root,
1150
+ .frozen_string_literal = -1,
1151
+ .coverage_enabled = -1,
1152
+ .script_lines = NULL,
1153
+ .line_count = 0,
1154
+ };
1155
+ return ast_value;
1156
+ }
1157
+
1158
+ rb_ast_t *
1159
+ rb_ruby_ast_data_get(VALUE ast_value)
1160
+ {
1161
+ rb_ast_t *ast;
1162
+ if (NIL_P(ast_value)) return NULL;
1163
+ TypedData_Get_Struct(ast_value, rb_ast_t, &ast_data_type, ast);
1164
+ return ast;
1165
+ }