kanayago 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74) hide show
  1. checksums.yaml +7 -0
  2. data/.rubocop.yml +15 -0
  3. data/.rubocop_todo.yml +23 -0
  4. data/LICENSE.txt +21 -0
  5. data/README.md +79 -0
  6. data/Rakefile +182 -0
  7. data/ext/kanayago/ccan/check_type/check_type.h +63 -0
  8. data/ext/kanayago/ccan/container_of/container_of.h +142 -0
  9. data/ext/kanayago/ccan/list/list.h +791 -0
  10. data/ext/kanayago/ccan/str/str.h +17 -0
  11. data/ext/kanayago/constant.h +53 -0
  12. data/ext/kanayago/extconf.rb +21 -0
  13. data/ext/kanayago/id.h +347 -0
  14. data/ext/kanayago/id_table.h +39 -0
  15. data/ext/kanayago/internal/array.h +151 -0
  16. data/ext/kanayago/internal/basic_operators.h +64 -0
  17. data/ext/kanayago/internal/bignum.h +244 -0
  18. data/ext/kanayago/internal/bits.h +568 -0
  19. data/ext/kanayago/internal/compile.h +34 -0
  20. data/ext/kanayago/internal/compilers.h +107 -0
  21. data/ext/kanayago/internal/complex.h +29 -0
  22. data/ext/kanayago/internal/encoding.h +36 -0
  23. data/ext/kanayago/internal/error.h +218 -0
  24. data/ext/kanayago/internal/fixnum.h +184 -0
  25. data/ext/kanayago/internal/gc.h +322 -0
  26. data/ext/kanayago/internal/hash.h +191 -0
  27. data/ext/kanayago/internal/imemo.h +261 -0
  28. data/ext/kanayago/internal/io.h +140 -0
  29. data/ext/kanayago/internal/numeric.h +274 -0
  30. data/ext/kanayago/internal/parse.h +117 -0
  31. data/ext/kanayago/internal/rational.h +71 -0
  32. data/ext/kanayago/internal/re.h +28 -0
  33. data/ext/kanayago/internal/ruby_parser.h +125 -0
  34. data/ext/kanayago/internal/sanitizers.h +297 -0
  35. data/ext/kanayago/internal/serial.h +23 -0
  36. data/ext/kanayago/internal/static_assert.h +16 -0
  37. data/ext/kanayago/internal/string.h +186 -0
  38. data/ext/kanayago/internal/symbol.h +45 -0
  39. data/ext/kanayago/internal/thread.h +79 -0
  40. data/ext/kanayago/internal/variable.h +72 -0
  41. data/ext/kanayago/internal/vm.h +137 -0
  42. data/ext/kanayago/internal/warnings.h +16 -0
  43. data/ext/kanayago/internal.h +108 -0
  44. data/ext/kanayago/kanayago.c +420 -0
  45. data/ext/kanayago/kanayago.h +21 -0
  46. data/ext/kanayago/lex.c +302 -0
  47. data/ext/kanayago/method.h +255 -0
  48. data/ext/kanayago/node.c +440 -0
  49. data/ext/kanayago/node.h +111 -0
  50. data/ext/kanayago/node_name.inc +224 -0
  51. data/ext/kanayago/parse.c +26931 -0
  52. data/ext/kanayago/parse.h +244 -0
  53. data/ext/kanayago/parse.tmp.y +16145 -0
  54. data/ext/kanayago/parser_bits.h +564 -0
  55. data/ext/kanayago/parser_node.h +32 -0
  56. data/ext/kanayago/parser_st.c +164 -0
  57. data/ext/kanayago/parser_st.h +162 -0
  58. data/ext/kanayago/parser_value.h +106 -0
  59. data/ext/kanayago/probes.h +4 -0
  60. data/ext/kanayago/ruby_assert.h +14 -0
  61. data/ext/kanayago/ruby_atomic.h +23 -0
  62. data/ext/kanayago/ruby_parser.c +1165 -0
  63. data/ext/kanayago/rubyparser.h +1391 -0
  64. data/ext/kanayago/shape.h +234 -0
  65. data/ext/kanayago/st.c +2339 -0
  66. data/ext/kanayago/symbol.h +123 -0
  67. data/ext/kanayago/thread_pthread.h +168 -0
  68. data/ext/kanayago/universal_parser.c +230 -0
  69. data/ext/kanayago/vm_core.h +2215 -0
  70. data/ext/kanayago/vm_opts.h +67 -0
  71. data/lib/kanayago/version.rb +5 -0
  72. data/lib/kanayago.rb +11 -0
  73. data/sig/kanayago.rbs +4 -0
  74. metadata +116 -0
@@ -0,0 +1,1165 @@
1
+ /* This is a wrapper for parse.y */
2
+
3
+ #include "internal/parse.h"
4
+ #include "internal/re.h"
5
+ #include "internal/ruby_parser.h"
6
+
7
+ #include "node.h"
8
+ #include "rubyparser.h"
9
+ #include "internal/error.h"
10
+
11
+ #ifdef UNIVERSAL_PARSER
12
+
13
+ #include "internal.h"
14
+ #include "internal/array.h"
15
+ #include "internal/bignum.h"
16
+ #include "internal/compile.h"
17
+ #include "internal/complex.h"
18
+ #include "internal/encoding.h"
19
+ #include "internal/gc.h"
20
+ #include "internal/hash.h"
21
+ #include "internal/io.h"
22
+ #include "internal/rational.h"
23
+ #include "internal/re.h"
24
+ #include "internal/string.h"
25
+ #include "internal/symbol.h"
26
+ #include "internal/thread.h"
27
+
28
+ #include "ruby/ractor.h"
29
+ #include "ruby/ruby.h"
30
+ #include "ruby/util.h"
31
+ #include "internal.h"
32
+ #include "vm_core.h"
33
+ #include "symbol.h"
34
+
35
+ #define parser_encoding const void
36
+
37
+ static int
38
+ is_ascii_string2(VALUE str)
39
+ {
40
+ return is_ascii_string(str);
41
+ }
42
+
43
+ RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 6, 0)
44
+ static VALUE
45
+ syntax_error_append(VALUE exc, VALUE file, int line, int column,
46
+ parser_encoding *enc, const char *fmt, va_list args)
47
+ {
48
+ return rb_syntax_error_append(exc, file, line, column, enc, fmt, args);
49
+ }
50
+
51
+ static int
52
+ local_defined(ID id, const void *p)
53
+ {
54
+ return rb_local_defined(id, (const rb_iseq_t *)p);
55
+ }
56
+
57
+ static int
58
+ dvar_defined(ID id, const void *p)
59
+ {
60
+ return rb_dvar_defined(id, (const rb_iseq_t *)p);
61
+ }
62
+
63
+ static int
64
+ is_usascii_enc(parser_encoding *enc)
65
+ {
66
+ return rb_is_usascii_enc(enc);
67
+ }
68
+
69
+ static int
70
+ is_local_id2(ID id)
71
+ {
72
+ return is_local_id(id);
73
+ }
74
+
75
+ static int
76
+ is_attrset_id2(ID id)
77
+ {
78
+ return is_attrset_id(id);
79
+ }
80
+
81
+ static int
82
+ is_notop_id2(ID id)
83
+ {
84
+ return is_notop_id(id);
85
+ }
86
+
87
+ static VALUE
88
+ enc_str_new(const char *ptr, long len, parser_encoding *enc)
89
+ {
90
+ return rb_enc_str_new(ptr, len, enc);
91
+ }
92
+
93
+ static int
94
+ enc_isalnum(OnigCodePoint c, parser_encoding *enc)
95
+ {
96
+ return rb_enc_isalnum(c, enc);
97
+ }
98
+
99
+ static int
100
+ enc_precise_mbclen(const char *p, const char *e, parser_encoding *enc)
101
+ {
102
+ return rb_enc_precise_mbclen(p, e, enc);
103
+ }
104
+
105
+ static int
106
+ mbclen_charfound_p(int len)
107
+ {
108
+ return MBCLEN_CHARFOUND_P(len);
109
+ }
110
+
111
+ static int
112
+ mbclen_charfound_len(int len)
113
+ {
114
+ return MBCLEN_CHARFOUND_LEN(len);
115
+ }
116
+
117
+ static const char *
118
+ enc_name(parser_encoding *enc)
119
+ {
120
+ return rb_enc_name(enc);
121
+ }
122
+
123
+ static char *
124
+ enc_prev_char(const char *s, const char *p, const char *e, parser_encoding *enc)
125
+ {
126
+ return rb_enc_prev_char(s, p, e, enc);
127
+ }
128
+
129
+ static parser_encoding *
130
+ enc_get(VALUE obj)
131
+ {
132
+ return rb_enc_get(obj);
133
+ }
134
+
135
+ static int
136
+ enc_asciicompat(parser_encoding *enc)
137
+ {
138
+ return rb_enc_asciicompat(enc);
139
+ }
140
+
141
+ static parser_encoding *
142
+ utf8_encoding(void)
143
+ {
144
+ return rb_utf8_encoding();
145
+ }
146
+
147
+ static VALUE
148
+ enc_associate(VALUE obj, parser_encoding *enc)
149
+ {
150
+ return rb_enc_associate(obj, enc);
151
+ }
152
+
153
+ static parser_encoding *
154
+ ascii8bit_encoding(void)
155
+ {
156
+ return rb_ascii8bit_encoding();
157
+ }
158
+
159
+ static int
160
+ enc_codelen(int c, parser_encoding *enc)
161
+ {
162
+ return rb_enc_codelen(c, enc);
163
+ }
164
+
165
+ static int
166
+ enc_mbcput(unsigned int c, void *buf, parser_encoding *enc)
167
+ {
168
+ return rb_enc_mbcput(c, buf, enc);
169
+ }
170
+
171
+ static parser_encoding *
172
+ enc_from_index(int idx)
173
+ {
174
+ return rb_enc_from_index(idx);
175
+ }
176
+
177
+ static int
178
+ enc_isspace(OnigCodePoint c, parser_encoding *enc)
179
+ {
180
+ return rb_enc_isspace(c, enc);
181
+ }
182
+
183
+ static ID
184
+ intern3(const char *name, long len, parser_encoding *enc)
185
+ {
186
+ return rb_intern3(name, len, enc);
187
+ }
188
+
189
+ static parser_encoding *
190
+ usascii_encoding(void)
191
+ {
192
+ return rb_usascii_encoding();
193
+ }
194
+
195
+ static int
196
+ enc_symname_type(const char *name, long len, parser_encoding *enc, unsigned int allowed_attrset)
197
+ {
198
+ return rb_enc_symname_type(name, len, enc, allowed_attrset);
199
+ }
200
+
201
+ typedef struct {
202
+ struct parser_params *parser;
203
+ rb_encoding *enc;
204
+ NODE *succ_block;
205
+ const rb_code_location_t *loc;
206
+ } reg_named_capture_assign_t;
207
+
208
+ static int
209
+ reg_named_capture_assign_iter(const OnigUChar *name, const OnigUChar *name_end,
210
+ int back_num, int *back_refs, OnigRegex regex, void *arg0)
211
+ {
212
+ reg_named_capture_assign_t *arg = (reg_named_capture_assign_t*)arg0;
213
+ struct parser_params* p = arg->parser;
214
+ rb_encoding *enc = arg->enc;
215
+ const rb_code_location_t *loc = arg->loc;
216
+ long len = name_end - name;
217
+ const char *s = (const char *)name;
218
+
219
+ return rb_reg_named_capture_assign_iter_impl(p, s, len, enc, &arg->succ_block, loc);
220
+ }
221
+
222
+ static NODE *
223
+ reg_named_capture_assign(struct parser_params* p, VALUE regexp, const rb_code_location_t *loc)
224
+ {
225
+ reg_named_capture_assign_t arg;
226
+
227
+ arg.parser = p;
228
+ arg.enc = rb_enc_get(regexp);
229
+ arg.succ_block = 0;
230
+ arg.loc = loc;
231
+ onig_foreach_name(RREGEXP_PTR(regexp), reg_named_capture_assign_iter, &arg);
232
+
233
+ if (!arg.succ_block) return 0;
234
+ return RNODE_BLOCK(arg.succ_block)->nd_next;
235
+ }
236
+
237
+ static int
238
+ rtest(VALUE obj)
239
+ {
240
+ return (int)RB_TEST(obj);
241
+ }
242
+
243
+ static int
244
+ nil_p(VALUE obj)
245
+ {
246
+ return (int)NIL_P(obj);
247
+ }
248
+
249
+ static VALUE
250
+ syntax_error_new(void)
251
+ {
252
+ return rb_class_new_instance(0, 0, rb_eSyntaxError);
253
+ }
254
+
255
+ static void *
256
+ memmove2(void *dest, const void *src, size_t t, size_t n)
257
+ {
258
+ return memmove(dest, src, rbimpl_size_mul_or_raise(t, n));
259
+ }
260
+
261
+ static void *
262
+ nonempty_memcpy(void *dest, const void *src, size_t t, size_t n)
263
+ {
264
+ return ruby_nonempty_memcpy(dest, src, rbimpl_size_mul_or_raise(t, n));
265
+ }
266
+
267
+ static VALUE
268
+ ruby_verbose2(void)
269
+ {
270
+ return ruby_verbose;
271
+ }
272
+
273
+ static int *
274
+ rb_errno_ptr2(void)
275
+ {
276
+ return rb_errno_ptr();
277
+ }
278
+
279
+ static void *
280
+ zalloc(size_t elemsiz)
281
+ {
282
+ return ruby_xcalloc(1, elemsiz);
283
+ }
284
+
285
+ static void
286
+ gc_guard(VALUE obj)
287
+ {
288
+ RB_GC_GUARD(obj);
289
+ }
290
+
291
+ static VALUE
292
+ arg_error(void)
293
+ {
294
+ return rb_eArgError;
295
+ }
296
+
297
+ static VALUE
298
+ static_id2sym(ID id)
299
+ {
300
+ return (((VALUE)(id)<<RUBY_SPECIAL_SHIFT)|SYMBOL_FLAG);
301
+ }
302
+
303
+ static long
304
+ str_coderange_scan_restartable(const char *s, const char *e, parser_encoding *enc, int *cr)
305
+ {
306
+ return rb_str_coderange_scan_restartable(s, e, enc, cr);
307
+ }
308
+
309
+ static int
310
+ enc_mbminlen(parser_encoding *enc)
311
+ {
312
+ return rb_enc_mbminlen(enc);
313
+ }
314
+
315
+ static bool
316
+ enc_isascii(OnigCodePoint c, parser_encoding *enc)
317
+ {
318
+ return rb_enc_isascii(c, enc);
319
+ }
320
+
321
+ static OnigCodePoint
322
+ enc_mbc_to_codepoint(const char *p, const char *e, parser_encoding *enc)
323
+ {
324
+ const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
325
+ const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
326
+
327
+ return ONIGENC_MBC_TO_CODE((rb_encoding *)enc, up, ue);
328
+ }
329
+
330
+ extern VALUE rb_eArgError;
331
+
332
+ // Add for Kanayago
333
+ static void *
334
+ xmalloc_mul_add(size_t x, size_t y, size_t z)
335
+ {
336
+ return rb_xmalloc_mul_add(x, y, z);
337
+ }
338
+
339
+ static VALUE
340
+ suppress_tracing(VALUE (*func)(VALUE), VALUE arg)
341
+ {
342
+ return rb_suppress_tracing(func, arg);
343
+ }
344
+
345
+ static ID
346
+ make_temporary_id(size_t n)
347
+ {
348
+ return rb_make_temporary_id(n);
349
+ }
350
+
351
+ static int
352
+ stderr_tty_p(void)
353
+ {
354
+ return rb_stderr_tty_p();
355
+ }
356
+
357
+ static VALUE
358
+ reg_compile(VALUE str, int options, const char *sourcefile, int sourceline)
359
+ {
360
+ return rb_reg_compile(str, options, sourcefile, sourceline);
361
+ }
362
+
363
+ static VALUE
364
+ reg_check_preprocess(VALUE val)
365
+ {
366
+ return rb_reg_check_preprocess(val);
367
+ }
368
+ // End of Add for Kanayago
369
+
370
+ static const rb_parser_config_t rb_global_parser_config = {
371
+ .malloc = ruby_xmalloc,
372
+ .calloc = ruby_xcalloc,
373
+ .realloc = ruby_xrealloc,
374
+ .free = ruby_xfree,
375
+ .alloc_n = ruby_xmalloc2,
376
+ .alloc = ruby_xmalloc,
377
+ .realloc_n = ruby_xrealloc2,
378
+ .zalloc = zalloc,
379
+ .rb_memmove = memmove2,
380
+ .nonempty_memcpy = nonempty_memcpy,
381
+ .xmalloc_mul_add = xmalloc_mul_add, // use xmalloc_mul_add for Kanayago
382
+
383
+ .compile_callback = suppress_tracing, // use suppress_tracing for Kanayago
384
+ .reg_named_capture_assign = reg_named_capture_assign,
385
+
386
+ .attr_get = rb_attr_get,
387
+
388
+ .ary_new = rb_ary_new,
389
+ .ary_push = rb_ary_push,
390
+ .ary_new_from_args = rb_ary_new_from_args,
391
+ .ary_unshift = rb_ary_unshift,
392
+
393
+ .make_temporary_id = make_temporary_id, // use make_temporary_id for Kanayago
394
+ .is_local_id = is_local_id2,
395
+ .is_attrset_id = is_attrset_id2,
396
+ .is_global_name_punct = is_global_name_punct,
397
+ .id_type = id_type,
398
+ .id_attrset = rb_id_attrset,
399
+ .intern = rb_intern,
400
+ .intern2 = rb_intern2,
401
+ .intern3 = intern3,
402
+ .intern_str = rb_intern_str,
403
+ .is_notop_id = is_notop_id2,
404
+ .enc_symname_type = enc_symname_type,
405
+ .id2name = rb_id2name,
406
+ .id2str = rb_id2str,
407
+ .id2sym = rb_id2sym,
408
+ .sym2id = rb_sym2id,
409
+
410
+ .str_catf = rb_str_catf,
411
+ .str_cat_cstr = rb_str_cat_cstr,
412
+ .str_modify = rb_str_modify,
413
+ .str_set_len = rb_str_set_len,
414
+ .str_cat = rb_str_cat,
415
+ .str_resize = rb_str_resize,
416
+ .str_new = rb_str_new,
417
+ .str_new_cstr = rb_str_new_cstr,
418
+ .str_to_interned_str = rb_str_to_interned_str,
419
+ .is_ascii_string = is_ascii_string2,
420
+ .enc_str_new = enc_str_new,
421
+ .str_vcatf = rb_str_vcatf,
422
+ .rb_sprintf = rb_sprintf,
423
+ .rstring_ptr = RSTRING_PTR,
424
+ .rstring_end = RSTRING_END,
425
+ .rstring_len = RSTRING_LEN,
426
+ .obj_as_string = rb_obj_as_string,
427
+
428
+ .int2num = rb_int2num_inline,
429
+
430
+ .stderr_tty_p = stderr_tty_p, //use stderr_tty_p for Kanayago
431
+ .write_error_str = rb_write_error_str,
432
+ .io_write = rb_io_write,
433
+ .io_flush = rb_io_flush,
434
+ .io_puts = rb_io_puts,
435
+
436
+ .debug_output_stdout = rb_ractor_stdout,
437
+ .debug_output_stderr = rb_ractor_stderr,
438
+
439
+ .is_usascii_enc = is_usascii_enc,
440
+ .enc_isalnum = enc_isalnum,
441
+ .enc_precise_mbclen = enc_precise_mbclen,
442
+ .mbclen_charfound_p = mbclen_charfound_p,
443
+ .mbclen_charfound_len = mbclen_charfound_len,
444
+ .enc_name = enc_name,
445
+ .enc_prev_char = enc_prev_char,
446
+ .enc_get = enc_get,
447
+ .enc_asciicompat = enc_asciicompat,
448
+ .utf8_encoding = utf8_encoding,
449
+ .enc_associate = enc_associate,
450
+ .ascii8bit_encoding = ascii8bit_encoding,
451
+ .enc_codelen = enc_codelen,
452
+ .enc_mbcput = enc_mbcput,
453
+ .enc_find_index = rb_enc_find_index,
454
+ .enc_from_index = enc_from_index,
455
+ .enc_isspace = enc_isspace,
456
+ .enc_coderange_7bit = ENC_CODERANGE_7BIT,
457
+ .enc_coderange_unknown = ENC_CODERANGE_UNKNOWN,
458
+ .usascii_encoding = usascii_encoding,
459
+ .enc_mbminlen = enc_mbminlen,
460
+ .enc_isascii = enc_isascii,
461
+ .enc_mbc_to_codepoint = enc_mbc_to_codepoint,
462
+
463
+ .local_defined = local_defined,
464
+ .dvar_defined = dvar_defined,
465
+
466
+ .syntax_error_append = syntax_error_append,
467
+ .raise = rb_raise,
468
+ .syntax_error_new = syntax_error_new,
469
+
470
+ .errinfo = rb_errinfo,
471
+ .set_errinfo = rb_set_errinfo,
472
+ .exc_raise = rb_exc_raise,
473
+ .make_exception = rb_make_exception,
474
+
475
+ .sized_xfree = ruby_sized_xfree,
476
+ .sized_realloc_n = ruby_sized_realloc_n,
477
+ .gc_guard = gc_guard,
478
+ .gc_mark = rb_gc_mark,
479
+
480
+ .reg_compile = reg_compile, // use reg_compile for Kanayago
481
+ .reg_check_preprocess = reg_check_preprocess, // use reg_check_preprocess for Kanayago
482
+ .memcicmp = rb_memcicmp,
483
+
484
+ .compile_warn = rb_compile_warn,
485
+ .compile_warning = rb_compile_warning,
486
+ .bug = rb_bug,
487
+ .fatal = rb_fatal,
488
+ .verbose = ruby_verbose2,
489
+ .errno_ptr = rb_errno_ptr2,
490
+
491
+ .make_backtrace = rb_make_backtrace,
492
+
493
+ .scan_hex = ruby_scan_hex,
494
+ .scan_oct = ruby_scan_oct,
495
+ .scan_digits = ruby_scan_digits,
496
+ .strtod = ruby_strtod,
497
+
498
+ .rtest = rtest,
499
+ .nil_p = nil_p,
500
+ .qnil = Qnil,
501
+ .qfalse = Qfalse,
502
+ .eArgError = arg_error,
503
+ .long2int = rb_long2int,
504
+
505
+ /* For Ripper */
506
+ .static_id2sym = static_id2sym,
507
+ .str_coderange_scan_restartable = str_coderange_scan_restartable,
508
+ };
509
+ #endif
510
+
511
+ static void
512
+ parser_mark(void *ptr)
513
+ {
514
+ struct ruby_parser *parser = (struct ruby_parser*)ptr;
515
+ rb_ruby_parser_mark(parser->parser_params);
516
+
517
+ switch (parser->type) {
518
+ case lex_type_str:
519
+ rb_gc_mark(parser->data.lex_str.str);
520
+ break;
521
+ case lex_type_io:
522
+ rb_gc_mark(parser->data.lex_io.file);
523
+ break;
524
+ case lex_type_array:
525
+ rb_gc_mark(parser->data.lex_array.ary);
526
+ break;
527
+ case lex_type_generic:
528
+ /* noop. Caller of rb_parser_compile_generic should mark the objects. */
529
+ break;
530
+ }
531
+ }
532
+
533
+ static void
534
+ parser_free(void *ptr)
535
+ {
536
+ struct ruby_parser *parser = (struct ruby_parser*)ptr;
537
+ rb_ruby_parser_free(parser->parser_params);
538
+ xfree(parser);
539
+ }
540
+
541
+ static size_t
542
+ parser_memsize(const void *ptr)
543
+ {
544
+ struct ruby_parser *parser = (struct ruby_parser*)ptr;
545
+ return rb_ruby_parser_memsize(parser->parser_params);
546
+ }
547
+
548
+ // Not static const for Kanayago
549
+ const rb_data_type_t ruby_parser_data_type = {
550
+ "parser",
551
+ {
552
+ parser_mark,
553
+ parser_free,
554
+ parser_memsize,
555
+ },
556
+ 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
557
+ };
558
+
559
+ #ifdef UNIVERSAL_PARSER
560
+ const rb_parser_config_t *
561
+ rb_ruby_parser_config(void)
562
+ {
563
+ return &rb_global_parser_config;
564
+ }
565
+
566
+ rb_parser_t *
567
+ rb_parser_params_new(void)
568
+ {
569
+ return rb_ruby_parser_new(&rb_global_parser_config);
570
+ }
571
+ #else
572
+ rb_parser_t *
573
+ rb_parser_params_new(void)
574
+ {
575
+ return rb_ruby_parser_new();
576
+ }
577
+ #endif /* UNIVERSAL_PARSER */
578
+
579
+ VALUE
580
+ rb_parser_new(void)
581
+ {
582
+ struct ruby_parser *parser;
583
+ rb_parser_t *parser_params;
584
+
585
+ /*
586
+ * Create parser_params ahead of vparser because
587
+ * rb_ruby_parser_new can run GC so if create vparser
588
+ * first, parser_mark tries to mark not initialized parser_params.
589
+ */
590
+ parser_params = rb_parser_params_new();
591
+ VALUE vparser = TypedData_Make_Struct(0, struct ruby_parser,
592
+ &ruby_parser_data_type, parser);
593
+ parser->parser_params = parser_params;
594
+
595
+ return vparser;
596
+ }
597
+
598
+ void
599
+ rb_parser_set_options(VALUE vparser, int print, int loop, int chomp, int split)
600
+ {
601
+ struct ruby_parser *parser;
602
+
603
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
604
+ rb_ruby_parser_set_options(parser->parser_params, print, loop, chomp, split);
605
+ }
606
+
607
+ VALUE
608
+ rb_parser_set_context(VALUE vparser, const struct rb_iseq_struct *base, int main)
609
+ {
610
+ struct ruby_parser *parser;
611
+
612
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
613
+ rb_ruby_parser_set_context(parser->parser_params, base, main);
614
+ return vparser;
615
+ }
616
+
617
+ void
618
+ rb_parser_set_script_lines(VALUE vparser)
619
+ {
620
+ struct ruby_parser *parser;
621
+
622
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
623
+ rb_ruby_parser_set_script_lines(parser->parser_params);
624
+ }
625
+
626
+ void
627
+ rb_parser_error_tolerant(VALUE vparser)
628
+ {
629
+ struct ruby_parser *parser;
630
+
631
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
632
+ rb_ruby_parser_error_tolerant(parser->parser_params);
633
+ }
634
+
635
+ void
636
+ rb_parser_keep_tokens(VALUE vparser)
637
+ {
638
+ struct ruby_parser *parser;
639
+
640
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
641
+ rb_ruby_parser_keep_tokens(parser->parser_params);
642
+ }
643
+
644
+ rb_parser_string_t *
645
+ rb_parser_lex_get_str(struct parser_params *p, struct lex_pointer_string *ptr_str)
646
+ {
647
+ char *beg, *end, *start;
648
+ long len;
649
+ VALUE s = ptr_str->str;
650
+
651
+ beg = RSTRING_PTR(s);
652
+ len = RSTRING_LEN(s);
653
+ start = beg;
654
+ if (ptr_str->ptr) {
655
+ if (len == ptr_str->ptr) return 0;
656
+ beg += ptr_str->ptr;
657
+ len -= ptr_str->ptr;
658
+ }
659
+ end = memchr(beg, '\n', len);
660
+ if (end) len = ++end - beg;
661
+ ptr_str->ptr += len;
662
+ return rb_str_to_parser_string(p, rb_str_subseq(s, beg - start, len));
663
+ }
664
+
665
+ static rb_parser_string_t *
666
+ lex_get_str(struct parser_params *p, rb_parser_input_data input, int line_count)
667
+ {
668
+ return rb_parser_lex_get_str(p, (struct lex_pointer_string *)input);
669
+ }
670
+
671
+ static void parser_aset_script_lines_for(VALUE path, rb_parser_ary_t *lines);
672
+
673
+ static rb_ast_t*
674
+ parser_compile(rb_parser_t *p, rb_parser_lex_gets_func *gets, VALUE fname, rb_parser_input_data input, int line)
675
+ {
676
+ rb_ast_t *ast = rb_parser_compile(p, gets, fname, input, line);
677
+ parser_aset_script_lines_for(fname, ast->body.script_lines);
678
+ return ast;
679
+ }
680
+
681
+ static rb_ast_t*
682
+ parser_compile_string0(struct ruby_parser *parser, VALUE fname, VALUE s, int line)
683
+ {
684
+ VALUE str = rb_str_new_frozen(s);
685
+
686
+ parser->type = lex_type_str;
687
+ parser->data.lex_str.str = str;
688
+ parser->data.lex_str.ptr = 0;
689
+
690
+ return parser_compile(parser->parser_params, lex_get_str, fname, (rb_parser_input_data)&parser->data, line);
691
+ }
692
+
693
+ static rb_encoding *
694
+ must_be_ascii_compatible(VALUE s)
695
+ {
696
+ rb_encoding *enc = rb_enc_get(s);
697
+ if (!rb_enc_asciicompat(enc)) {
698
+ rb_raise(rb_eArgError, "invalid source encoding");
699
+ }
700
+ return enc;
701
+ }
702
+
703
+ static rb_ast_t*
704
+ parser_compile_string_path(struct ruby_parser *parser, VALUE f, VALUE s, int line)
705
+ {
706
+ must_be_ascii_compatible(s);
707
+ return parser_compile_string0(parser, f, s, line);
708
+ }
709
+
710
+ static rb_ast_t*
711
+ parser_compile_string(struct ruby_parser *parser, const char *f, VALUE s, int line)
712
+ {
713
+ return parser_compile_string_path(parser, rb_filesystem_str_new_cstr(f), s, line);
714
+ }
715
+
716
+ VALUE rb_io_gets_internal(VALUE io);
717
+
718
+ static rb_parser_string_t *
719
+ lex_io_gets(struct parser_params *p, rb_parser_input_data input, int line_count)
720
+ {
721
+ VALUE io = (VALUE)input;
722
+ VALUE line = rb_io_gets_internal(io);
723
+ if (NIL_P(line)) return 0;
724
+ return rb_str_to_parser_string(p, line);
725
+ }
726
+
727
+ static rb_parser_string_t *
728
+ lex_gets_array(struct parser_params *p, rb_parser_input_data data, int index)
729
+ {
730
+ VALUE array = (VALUE)data;
731
+ VALUE str = rb_ary_entry(array, index);
732
+ if (!NIL_P(str)) {
733
+ StringValue(str);
734
+ if (!rb_enc_asciicompat(rb_enc_get(str))) {
735
+ rb_raise(rb_eArgError, "invalid source encoding");
736
+ }
737
+ return rb_str_to_parser_string(p, str);
738
+ }
739
+ else {
740
+ return 0;
741
+ }
742
+ }
743
+
744
+ static rb_ast_t*
745
+ parser_compile_file_path(struct ruby_parser *parser, VALUE fname, VALUE file, int start)
746
+ {
747
+ parser->type = lex_type_io;
748
+ parser->data.lex_io.file = file;
749
+
750
+ return parser_compile(parser->parser_params, lex_io_gets, fname, (rb_parser_input_data)file, start);
751
+ }
752
+
753
+ static rb_ast_t*
754
+ parser_compile_array(struct ruby_parser *parser, VALUE fname, VALUE array, int start)
755
+ {
756
+ parser->type = lex_type_array;
757
+ parser->data.lex_array.ary = array;
758
+
759
+ return parser_compile(parser->parser_params, lex_gets_array, fname, (rb_parser_input_data)array, start);
760
+ }
761
+
762
+ static rb_ast_t*
763
+ parser_compile_generic(struct ruby_parser *parser, rb_parser_lex_gets_func *lex_gets, VALUE fname, VALUE input, int start)
764
+ {
765
+ parser->type = lex_type_generic;
766
+
767
+ return parser_compile(parser->parser_params, lex_gets, fname, (rb_parser_input_data)input, start);
768
+ }
769
+
770
+ static void
771
+ ast_free(void *ptr)
772
+ {
773
+ rb_ast_t *ast = (rb_ast_t *)ptr;
774
+ rb_ast_free(ast);
775
+ }
776
+
777
+ // Not static const for Kanayago
778
+ const rb_data_type_t ast_data_type = {
779
+ "AST",
780
+ {
781
+ NULL,
782
+ ast_free,
783
+ NULL, // No dsize() because this object does not appear in ObjectSpace.
784
+ },
785
+ 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
786
+ };
787
+
788
+ static VALUE
789
+ ast_alloc(void)
790
+ {
791
+ return TypedData_Wrap_Struct(0, &ast_data_type, NULL);
792
+ }
793
+
794
+ VALUE
795
+ rb_parser_compile_file_path(VALUE vparser, VALUE fname, VALUE file, int start)
796
+ {
797
+ struct ruby_parser *parser;
798
+ VALUE ast_value = ast_alloc();
799
+
800
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
801
+ DATA_PTR(ast_value) = parser_compile_file_path(parser, fname, file, start);
802
+ RB_GC_GUARD(vparser);
803
+
804
+ return ast_value;
805
+ }
806
+
807
+ VALUE
808
+ rb_parser_compile_array(VALUE vparser, VALUE fname, VALUE array, int start)
809
+ {
810
+ struct ruby_parser *parser;
811
+ VALUE ast_value = ast_alloc();
812
+
813
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
814
+ DATA_PTR(ast_value) = parser_compile_array(parser, fname, array, start);
815
+ RB_GC_GUARD(vparser);
816
+
817
+ return ast_value;
818
+ }
819
+
820
+ VALUE
821
+ rb_parser_compile_generic(VALUE vparser, rb_parser_lex_gets_func *lex_gets, VALUE fname, VALUE input, int start)
822
+ {
823
+ struct ruby_parser *parser;
824
+ VALUE ast_value = ast_alloc();
825
+
826
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
827
+ DATA_PTR(ast_value) = parser_compile_generic(parser, lex_gets, fname, input, start);
828
+ RB_GC_GUARD(vparser);
829
+
830
+ return ast_value;
831
+ }
832
+
833
+ VALUE
834
+ rb_parser_compile_string(VALUE vparser, const char *f, VALUE s, int line)
835
+ {
836
+ struct ruby_parser *parser;
837
+ VALUE ast_value = ast_alloc();
838
+
839
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
840
+ DATA_PTR(ast_value) = parser_compile_string(parser, f, s, line);
841
+ RB_GC_GUARD(vparser);
842
+
843
+ return ast_value;
844
+ }
845
+
846
+ VALUE
847
+ rb_parser_compile_string_path(VALUE vparser, VALUE f, VALUE s, int line)
848
+ {
849
+ struct ruby_parser *parser;
850
+ VALUE ast_value = ast_alloc();
851
+
852
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
853
+ DATA_PTR(ast_value) = parser_compile_string_path(parser, f, s, line);
854
+ RB_GC_GUARD(vparser);
855
+
856
+ return ast_value;
857
+ }
858
+
859
+ VALUE
860
+ rb_parser_encoding(VALUE vparser)
861
+ {
862
+ struct ruby_parser *parser;
863
+
864
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
865
+ return rb_enc_from_encoding(rb_ruby_parser_encoding(parser->parser_params));
866
+ }
867
+
868
+ VALUE
869
+ rb_parser_end_seen_p(VALUE vparser)
870
+ {
871
+ struct ruby_parser *parser;
872
+
873
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
874
+ return RBOOL(rb_ruby_parser_end_seen_p(parser->parser_params));
875
+ }
876
+
877
+ VALUE
878
+ rb_parser_set_yydebug(VALUE vparser, VALUE flag)
879
+ {
880
+ struct ruby_parser *parser;
881
+
882
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
883
+ rb_ruby_parser_set_yydebug(parser->parser_params, RTEST(flag));
884
+ return flag;
885
+ }
886
+
887
+ void
888
+ rb_set_script_lines_for(VALUE vparser, VALUE path)
889
+ {
890
+ struct ruby_parser *parser;
891
+ VALUE hash;
892
+ ID script_lines;
893
+ CONST_ID(script_lines, "SCRIPT_LINES__");
894
+ if (!rb_const_defined_at(rb_cObject, script_lines)) return;
895
+ hash = rb_const_get_at(rb_cObject, script_lines);
896
+ if (RB_TYPE_P(hash, T_HASH)) {
897
+ rb_hash_aset(hash, path, Qtrue);
898
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
899
+ rb_ruby_parser_set_script_lines(parser->parser_params);
900
+ }
901
+ }
902
+
903
+ VALUE
904
+ rb_parser_build_script_lines_from(rb_parser_ary_t *lines)
905
+ {
906
+ int i;
907
+ if (!lines) return Qnil;
908
+ if (lines->data_type != PARSER_ARY_DATA_SCRIPT_LINE) {
909
+ rb_bug("unexpected rb_parser_ary_data_type (%d) for script lines", lines->data_type);
910
+ }
911
+ VALUE script_lines = rb_ary_new_capa(lines->len);
912
+ for (i = 0; i < lines->len; i++) {
913
+ rb_parser_string_t *str = (rb_parser_string_t *)lines->data[i];
914
+ rb_ary_push(script_lines, rb_enc_str_new(str->ptr, str->len, str->enc));
915
+ }
916
+ return script_lines;
917
+ }
918
+
919
+ VALUE
920
+ rb_str_new_parser_string(rb_parser_string_t *str)
921
+ {
922
+ VALUE string = rb_enc_literal_str(str->ptr, str->len, str->enc);
923
+ rb_enc_str_coderange(string);
924
+ return string;
925
+ }
926
+
927
+ VALUE
928
+ rb_str_new_mutable_parser_string(rb_parser_string_t *str)
929
+ {
930
+ return rb_enc_str_new(str->ptr, str->len, str->enc);
931
+ }
932
+
933
+ static VALUE
934
+ negative_numeric(VALUE val)
935
+ {
936
+ if (FIXNUM_P(val)) {
937
+ return LONG2FIX(-FIX2LONG(val));
938
+ }
939
+ if (SPECIAL_CONST_P(val)) {
940
+ #if USE_FLONUM
941
+ if (FLONUM_P(val)) {
942
+ return DBL2NUM(-RFLOAT_VALUE(val));
943
+ }
944
+ #endif
945
+ goto unknown;
946
+ }
947
+ switch (BUILTIN_TYPE(val)) {
948
+ case T_BIGNUM:
949
+ BIGNUM_NEGATE(val);
950
+ val = rb_big_norm(val);
951
+ break;
952
+ case T_RATIONAL:
953
+ RATIONAL_SET_NUM(val, negative_numeric(RRATIONAL(val)->num));
954
+ break;
955
+ case T_COMPLEX:
956
+ RCOMPLEX_SET_REAL(val, negative_numeric(RCOMPLEX(val)->real));
957
+ RCOMPLEX_SET_IMAG(val, negative_numeric(RCOMPLEX(val)->imag));
958
+ break;
959
+ case T_FLOAT:
960
+ val = DBL2NUM(-RFLOAT_VALUE(val));
961
+ break;
962
+ unknown:
963
+ default:
964
+ rb_bug("unknown literal type (%s) passed to negative_numeric",
965
+ rb_builtin_class_name(val));
966
+ break;
967
+ }
968
+ return val;
969
+ }
970
+
971
+ static VALUE
972
+ integer_value(const char *val, int base)
973
+ {
974
+ return rb_cstr_to_inum(val, base, FALSE);
975
+ }
976
+
977
+ static VALUE
978
+ rational_value(const char *node_val, int base, int seen_point)
979
+ {
980
+ VALUE lit;
981
+ char* val = strdup(node_val);
982
+ if (seen_point > 0) {
983
+ int len = (int)(strlen(val));
984
+ char *point = &val[seen_point];
985
+ size_t fraclen = len-seen_point-1;
986
+ memmove(point, point+1, fraclen+1);
987
+
988
+ lit = rb_rational_new(integer_value(val, base), rb_int_positive_pow(10, fraclen));
989
+ }
990
+ else {
991
+ lit = rb_rational_raw1(integer_value(val, base));
992
+ }
993
+
994
+ free(val);
995
+
996
+ return lit;
997
+ }
998
+
999
+ VALUE
1000
+ rb_node_integer_literal_val(const NODE *n)
1001
+ {
1002
+ const rb_node_integer_t *node = RNODE_INTEGER(n);
1003
+ VALUE val = integer_value(node->val, node->base);
1004
+ if (node->minus) {
1005
+ val = negative_numeric(val);
1006
+ }
1007
+ return val;
1008
+ }
1009
+
1010
+ VALUE
1011
+ rb_node_float_literal_val(const NODE *n)
1012
+ {
1013
+ const rb_node_float_t *node = RNODE_FLOAT(n);
1014
+ double d = strtod(node->val, 0);
1015
+ if (node->minus) {
1016
+ d = -d;
1017
+ }
1018
+ VALUE val = DBL2NUM(d);
1019
+ return val;
1020
+ }
1021
+
1022
+ VALUE
1023
+ rb_node_rational_literal_val(const NODE *n)
1024
+ {
1025
+ VALUE lit;
1026
+ const rb_node_rational_t *node = RNODE_RATIONAL(n);
1027
+
1028
+ lit = rational_value(node->val, node->base, node->seen_point);
1029
+
1030
+ if (node->minus) {
1031
+ lit = negative_numeric(lit);
1032
+ }
1033
+
1034
+ return lit;
1035
+ }
1036
+
1037
+ VALUE
1038
+ rb_node_imaginary_literal_val(const NODE *n)
1039
+ {
1040
+ VALUE lit;
1041
+ const rb_node_imaginary_t *node = RNODE_IMAGINARY(n);
1042
+
1043
+ enum rb_numeric_type type = node->type;
1044
+
1045
+ switch (type) {
1046
+ case integer_literal:
1047
+ lit = integer_value(node->val, node->base);
1048
+ break;
1049
+ case float_literal:{
1050
+ double d = strtod(node->val, 0);
1051
+ lit = DBL2NUM(d);
1052
+ break;
1053
+ }
1054
+ case rational_literal:
1055
+ lit = rational_value(node->val, node->base, node->seen_point);
1056
+ break;
1057
+ default:
1058
+ rb_bug("unreachable");
1059
+ }
1060
+
1061
+ lit = rb_complex_raw(INT2FIX(0), lit);
1062
+
1063
+ if (node->minus) {
1064
+ lit = negative_numeric(lit);
1065
+ }
1066
+ return lit;
1067
+ }
1068
+
1069
+ VALUE
1070
+ rb_node_str_string_val(const NODE *node)
1071
+ {
1072
+ rb_parser_string_t *str = RNODE_STR(node)->string;
1073
+ return rb_str_new_parser_string(str);
1074
+ }
1075
+
1076
+ VALUE
1077
+ rb_node_sym_string_val(const NODE *node)
1078
+ {
1079
+ rb_parser_string_t *str = RNODE_SYM(node)->string;
1080
+ return ID2SYM(rb_intern3(str->ptr, str->len, str->enc));
1081
+ }
1082
+
1083
+ VALUE
1084
+ rb_node_dstr_string_val(const NODE *node)
1085
+ {
1086
+ rb_parser_string_t *str = RNODE_DSTR(node)->string;
1087
+ return str ? rb_str_new_parser_string(str) : Qnil;
1088
+ }
1089
+
1090
+ VALUE
1091
+ rb_node_dregx_string_val(const NODE *node)
1092
+ {
1093
+ rb_parser_string_t *str = RNODE_DREGX(node)->string;
1094
+ return rb_str_new_parser_string(str);
1095
+ }
1096
+
1097
+ VALUE
1098
+ rb_node_regx_string_val(const NODE *node)
1099
+ {
1100
+ rb_node_regx_t *node_reg = RNODE_REGX(node);
1101
+ rb_parser_string_t *string = node_reg->string;
1102
+ VALUE str = rb_enc_str_new(string->ptr, string->len, string->enc);
1103
+
1104
+ return rb_reg_compile(str, node_reg->options, NULL, 0);
1105
+ }
1106
+
1107
+ VALUE
1108
+ rb_node_line_lineno_val(const NODE *node)
1109
+ {
1110
+ return INT2FIX(node->nd_loc.beg_pos.lineno);
1111
+ }
1112
+
1113
+ VALUE
1114
+ rb_node_file_path_val(const NODE *node)
1115
+ {
1116
+ return rb_str_new_parser_string(RNODE_FILE(node)->path);
1117
+ }
1118
+
1119
+ VALUE
1120
+ rb_node_encoding_val(const NODE *node)
1121
+ {
1122
+ return rb_enc_from_encoding(RNODE_ENCODING(node)->enc);
1123
+ }
1124
+
1125
+ static void
1126
+ parser_aset_script_lines_for(VALUE path, rb_parser_ary_t *lines)
1127
+ {
1128
+ VALUE hash, script_lines;
1129
+ ID script_lines_id;
1130
+ if (NIL_P(path) || !lines) return;
1131
+ CONST_ID(script_lines_id, "SCRIPT_LINES__");
1132
+ if (!rb_const_defined_at(rb_cObject, script_lines_id)) return;
1133
+ hash = rb_const_get_at(rb_cObject, script_lines_id);
1134
+ if (!RB_TYPE_P(hash, T_HASH)) return;
1135
+ if (rb_hash_lookup(hash, path) == Qnil) return;
1136
+ script_lines = rb_parser_build_script_lines_from(lines);
1137
+ rb_hash_aset(hash, path, script_lines);
1138
+ }
1139
+
1140
+ VALUE
1141
+ rb_ruby_ast_new(const NODE *const root)
1142
+ {
1143
+ rb_ast_t *ast;
1144
+ VALUE ast_value = TypedData_Make_Struct(0, rb_ast_t, &ast_data_type, ast);
1145
+ #ifdef UNIVERSAL_PARSER
1146
+ ast->config = &rb_global_parser_config;
1147
+ #endif
1148
+ ast->body = (rb_ast_body_t){
1149
+ .root = root,
1150
+ .frozen_string_literal = -1,
1151
+ .coverage_enabled = -1,
1152
+ .script_lines = NULL,
1153
+ .line_count = 0,
1154
+ };
1155
+ return ast_value;
1156
+ }
1157
+
1158
+ rb_ast_t *
1159
+ rb_ruby_ast_data_get(VALUE ast_value)
1160
+ {
1161
+ rb_ast_t *ast;
1162
+ if (NIL_P(ast_value)) return NULL;
1163
+ TypedData_Get_Struct(ast_value, rb_ast_t, &ast_data_type, ast);
1164
+ return ast;
1165
+ }