kanayago 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/.rubocop_todo.yml +22 -11
- data/CHANGELOG.md +32 -0
- data/README.md +44 -25
- data/Rakefile +97 -2
- data/ext/kanayago/kanayago.c +630 -1
- data/lib/kanayago/version.rb +1 -1
- data/patch/3.4.0/copy_target.rb +78 -0
- data/patch/3.4.0/kanayago.patch +210 -0
- data/patch/3.4.1/copy_target.rb +78 -0
- data/patch/3.4.1/kanayago.patch +210 -0
- data/patch/3.4.2/copy_target.rb +78 -0
- data/patch/3.4.2/kanayago.patch +210 -0
- data/patch/3.4.3/copy_target.rb +78 -0
- data/patch/3.4.3/kanayago.patch +210 -0
- data/patch/3.4.4/copy_target.rb +78 -0
- data/patch/3.4.4/kanayago.patch +210 -0
- data/patch/3.4.5/copy_target.rb +78 -0
- data/patch/3.4.5/kanayago.patch +210 -0
- data/patch/3.4.6/copy_target.rb +78 -0
- data/patch/3.4.6/kanayago.patch +210 -0
- data/patch/3.4.7/copy_target.rb +78 -0
- data/patch/3.4.7/kanayago.patch +210 -0
- data/patch/3.4.8/copy_target.rb +78 -0
- data/patch/3.4.8/kanayago.patch +210 -0
- data/patch/4.0.0/copy_target.rb +85 -0
- data/patch/4.0.0/kanayago.patch +282 -0
- data/patch/4.0.0/macos.patch +46 -0
- data/patch/head/kanayago.patch +1 -1
- data/patch/head/macos.patch +65 -0
- data/patch/head/macos.patch.rej +49 -0
- data/script/setup_parser.rb +40 -3
- metadata +25 -2
data/ext/kanayago/kanayago.c
CHANGED
|
@@ -1,3 +1,82 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Kanayago - Ruby Parser Extension
|
|
3
|
+
*
|
|
4
|
+
* This file contains code derived from CRuby (https://www.ruby-lang.org/).
|
|
5
|
+
*
|
|
6
|
+
* The following functions are based on ruby_parser.c:
|
|
7
|
+
* - kanayago_zalloc
|
|
8
|
+
* - kanayago_memmove
|
|
9
|
+
* - kanayago_nonempty_memcpy
|
|
10
|
+
* - kanayago_is_local_id
|
|
11
|
+
* - kanayago_is_attrset_id
|
|
12
|
+
* - kanayago_is_notop_id
|
|
13
|
+
* - kanayago_enc_str_new
|
|
14
|
+
* - kanayago_enc_isalnum
|
|
15
|
+
* - kanayago_enc_precise_mbclen
|
|
16
|
+
* - kanayago_mbclen_charfound_p
|
|
17
|
+
* - kanayago_mbclen_charfound_len
|
|
18
|
+
* - kanayago_enc_name
|
|
19
|
+
* - kanayago_enc_prev_char
|
|
20
|
+
* - kanayago_enc_get
|
|
21
|
+
* - kanayago_enc_asciicompat
|
|
22
|
+
* - kanayago_utf8_encoding
|
|
23
|
+
* - kanayago_ascii8bit_encoding
|
|
24
|
+
* - kanayago_enc_codelen
|
|
25
|
+
* - kanayago_enc_mbcput
|
|
26
|
+
* - kanayago_enc_from_index
|
|
27
|
+
* - kanayago_enc_isspace
|
|
28
|
+
* - kanayago_intern3
|
|
29
|
+
* - kanayago_enc_symname_type
|
|
30
|
+
* - kanayago_is_usascii_enc
|
|
31
|
+
* - kanayago_local_defined
|
|
32
|
+
* - kanayago_dvar_defined
|
|
33
|
+
* - kanayago_rtest
|
|
34
|
+
* - kanayago_nil_p
|
|
35
|
+
* - kanayago_syntax_error_new
|
|
36
|
+
* - kanayago_ruby_verbose
|
|
37
|
+
* - kanayago_errno_ptr
|
|
38
|
+
* - kanayago_gc_guard
|
|
39
|
+
* - kanayago_arg_error
|
|
40
|
+
* - kanayago_static_id2sym
|
|
41
|
+
* - kanayago_str_coderange_scan_restartable
|
|
42
|
+
* - kanayago_enc_mbminlen
|
|
43
|
+
* - kanayago_enc_isascii
|
|
44
|
+
* - kanayago_enc_mbc_to_codepoint
|
|
45
|
+
* - kanayago_reg_named_capture_assign
|
|
46
|
+
* - kanayago_reg_named_capture_assign_iter
|
|
47
|
+
*
|
|
48
|
+
* The following functions are based on error.c:
|
|
49
|
+
* - kanayago_err_vcatf
|
|
50
|
+
* - kanayago_syntax_error_with_path
|
|
51
|
+
* - kanayago_syntax_error_append
|
|
52
|
+
*
|
|
53
|
+
* Ruby is copyrighted free software by Yukihiro Matsumoto <matz@netlab.jp>.
|
|
54
|
+
* Ruby is available under the terms of the 2-clause BSD License:
|
|
55
|
+
*
|
|
56
|
+
* Redistribution and use in source and binary forms, with or without
|
|
57
|
+
* modification, are permitted provided that the following conditions are met:
|
|
58
|
+
*
|
|
59
|
+
* 1. Redistributions of source code must retain the above copyright notice,
|
|
60
|
+
* this list of conditions and the following disclaimer.
|
|
61
|
+
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
62
|
+
* this list of conditions and the following disclaimer in the documentation
|
|
63
|
+
* and/or other materials provided with the distribution.
|
|
64
|
+
*
|
|
65
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
66
|
+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
67
|
+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
68
|
+
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
69
|
+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
70
|
+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
71
|
+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
72
|
+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
73
|
+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
74
|
+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
75
|
+
* POSSIBILITY OF SUCH DAMAGE.
|
|
76
|
+
*
|
|
77
|
+
* See https://www.ruby-lang.org/en/about/license.txt for the full Ruby license.
|
|
78
|
+
*/
|
|
79
|
+
|
|
1
80
|
#include "kanayago.h"
|
|
2
81
|
#include "scope_node.h"
|
|
3
82
|
#include "literal_node.h"
|
|
@@ -9,6 +88,549 @@
|
|
|
9
88
|
#include "internal/ruby_parser.h"
|
|
10
89
|
#include "rubyparser.h"
|
|
11
90
|
|
|
91
|
+
#include <unistd.h>
|
|
92
|
+
#include "internal.h"
|
|
93
|
+
#include "internal/array.h"
|
|
94
|
+
#include "internal/bignum.h"
|
|
95
|
+
#include "internal/compile.h"
|
|
96
|
+
#include "internal/complex.h"
|
|
97
|
+
#include "internal/gc.h"
|
|
98
|
+
#include "internal/hash.h"
|
|
99
|
+
#include "internal/io.h"
|
|
100
|
+
#include "internal/rational.h"
|
|
101
|
+
#include "internal/re.h"
|
|
102
|
+
#include "internal/string.h"
|
|
103
|
+
#include "internal/symbol.h"
|
|
104
|
+
#include "internal/thread.h"
|
|
105
|
+
#include "ruby/ractor.h"
|
|
106
|
+
#include "ruby/util.h"
|
|
107
|
+
#include "vm_core.h"
|
|
108
|
+
#include "symbol.h"
|
|
109
|
+
|
|
110
|
+
#define parser_encoding const void
|
|
111
|
+
|
|
112
|
+
/*
|
|
113
|
+
* Kanayago's own adapter implementation
|
|
114
|
+
* To eliminate dependency on Universal Parser's rb_global_parser_config,
|
|
115
|
+
* we define our own kanayago_parser_config.
|
|
116
|
+
*/
|
|
117
|
+
|
|
118
|
+
/* Memory allocation with overflow check */
|
|
119
|
+
static void *
|
|
120
|
+
kanayago_xmalloc_mul_add(size_t x, size_t y, size_t z)
|
|
121
|
+
{
|
|
122
|
+
size_t size;
|
|
123
|
+
if (y != 0 && x > (SIZE_MAX - z) / y) {
|
|
124
|
+
rb_raise(rb_eArgError, "allocation size overflow");
|
|
125
|
+
}
|
|
126
|
+
size = x * y + z;
|
|
127
|
+
return ruby_xmalloc(size);
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
/* Temporary ID generation */
|
|
131
|
+
static size_t kanayago_tmp_id_counter = 0;
|
|
132
|
+
|
|
133
|
+
static ID
|
|
134
|
+
kanayago_make_temporary_id(size_t n)
|
|
135
|
+
{
|
|
136
|
+
char buf[64];
|
|
137
|
+
snprintf(buf, sizeof(buf), "@kanayago_tmp_%zu_%zu", n, kanayago_tmp_id_counter++);
|
|
138
|
+
return rb_intern(buf);
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
/* TTY detection */
|
|
142
|
+
/* Return the result of isatty() on the file descriptor behind stderr. */
static int
kanayago_stderr_tty_p(void)
{
    const int stderr_fd = fileno(stderr);

    return isatty(stderr_fd);
}
|
|
147
|
+
|
|
148
|
+
/* Regex compilation */
|
|
149
|
+
static VALUE
|
|
150
|
+
kanayago_reg_compile(VALUE str, int options, const char *sourcefile, int sourceline)
|
|
151
|
+
{
|
|
152
|
+
return rb_reg_new_str(str, options);
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
/* Regex preprocessing */
|
|
156
|
+
static VALUE
|
|
157
|
+
kanayago_reg_check_preprocess(VALUE val)
|
|
158
|
+
{
|
|
159
|
+
return Qnil;
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
/* Tracing suppression */
|
|
163
|
+
static VALUE
|
|
164
|
+
kanayago_suppress_tracing(VALUE (*func)(VALUE), VALUE arg)
|
|
165
|
+
{
|
|
166
|
+
return func(arg);
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
/* Helper functions (ported from ruby_parser.c) */
|
|
170
|
+
/* Allocate a single zero-filled object of elemsiz bytes with ruby_xcalloc. */
static void *
kanayago_zalloc(size_t elemsiz)
{
    void *zeroed = ruby_xcalloc(1, elemsiz);

    return zeroed;
}
|
|
175
|
+
|
|
176
|
+
/*
 * memmove() n elements of t bytes each from src to dest.
 * The byte count is computed by rbimpl_size_mul_or_raise(t, n).
 */
static void *
kanayago_memmove(void *dest, const void *src, size_t t, size_t n)
{
    const size_t nbytes = rbimpl_size_mul_or_raise(t, n);

    return memmove(dest, src, nbytes);
}
|
|
181
|
+
|
|
182
|
+
/*
 * Copy n elements of t bytes each from src to dest through
 * ruby_nonempty_memcpy. The byte count is computed by
 * rbimpl_size_mul_or_raise(t, n).
 */
static void *
kanayago_nonempty_memcpy(void *dest, const void *src, size_t t, size_t n)
{
    const size_t nbytes = rbimpl_size_mul_or_raise(t, n);

    return ruby_nonempty_memcpy(dest, src, nbytes);
}
|
|
187
|
+
|
|
188
|
+
static int
|
|
189
|
+
kanayago_is_local_id(ID id)
|
|
190
|
+
{
|
|
191
|
+
return is_local_id(id);
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
static int
|
|
195
|
+
kanayago_is_attrset_id(ID id)
|
|
196
|
+
{
|
|
197
|
+
return is_attrset_id(id);
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
static int
|
|
201
|
+
kanayago_is_notop_id(ID id)
|
|
202
|
+
{
|
|
203
|
+
return is_notop_id(id);
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
static VALUE
|
|
207
|
+
kanayago_enc_str_new(const char *ptr, long len, parser_encoding *enc)
|
|
208
|
+
{
|
|
209
|
+
return rb_enc_str_new(ptr, len, enc);
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
static int
|
|
213
|
+
kanayago_enc_isalnum(OnigCodePoint c, parser_encoding *enc)
|
|
214
|
+
{
|
|
215
|
+
return rb_enc_isalnum(c, enc);
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
static int
|
|
219
|
+
kanayago_enc_precise_mbclen(const char *p, const char *e, parser_encoding *enc)
|
|
220
|
+
{
|
|
221
|
+
return rb_enc_precise_mbclen(p, e, enc);
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
/* Function-shaped adapter around the MBCLEN_CHARFOUND_P() macro. */
static int
kanayago_mbclen_charfound_p(int len)
{
    const int found = MBCLEN_CHARFOUND_P(len);

    return found;
}
|
|
229
|
+
|
|
230
|
+
/* Function-shaped adapter around the MBCLEN_CHARFOUND_LEN() macro. */
static int
kanayago_mbclen_charfound_len(int len)
{
    const int char_len = MBCLEN_CHARFOUND_LEN(len);

    return char_len;
}
|
|
235
|
+
|
|
236
|
+
static const char *
|
|
237
|
+
kanayago_enc_name(parser_encoding *enc)
|
|
238
|
+
{
|
|
239
|
+
return rb_enc_name(enc);
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
static char *
|
|
243
|
+
kanayago_enc_prev_char(const char *s, const char *p, const char *e, parser_encoding *enc)
|
|
244
|
+
{
|
|
245
|
+
return rb_enc_prev_char(s, p, e, enc);
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
static parser_encoding *
|
|
249
|
+
kanayago_enc_get(VALUE obj)
|
|
250
|
+
{
|
|
251
|
+
return rb_enc_get(obj);
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
static int
|
|
255
|
+
kanayago_enc_asciicompat(parser_encoding *enc)
|
|
256
|
+
{
|
|
257
|
+
return rb_enc_asciicompat(enc);
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
/* Return rb_utf8_encoding() as an opaque parser_encoding pointer. */
static parser_encoding *
kanayago_utf8_encoding(void)
{
    parser_encoding *utf8 = rb_utf8_encoding();

    return utf8;
}
|
|
265
|
+
|
|
266
|
+
/* Return rb_ascii8bit_encoding() as an opaque parser_encoding pointer. */
static parser_encoding *
kanayago_ascii8bit_encoding(void)
{
    parser_encoding *binary = rb_ascii8bit_encoding();

    return binary;
}
|
|
271
|
+
|
|
272
|
+
static int
|
|
273
|
+
kanayago_enc_codelen(int c, parser_encoding *enc)
|
|
274
|
+
{
|
|
275
|
+
return rb_enc_codelen(c, enc);
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
static int
|
|
279
|
+
kanayago_enc_mbcput(unsigned int c, void *buf, parser_encoding *enc)
|
|
280
|
+
{
|
|
281
|
+
return rb_enc_mbcput(c, buf, enc);
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
/* Return rb_enc_from_index(idx) as an opaque parser_encoding pointer. */
static parser_encoding *
kanayago_enc_from_index(int idx)
{
    parser_encoding *enc = rb_enc_from_index(idx);

    return enc;
}
|
|
289
|
+
|
|
290
|
+
static int
|
|
291
|
+
kanayago_enc_isspace(OnigCodePoint c, parser_encoding *enc)
|
|
292
|
+
{
|
|
293
|
+
return rb_enc_isspace(c, enc);
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
static ID
|
|
297
|
+
kanayago_intern3(const char *name, long len, parser_encoding *enc)
|
|
298
|
+
{
|
|
299
|
+
return rb_intern3(name, len, enc);
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
static int
|
|
303
|
+
kanayago_enc_symname_type(const char *name, long len, parser_encoding *enc, unsigned int allowed_attrset)
|
|
304
|
+
{
|
|
305
|
+
return rb_enc_symname_type(name, len, enc, allowed_attrset);
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
static int
|
|
309
|
+
kanayago_is_usascii_enc(parser_encoding *enc)
|
|
310
|
+
{
|
|
311
|
+
return rb_is_usascii_enc(enc);
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
static int
|
|
315
|
+
kanayago_local_defined(ID id, const void *p)
|
|
316
|
+
{
|
|
317
|
+
// Kanayago doesn't have external ISEQ context
|
|
318
|
+
// parent_iseq is always NULL, so always return 0
|
|
319
|
+
(void)id;
|
|
320
|
+
(void)p;
|
|
321
|
+
return 0;
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
static int
|
|
325
|
+
kanayago_dvar_defined(ID id, const void *p)
|
|
326
|
+
{
|
|
327
|
+
// Kanayago doesn't have external ISEQ context
|
|
328
|
+
// parent_iseq is always NULL, so always return 0
|
|
329
|
+
(void)id;
|
|
330
|
+
(void)p;
|
|
331
|
+
return 0;
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
static int
|
|
335
|
+
kanayago_rtest(VALUE obj)
|
|
336
|
+
{
|
|
337
|
+
return (int)RB_TEST(obj);
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
static int
|
|
341
|
+
kanayago_nil_p(VALUE obj)
|
|
342
|
+
{
|
|
343
|
+
return (int)NIL_P(obj);
|
|
344
|
+
}
|
|
345
|
+
|
|
346
|
+
static VALUE
|
|
347
|
+
kanayago_syntax_error_new(void)
|
|
348
|
+
{
|
|
349
|
+
return rb_class_new_instance(0, 0, rb_eSyntaxError);
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
static VALUE
|
|
353
|
+
kanayago_ruby_verbose(void)
|
|
354
|
+
{
|
|
355
|
+
return ruby_verbose;
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
/* Return the pointer produced by rb_errno_ptr(). */
static int *
kanayago_errno_ptr(void)
{
    int *errno_slot = rb_errno_ptr();

    return errno_slot;
}
|
|
363
|
+
|
|
364
|
+
static void
|
|
365
|
+
kanayago_gc_guard(VALUE obj)
|
|
366
|
+
{
|
|
367
|
+
RB_GC_GUARD(obj);
|
|
368
|
+
}
|
|
369
|
+
|
|
370
|
+
static VALUE
|
|
371
|
+
kanayago_arg_error(void)
|
|
372
|
+
{
|
|
373
|
+
return rb_eArgError;
|
|
374
|
+
}
|
|
375
|
+
|
|
376
|
+
static VALUE
|
|
377
|
+
kanayago_static_id2sym(ID id)
|
|
378
|
+
{
|
|
379
|
+
return (((VALUE)(id)<<RUBY_SPECIAL_SHIFT)|SYMBOL_FLAG);
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
static long
|
|
383
|
+
kanayago_str_coderange_scan_restartable(const char *s, const char *e, parser_encoding *enc, int *cr)
|
|
384
|
+
{
|
|
385
|
+
return rb_str_coderange_scan_restartable(s, e, enc, cr);
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
static int
|
|
389
|
+
kanayago_enc_mbminlen(parser_encoding *enc)
|
|
390
|
+
{
|
|
391
|
+
return rb_enc_mbminlen(enc);
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
static bool
|
|
395
|
+
kanayago_enc_isascii(OnigCodePoint c, parser_encoding *enc)
|
|
396
|
+
{
|
|
397
|
+
return rb_enc_isascii(c, enc);
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
static OnigCodePoint
|
|
401
|
+
kanayago_enc_mbc_to_codepoint(const char *p, const char *e, parser_encoding *enc)
|
|
402
|
+
{
|
|
403
|
+
const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
|
|
404
|
+
const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
|
|
405
|
+
|
|
406
|
+
return ONIGENC_MBC_TO_CODE((rb_encoding *)enc, up, ue);
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
/* Syntax Error Append (ported from error.c) */
|
|
410
|
+
static VALUE
|
|
411
|
+
kanayago_err_vcatf(VALUE str, const char *pre, const char *file, int line,
|
|
412
|
+
const char *fmt, va_list args)
|
|
413
|
+
{
|
|
414
|
+
if (file) {
|
|
415
|
+
rb_str_cat_cstr(str, file);
|
|
416
|
+
if (line) rb_str_catf(str, ":%d", line);
|
|
417
|
+
rb_str_cat_cstr(str, ": ");
|
|
418
|
+
}
|
|
419
|
+
if (pre) rb_str_cat_cstr(str, pre);
|
|
420
|
+
rb_str_vcatf(str, fmt, args);
|
|
421
|
+
return str;
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
static VALUE
|
|
425
|
+
kanayago_syntax_error_with_path(VALUE exc, VALUE file, VALUE *mesg, rb_encoding *enc)
|
|
426
|
+
{
|
|
427
|
+
if (NIL_P(exc) || exc == Qfalse) {
|
|
428
|
+
exc = rb_class_new_instance(0, 0, rb_eSyntaxError);
|
|
429
|
+
}
|
|
430
|
+
*mesg = rb_attr_get(exc, rb_intern("mesg"));
|
|
431
|
+
if (NIL_P(*mesg) || OBJ_FROZEN(*mesg)) {
|
|
432
|
+
*mesg = rb_enc_str_new(0, 0, enc);
|
|
433
|
+
rb_ivar_set(exc, rb_intern("mesg"), *mesg);
|
|
434
|
+
}
|
|
435
|
+
return exc;
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
/*
 * Append one formatted syntax-error message to exc's "mesg" string and
 * return the exception.
 *
 * When exc is false (zero) on entry, a new SyntaxError is created and a
 * copy of its message followed by "\n" is also written with
 * rb_write_error_str(). Otherwise the message is only accumulated on the
 * exception.
 *
 * file supplies the "<file>:<line>: " prefix when it is not nil.
 * column is accepted but not used.
 */
RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 6, 0)
static VALUE
kanayago_syntax_error_append(VALUE exc, VALUE file, int line, int column,
                             parser_encoding *enc, const char *fmt, va_list args)
{
    const char *fn = NIL_P(file) ? NULL : RSTRING_PTR(file);
    /* Remember the entry state: the helper below replaces a false exc. */
    const int report_now = !exc;
    VALUE mesg;

    (void)column;

    /* Creates the exception and/or its message buffer when missing. */
    exc = kanayago_syntax_error_with_path(exc, file, &mesg, enc);
    kanayago_err_vcatf(mesg, NULL, fn, line, fmt, args);

    if (report_now) {
        VALUE err_mesg = rb_str_dup(mesg);

        rb_str_cat_cstr(err_mesg, "\n");
        rb_write_error_str(err_mesg);
    }

    return exc;
}
|
|
464
|
+
|
|
465
|
+
/* reg_named_capture_assign (ported from ruby_parser.c) */
|
|
466
|
+
typedef struct {
|
|
467
|
+
struct parser_params *parser;
|
|
468
|
+
rb_encoding *enc;
|
|
469
|
+
NODE *succ_block;
|
|
470
|
+
const rb_code_location_t *loc;
|
|
471
|
+
rb_parser_assignable_func assignable;
|
|
472
|
+
} kanayago_reg_named_capture_assign_t;
|
|
473
|
+
|
|
474
|
+
static int
|
|
475
|
+
kanayago_reg_named_capture_assign_iter(const OnigUChar *name, const OnigUChar *name_end,
|
|
476
|
+
int back_num, int *back_refs, OnigRegex regex, void *arg0)
|
|
477
|
+
{
|
|
478
|
+
kanayago_reg_named_capture_assign_t *arg = (kanayago_reg_named_capture_assign_t*)arg0;
|
|
479
|
+
struct parser_params* p = arg->parser;
|
|
480
|
+
rb_encoding *enc = arg->enc;
|
|
481
|
+
const rb_code_location_t *loc = arg->loc;
|
|
482
|
+
long len = name_end - name;
|
|
483
|
+
const char *s = (const char *)name;
|
|
484
|
+
|
|
485
|
+
return rb_reg_named_capture_assign_iter_impl(p, s, len, enc, &arg->succ_block, loc, arg->assignable);
|
|
486
|
+
}
|
|
487
|
+
|
|
488
|
+
static NODE *
|
|
489
|
+
kanayago_reg_named_capture_assign(struct parser_params* p, VALUE regexp, const rb_code_location_t *loc,
|
|
490
|
+
rb_parser_assignable_func assignable)
|
|
491
|
+
{
|
|
492
|
+
kanayago_reg_named_capture_assign_t arg;
|
|
493
|
+
|
|
494
|
+
arg.parser = p;
|
|
495
|
+
arg.enc = rb_enc_get(regexp);
|
|
496
|
+
arg.succ_block = 0;
|
|
497
|
+
arg.loc = loc;
|
|
498
|
+
arg.assignable = assignable;
|
|
499
|
+
onig_foreach_name(RREGEXP_PTR(regexp), kanayago_reg_named_capture_assign_iter, &arg);
|
|
500
|
+
|
|
501
|
+
if (!arg.succ_block) return 0;
|
|
502
|
+
return RNODE_BLOCK(arg.succ_block)->nd_next;
|
|
503
|
+
}
|
|
504
|
+
|
|
505
|
+
/* Kanayago's own parser config */
|
|
506
|
+
static const rb_parser_config_t kanayago_parser_config = {
|
|
507
|
+
.malloc = ruby_xmalloc,
|
|
508
|
+
.calloc = ruby_xcalloc,
|
|
509
|
+
.realloc = ruby_xrealloc,
|
|
510
|
+
.free = ruby_xfree,
|
|
511
|
+
.alloc_n = ruby_xmalloc2,
|
|
512
|
+
.alloc = ruby_xmalloc,
|
|
513
|
+
.realloc_n = ruby_xrealloc2,
|
|
514
|
+
.zalloc = kanayago_zalloc,
|
|
515
|
+
.rb_memmove = kanayago_memmove,
|
|
516
|
+
.nonempty_memcpy = kanayago_nonempty_memcpy,
|
|
517
|
+
.xmalloc_mul_add = kanayago_xmalloc_mul_add,
|
|
518
|
+
|
|
519
|
+
.compile_callback = kanayago_suppress_tracing,
|
|
520
|
+
.reg_named_capture_assign = kanayago_reg_named_capture_assign,
|
|
521
|
+
|
|
522
|
+
.attr_get = rb_attr_get,
|
|
523
|
+
|
|
524
|
+
.ary_new_from_args = rb_ary_new_from_args,
|
|
525
|
+
.ary_unshift = rb_ary_unshift,
|
|
526
|
+
|
|
527
|
+
.make_temporary_id = kanayago_make_temporary_id,
|
|
528
|
+
.is_local_id = kanayago_is_local_id,
|
|
529
|
+
.is_attrset_id = kanayago_is_attrset_id,
|
|
530
|
+
.is_global_name_punct = is_global_name_punct,
|
|
531
|
+
.id_type = id_type,
|
|
532
|
+
.id_attrset = rb_id_attrset,
|
|
533
|
+
.intern = rb_intern,
|
|
534
|
+
.intern2 = rb_intern2,
|
|
535
|
+
.intern3 = kanayago_intern3,
|
|
536
|
+
.intern_str = rb_intern_str,
|
|
537
|
+
.is_notop_id = kanayago_is_notop_id,
|
|
538
|
+
.enc_symname_type = kanayago_enc_symname_type,
|
|
539
|
+
.id2name = rb_id2name,
|
|
540
|
+
.id2str = rb_id2str,
|
|
541
|
+
.id2sym = rb_id2sym,
|
|
542
|
+
|
|
543
|
+
.str_catf = rb_str_catf,
|
|
544
|
+
.str_cat_cstr = rb_str_cat_cstr,
|
|
545
|
+
.str_resize = rb_str_resize,
|
|
546
|
+
.str_new = rb_str_new,
|
|
547
|
+
.str_new_cstr = rb_str_new_cstr,
|
|
548
|
+
.str_to_interned_str = rb_str_to_interned_str,
|
|
549
|
+
.enc_str_new = kanayago_enc_str_new,
|
|
550
|
+
.str_vcatf = rb_str_vcatf,
|
|
551
|
+
.rb_sprintf = rb_sprintf,
|
|
552
|
+
.rstring_ptr = RSTRING_PTR,
|
|
553
|
+
.rstring_len = RSTRING_LEN,
|
|
554
|
+
|
|
555
|
+
.int2num = rb_int2num_inline,
|
|
556
|
+
|
|
557
|
+
.stderr_tty_p = kanayago_stderr_tty_p,
|
|
558
|
+
.write_error_str = rb_write_error_str,
|
|
559
|
+
.io_write = rb_io_write,
|
|
560
|
+
.io_flush = rb_io_flush,
|
|
561
|
+
.io_puts = rb_io_puts,
|
|
562
|
+
|
|
563
|
+
.debug_output_stdout = rb_ractor_stdout,
|
|
564
|
+
.debug_output_stderr = rb_ractor_stderr,
|
|
565
|
+
|
|
566
|
+
.is_usascii_enc = kanayago_is_usascii_enc,
|
|
567
|
+
.enc_isalnum = kanayago_enc_isalnum,
|
|
568
|
+
.enc_precise_mbclen = kanayago_enc_precise_mbclen,
|
|
569
|
+
.mbclen_charfound_p = kanayago_mbclen_charfound_p,
|
|
570
|
+
.mbclen_charfound_len = kanayago_mbclen_charfound_len,
|
|
571
|
+
.enc_name = kanayago_enc_name,
|
|
572
|
+
.enc_prev_char = kanayago_enc_prev_char,
|
|
573
|
+
.enc_get = kanayago_enc_get,
|
|
574
|
+
.enc_asciicompat = kanayago_enc_asciicompat,
|
|
575
|
+
.utf8_encoding = kanayago_utf8_encoding,
|
|
576
|
+
.ascii8bit_encoding = kanayago_ascii8bit_encoding,
|
|
577
|
+
.enc_codelen = kanayago_enc_codelen,
|
|
578
|
+
.enc_mbcput = kanayago_enc_mbcput,
|
|
579
|
+
.enc_find_index = rb_enc_find_index,
|
|
580
|
+
.enc_from_index = kanayago_enc_from_index,
|
|
581
|
+
.enc_isspace = kanayago_enc_isspace,
|
|
582
|
+
.enc_coderange_7bit = ENC_CODERANGE_7BIT,
|
|
583
|
+
.enc_coderange_unknown = ENC_CODERANGE_UNKNOWN,
|
|
584
|
+
.enc_mbminlen = kanayago_enc_mbminlen,
|
|
585
|
+
.enc_isascii = kanayago_enc_isascii,
|
|
586
|
+
.enc_mbc_to_codepoint = kanayago_enc_mbc_to_codepoint,
|
|
587
|
+
|
|
588
|
+
.local_defined = kanayago_local_defined,
|
|
589
|
+
.dvar_defined = kanayago_dvar_defined,
|
|
590
|
+
|
|
591
|
+
.syntax_error_append = kanayago_syntax_error_append,
|
|
592
|
+
.raise = rb_raise,
|
|
593
|
+
.syntax_error_new = kanayago_syntax_error_new,
|
|
594
|
+
|
|
595
|
+
.errinfo = rb_errinfo,
|
|
596
|
+
.set_errinfo = rb_set_errinfo,
|
|
597
|
+
.make_exception = rb_make_exception,
|
|
598
|
+
|
|
599
|
+
.sized_xfree = ruby_sized_xfree,
|
|
600
|
+
.sized_realloc_n = ruby_sized_realloc_n,
|
|
601
|
+
.gc_guard = kanayago_gc_guard,
|
|
602
|
+
.gc_mark = rb_gc_mark,
|
|
603
|
+
|
|
604
|
+
.reg_compile = kanayago_reg_compile,
|
|
605
|
+
.reg_check_preprocess = kanayago_reg_check_preprocess,
|
|
606
|
+
.memcicmp = rb_memcicmp,
|
|
607
|
+
|
|
608
|
+
.compile_warn = rb_compile_warn,
|
|
609
|
+
.compile_warning = rb_compile_warning,
|
|
610
|
+
.bug = rb_bug,
|
|
611
|
+
.fatal = rb_fatal,
|
|
612
|
+
.verbose = kanayago_ruby_verbose,
|
|
613
|
+
.errno_ptr = kanayago_errno_ptr,
|
|
614
|
+
|
|
615
|
+
.make_backtrace = rb_make_backtrace,
|
|
616
|
+
|
|
617
|
+
.scan_hex = ruby_scan_hex,
|
|
618
|
+
.scan_oct = ruby_scan_oct,
|
|
619
|
+
.scan_digits = ruby_scan_digits,
|
|
620
|
+
.strtod = ruby_strtod,
|
|
621
|
+
|
|
622
|
+
.rtest = kanayago_rtest,
|
|
623
|
+
.nil_p = kanayago_nil_p,
|
|
624
|
+
.qnil = Qnil,
|
|
625
|
+
.qfalse = Qfalse,
|
|
626
|
+
.eArgError = kanayago_arg_error,
|
|
627
|
+
.long2int = rb_long2int,
|
|
628
|
+
|
|
629
|
+
/* For Ripper */
|
|
630
|
+
.static_id2sym = kanayago_static_id2sym,
|
|
631
|
+
.str_coderange_scan_restartable = kanayago_str_coderange_scan_restartable,
|
|
632
|
+
};
|
|
633
|
+
|
|
12
634
|
VALUE rb_mKanayago;
|
|
13
635
|
|
|
14
636
|
VALUE rb_cConstantNode;
|
|
@@ -512,7 +1134,8 @@ kanayago_parse(VALUE self, VALUE source)
|
|
|
512
1134
|
struct ruby_parser *parser;
|
|
513
1135
|
rb_parser_t *parser_params;
|
|
514
1136
|
|
|
515
|
-
|
|
1137
|
+
/* Use Kanayago's own parser config */
|
|
1138
|
+
parser_params = rb_ruby_parser_new(&kanayago_parser_config);
|
|
516
1139
|
VALUE vparser = TypedData_Make_Struct(0, struct ruby_parser,
|
|
517
1140
|
&ruby_parser_data_type, parser);
|
|
518
1141
|
parser->parser_params = parser_params;
|
|
@@ -528,6 +1151,12 @@ kanayago_parse(VALUE self, VALUE source)
|
|
|
528
1151
|
rb_ast_t *ast = rb_ruby_ast_data_get(vast);
|
|
529
1152
|
VALUE ast_node = ast_to_node_instance(ast->body.root);
|
|
530
1153
|
|
|
1154
|
+
/* Ensure vast and vparser are not garbage collected during AST processing.
|
|
1155
|
+
* The AST data (ast->body.root) is owned by vast, so we need to
|
|
1156
|
+
* keep vast alive until we're done traversing the AST. */
|
|
1157
|
+
RB_GC_GUARD(vast);
|
|
1158
|
+
RB_GC_GUARD(vparser);
|
|
1159
|
+
|
|
531
1160
|
// Get error_buffer from parser_params using accessor function
|
|
532
1161
|
VALUE error_buffer = rb_ruby_parser_error_buffer_get(parser_params);
|
|
533
1162
|
|
data/lib/kanayago/version.rb
CHANGED