prism 0.21.0 → 0.23.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +40 -1
- data/README.md +2 -1
- data/docs/releasing.md +84 -16
- data/docs/ruby_parser_translation.md +19 -0
- data/docs/serialization.md +2 -0
- data/ext/prism/api_node.c +784 -785
- data/ext/prism/extension.c +56 -19
- data/ext/prism/extension.h +2 -2
- data/include/prism/diagnostic.h +11 -6
- data/include/prism/encoding.h +7 -0
- data/include/prism/util/pm_constant_pool.h +1 -1
- data/include/prism/util/pm_strpbrk.h +4 -1
- data/include/prism/version.h +2 -2
- data/lib/prism/ffi.rb +8 -3
- data/lib/prism/lex_compat.rb +17 -1
- data/lib/prism/node.rb +212 -32
- data/lib/prism/node_ext.rb +25 -2
- data/lib/prism/parse_result.rb +46 -16
- data/lib/prism/serialize.rb +14 -6
- data/lib/prism/translation/parser/compiler.rb +16 -6
- data/lib/prism/translation/parser.rb +19 -12
- data/lib/prism/translation/ripper.rb +577 -0
- data/lib/prism/translation/ruby_parser.rb +1521 -0
- data/lib/prism/translation.rb +3 -3
- data/lib/prism.rb +0 -1
- data/prism.gemspec +5 -3
- data/src/diagnostic.c +20 -15
- data/src/encoding.c +16 -17
- data/src/options.c +7 -2
- data/src/prism.c +145 -90
- data/src/serialize.c +24 -13
- data/src/token_type.c +3 -3
- data/src/util/pm_constant_pool.c +1 -1
- data/src/util/pm_string.c +0 -7
- data/src/util/pm_strpbrk.c +122 -14
- metadata +6 -4
- data/lib/prism/ripper_compat.rb +0 -207
data/ext/prism/extension.c
CHANGED
@@ -1,5 +1,9 @@
|
|
1
1
|
#include "prism/extension.h"
|
2
2
|
|
3
|
+
#ifdef _WIN32
|
4
|
+
#include <ruby/win32.h>
|
5
|
+
#endif
|
6
|
+
|
3
7
|
// NOTE: this file should contain only bindings. All non-trivial logic should be
|
4
8
|
// in libprism so it can be shared with its various callers.
|
5
9
|
|
@@ -212,20 +216,29 @@ string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options)
|
|
212
216
|
/**
|
213
217
|
* Read options for methods that look like (filepath, **options).
|
214
218
|
*/
|
215
|
-
static
|
219
|
+
static void
|
216
220
|
file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
|
217
221
|
VALUE filepath;
|
218
222
|
VALUE keywords;
|
219
223
|
rb_scan_args(argc, argv, "1:", &filepath, &keywords);
|
220
224
|
|
225
|
+
Check_Type(filepath, T_STRING);
|
226
|
+
|
221
227
|
extract_options(options, filepath, keywords);
|
222
228
|
|
223
|
-
|
229
|
+
const char * string_source = (const char *) pm_string_source(&options->filepath);
|
230
|
+
|
231
|
+
if (!pm_string_mapped_init(input, string_source)) {
|
224
232
|
pm_options_free(options);
|
225
|
-
return false;
|
226
|
-
}
|
227
233
|
|
228
|
-
|
234
|
+
#ifdef _WIN32
|
235
|
+
int e = rb_w32_map_errno(GetLastError());
|
236
|
+
#else
|
237
|
+
int e = errno;
|
238
|
+
#endif
|
239
|
+
|
240
|
+
rb_syserr_fail(e, string_source);
|
241
|
+
}
|
229
242
|
}
|
230
243
|
|
231
244
|
/******************************************************************************/
|
@@ -299,7 +312,8 @@ static VALUE
|
|
299
312
|
dump_file(int argc, VALUE *argv, VALUE self) {
|
300
313
|
pm_string_t input;
|
301
314
|
pm_options_t options = { 0 };
|
302
|
-
|
315
|
+
|
316
|
+
file_options(argc, argv, &input, &options);
|
303
317
|
|
304
318
|
VALUE value = dump_input(&input, &options);
|
305
319
|
pm_string_free(&input);
|
@@ -404,6 +418,9 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
|
|
404
418
|
case PM_ERROR_LEVEL_FATAL:
|
405
419
|
level = ID2SYM(rb_intern("fatal"));
|
406
420
|
break;
|
421
|
+
case PM_ERROR_LEVEL_ARGUMENT:
|
422
|
+
level = ID2SYM(rb_intern("argument"));
|
423
|
+
break;
|
407
424
|
default:
|
408
425
|
rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, error->level);
|
409
426
|
}
|
@@ -525,8 +542,9 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
|
|
525
542
|
pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
|
526
543
|
pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
|
527
544
|
|
545
|
+
VALUE source_string = rb_str_new((const char *) pm_string_source(input), pm_string_length(input));
|
528
546
|
VALUE offsets = rb_ary_new();
|
529
|
-
VALUE source_argv[] = {
|
547
|
+
VALUE source_argv[] = { source_string, LONG2NUM(parser.start_line), offsets };
|
530
548
|
VALUE source = rb_class_new_instance(3, source_argv, rb_cPrismSource);
|
531
549
|
|
532
550
|
parse_lex_data_t parse_lex_data = {
|
@@ -544,17 +562,21 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
|
|
544
562
|
parser.lex_callback = &lex_callback;
|
545
563
|
pm_node_t *node = pm_parse(&parser);
|
546
564
|
|
547
|
-
// Here we need to update the
|
548
|
-
//
|
549
|
-
// it
|
565
|
+
// Here we need to update the Source object to have the correct
|
566
|
+
// encoding for the source string and the correct newline offsets.
|
567
|
+
// We do it here because we've already created the Source object and given
|
568
|
+
// it over to all of the tokens, and both of these are only set after pm_parse().
|
569
|
+
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
|
570
|
+
rb_enc_associate(source_string, encoding);
|
571
|
+
|
550
572
|
for (size_t index = 0; index < parser.newline_list.size; index++) {
|
551
|
-
rb_ary_push(offsets,
|
573
|
+
rb_ary_push(offsets, ULONG2NUM(parser.newline_list.offsets[index]));
|
552
574
|
}
|
553
575
|
|
554
576
|
VALUE value;
|
555
577
|
if (return_nodes) {
|
556
578
|
value = rb_ary_new_capa(2);
|
557
|
-
rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding));
|
579
|
+
rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding, source));
|
558
580
|
rb_ary_push(value, parse_lex_data.tokens);
|
559
581
|
} else {
|
560
582
|
value = parse_lex_data.tokens;
|
@@ -606,7 +628,8 @@ static VALUE
|
|
606
628
|
lex_file(int argc, VALUE *argv, VALUE self) {
|
607
629
|
pm_string_t input;
|
608
630
|
pm_options_t options = { 0 };
|
609
|
-
|
631
|
+
|
632
|
+
file_options(argc, argv, &input, &options);
|
610
633
|
|
611
634
|
VALUE value = parse_lex_input(&input, &options, false);
|
612
635
|
pm_string_free(&input);
|
@@ -632,7 +655,7 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
|
|
632
655
|
|
633
656
|
VALUE source = pm_source_new(&parser, encoding);
|
634
657
|
VALUE result_argv[] = {
|
635
|
-
pm_ast_new(&parser, node, encoding),
|
658
|
+
pm_ast_new(&parser, node, encoding, source),
|
636
659
|
parser_comments(&parser, source),
|
637
660
|
parser_magic_comments(&parser, source),
|
638
661
|
parser_data_loc(&parser, source),
|
@@ -707,7 +730,8 @@ static VALUE
|
|
707
730
|
parse_file(int argc, VALUE *argv, VALUE self) {
|
708
731
|
pm_string_t input;
|
709
732
|
pm_options_t options = { 0 };
|
710
|
-
|
733
|
+
|
734
|
+
file_options(argc, argv, &input, &options);
|
711
735
|
|
712
736
|
VALUE value = parse_input(&input, &options);
|
713
737
|
pm_string_free(&input);
|
@@ -767,7 +791,8 @@ static VALUE
|
|
767
791
|
parse_file_comments(int argc, VALUE *argv, VALUE self) {
|
768
792
|
pm_string_t input;
|
769
793
|
pm_options_t options = { 0 };
|
770
|
-
|
794
|
+
|
795
|
+
file_options(argc, argv, &input, &options);
|
771
796
|
|
772
797
|
VALUE value = parse_input_comments(&input, &options);
|
773
798
|
pm_string_free(&input);
|
@@ -821,7 +846,8 @@ static VALUE
|
|
821
846
|
parse_lex_file(int argc, VALUE *argv, VALUE self) {
|
822
847
|
pm_string_t input;
|
823
848
|
pm_options_t options = { 0 };
|
824
|
-
|
849
|
+
|
850
|
+
file_options(argc, argv, &input, &options);
|
825
851
|
|
826
852
|
VALUE value = parse_lex_input(&input, &options, true);
|
827
853
|
pm_string_free(&input);
|
@@ -878,7 +904,8 @@ static VALUE
|
|
878
904
|
parse_file_success_p(int argc, VALUE *argv, VALUE self) {
|
879
905
|
pm_string_t input;
|
880
906
|
pm_options_t options = { 0 };
|
881
|
-
|
907
|
+
|
908
|
+
file_options(argc, argv, &input, &options);
|
882
909
|
|
883
910
|
VALUE result = parse_input_success_p(&input, &options);
|
884
911
|
pm_string_free(&input);
|
@@ -956,7 +983,17 @@ profile_file(VALUE self, VALUE filepath) {
|
|
956
983
|
pm_string_t input;
|
957
984
|
|
958
985
|
const char *checked = check_string(filepath);
|
959
|
-
|
986
|
+
Check_Type(filepath, T_STRING);
|
987
|
+
|
988
|
+
if (!pm_string_mapped_init(&input, checked)) {
|
989
|
+
#ifdef _WIN32
|
990
|
+
int e = rb_w32_map_errno(GetLastError());
|
991
|
+
#else
|
992
|
+
int e = errno;
|
993
|
+
#endif
|
994
|
+
|
995
|
+
rb_syserr_fail(e, checked);
|
996
|
+
}
|
960
997
|
|
961
998
|
pm_options_t options = { 0 };
|
962
999
|
pm_options_filepath_set(&options, checked);
|
data/ext/prism/extension.h
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#ifndef PRISM_EXT_NODE_H
|
2
2
|
#define PRISM_EXT_NODE_H
|
3
3
|
|
4
|
-
#define EXPECTED_PRISM_VERSION "0.
|
4
|
+
#define EXPECTED_PRISM_VERSION "0.23.0"
|
5
5
|
|
6
6
|
#include <ruby.h>
|
7
7
|
#include <ruby/encoding.h>
|
@@ -9,7 +9,7 @@
|
|
9
9
|
|
10
10
|
VALUE pm_source_new(pm_parser_t *parser, rb_encoding *encoding);
|
11
11
|
VALUE pm_token_new(pm_parser_t *parser, pm_token_t *token, rb_encoding *encoding, VALUE source);
|
12
|
-
VALUE pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding);
|
12
|
+
VALUE pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding, VALUE source);
|
13
13
|
|
14
14
|
void Init_prism_api_node(void);
|
15
15
|
void Init_prism_pack(void);
|
data/include/prism/diagnostic.h
CHANGED
@@ -19,7 +19,10 @@
|
|
19
19
|
*/
|
20
20
|
typedef enum {
|
21
21
|
/** For errors that cannot be recovered from. */
|
22
|
-
PM_ERROR_LEVEL_FATAL = 0
|
22
|
+
PM_ERROR_LEVEL_FATAL = 0,
|
23
|
+
|
24
|
+
/** For errors that should raise an argument error. */
|
25
|
+
PM_ERROR_LEVEL_ARGUMENT = 1
|
23
26
|
} pm_error_level_t;
|
24
27
|
|
25
28
|
/**
|
@@ -28,6 +31,7 @@ typedef enum {
|
|
28
31
|
typedef enum {
|
29
32
|
/** For warnings which should be emitted if $VERBOSE != nil. */
|
30
33
|
PM_WARNING_LEVEL_DEFAULT = 0,
|
34
|
+
|
31
35
|
/** For warnings which should be emitted if $VERBOSE == true. */
|
32
36
|
PM_WARNING_LEVEL_VERBOSE = 1
|
33
37
|
} pm_warning_level_t;
|
@@ -192,8 +196,10 @@ typedef enum {
|
|
192
196
|
PM_ERR_INVALID_NUMBER_HEXADECIMAL,
|
193
197
|
PM_ERR_INVALID_NUMBER_OCTAL,
|
194
198
|
PM_ERR_INVALID_NUMBER_UNDERSCORE,
|
199
|
+
PM_ERR_INVALID_CHARACTER,
|
200
|
+
PM_ERR_INVALID_MULTIBYTE_CHARACTER,
|
201
|
+
PM_ERR_INVALID_PRINTABLE_CHARACTER,
|
195
202
|
PM_ERR_INVALID_PERCENT,
|
196
|
-
PM_ERR_INVALID_TOKEN,
|
197
203
|
PM_ERR_INVALID_VARIABLE_GLOBAL,
|
198
204
|
PM_ERR_IT_NOT_ALLOWED,
|
199
205
|
PM_ERR_LAMBDA_OPEN,
|
@@ -213,6 +219,7 @@ typedef enum {
|
|
213
219
|
PM_ERR_MODULE_NAME,
|
214
220
|
PM_ERR_MODULE_TERM,
|
215
221
|
PM_ERR_MULTI_ASSIGN_MULTI_SPLATS,
|
222
|
+
PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST,
|
216
223
|
PM_ERR_NOT_EXPRESSION,
|
217
224
|
PM_ERR_NO_LOCAL_VARIABLE,
|
218
225
|
PM_ERR_NUMBER_LITERAL_UNDERSCORE,
|
@@ -266,6 +273,7 @@ typedef enum {
|
|
266
273
|
PM_ERR_STATEMENT_UNDEF,
|
267
274
|
PM_ERR_STRING_CONCATENATION,
|
268
275
|
PM_ERR_STRING_INTERPOLATED_TERM,
|
276
|
+
PM_ERR_STRING_LITERAL_EOF,
|
269
277
|
PM_ERR_STRING_LITERAL_TERM,
|
270
278
|
PM_ERR_SYMBOL_INVALID,
|
271
279
|
PM_ERR_SYMBOL_TERM_DYNAMIC,
|
@@ -273,10 +281,7 @@ typedef enum {
|
|
273
281
|
PM_ERR_TERNARY_COLON,
|
274
282
|
PM_ERR_TERNARY_EXPRESSION_FALSE,
|
275
283
|
PM_ERR_TERNARY_EXPRESSION_TRUE,
|
276
|
-
|
277
|
-
PM_ERR_UNARY_RECEIVER_MINUS,
|
278
|
-
PM_ERR_UNARY_RECEIVER_PLUS,
|
279
|
-
PM_ERR_UNARY_RECEIVER_TILDE,
|
284
|
+
PM_ERR_UNARY_RECEIVER,
|
280
285
|
PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT,
|
281
286
|
PM_ERR_UNEXPECTED_TOKEN_IGNORE,
|
282
287
|
PM_ERR_UNDEF_ARGUMENT,
|
data/include/prism/encoding.h
CHANGED
@@ -245,6 +245,13 @@ extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];
|
|
245
245
|
*/
|
246
246
|
#define PM_ENCODING_US_ASCII_ENTRY (&pm_encodings[PM_ENCODING_US_ASCII])
|
247
247
|
|
248
|
+
/**
|
249
|
+
* This is the ASCII-8BIT encoding. We need a reference to it so that pm_strpbrk
|
250
|
+
* can compare against it because invalid multibyte characters are not a thing
|
251
|
+
* in this encoding.
|
252
|
+
*/
|
253
|
+
#define PM_ENCODING_ASCII_8BIT_ENTRY (&pm_encodings[PM_ENCODING_ASCII_8BIT])
|
254
|
+
|
248
255
|
/**
|
249
256
|
* Parse the given name of an encoding and return a pointer to the corresponding
|
250
257
|
* encoding struct if one can be found, otherwise return NULL.
|
@@ -163,7 +163,7 @@ pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool,
|
|
163
163
|
* @param length The length of the constant.
|
164
164
|
* @return The id of the constant.
|
165
165
|
*/
|
166
|
-
pm_constant_id_t pm_constant_pool_find(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
|
166
|
+
pm_constant_id_t pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length);
|
167
167
|
|
168
168
|
/**
|
169
169
|
* Insert a constant into a constant pool that is a slice of a source string.
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#define PRISM_STRPBRK_H
|
8
8
|
|
9
9
|
#include "prism/defines.h"
|
10
|
+
#include "prism/diagnostic.h"
|
10
11
|
#include "prism/parser.h"
|
11
12
|
|
12
13
|
#include <stddef.h>
|
@@ -35,9 +36,11 @@
|
|
35
36
|
* @param source The source to search.
|
36
37
|
* @param charset The charset to search for.
|
37
38
|
* @param length The maximum number of bytes to search.
|
39
|
+
* @param validate Whether to validate that the source string is valid in the
|
40
|
+
* current encoding of the parser.
|
38
41
|
* @return A pointer to the first character in the source string that is in the
|
39
42
|
* charset, or NULL if no such character exists.
|
40
43
|
*/
|
41
|
-
const uint8_t * pm_strpbrk(
|
44
|
+
const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate);
|
42
45
|
|
43
46
|
#endif
|
data/include/prism/version.h
CHANGED
@@ -14,7 +14,7 @@
|
|
14
14
|
/**
|
15
15
|
* The minor version of the Prism library as an int.
|
16
16
|
*/
|
17
|
-
#define PRISM_VERSION_MINOR
|
17
|
+
#define PRISM_VERSION_MINOR 23
|
18
18
|
|
19
19
|
/**
|
20
20
|
* The patch version of the Prism library as an int.
|
@@ -24,6 +24,6 @@
|
|
24
24
|
/**
|
25
25
|
* The version of the Prism library as a constant string.
|
26
26
|
*/
|
27
|
-
#define PRISM_VERSION "0.
|
27
|
+
#define PRISM_VERSION "0.23.0"
|
28
28
|
|
29
29
|
#endif
|
data/lib/prism/ffi.rb
CHANGED
@@ -160,8 +160,13 @@ module Prism
|
|
160
160
|
pointer = FFI::MemoryPointer.new(SIZEOF)
|
161
161
|
|
162
162
|
begin
|
163
|
-
raise unless
|
164
|
-
|
163
|
+
raise TypeError unless filepath.is_a?(String)
|
164
|
+
|
165
|
+
if LibRubyParser.pm_string_mapped_init(pointer, filepath)
|
166
|
+
yield new(pointer)
|
167
|
+
else
|
168
|
+
raise SystemCallError.new(filepath, FFI.errno)
|
169
|
+
end
|
165
170
|
ensure
|
166
171
|
LibRubyParser.pm_string_free(pointer)
|
167
172
|
pointer.free
|
@@ -312,7 +317,7 @@ module Prism
|
|
312
317
|
values << (options.fetch(:frozen_string_literal, false) ? 1 : 0)
|
313
318
|
|
314
319
|
template << "C"
|
315
|
-
values << { nil => 0, "3.3.0" => 1, "latest" => 0 }.fetch(options[:version])
|
320
|
+
values << { nil => 0, "3.3.0" => 1, "3.4.0" => 0, "latest" => 0 }.fetch(options[:version])
|
316
321
|
|
317
322
|
template << "L"
|
318
323
|
if (scopes = options[:scopes])
|
data/lib/prism/lex_compat.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require "delegate"
|
4
|
+
require "ripper"
|
4
5
|
|
5
6
|
module Prism
|
6
7
|
# This class is responsible for lexing the source using prism and then
|
@@ -860,7 +861,7 @@ module Prism
|
|
860
861
|
previous = []
|
861
862
|
results = []
|
862
863
|
|
863
|
-
|
864
|
+
lex(source).each do |token|
|
864
865
|
case token[1]
|
865
866
|
when :on_sp
|
866
867
|
# skip
|
@@ -886,6 +887,21 @@ module Prism
|
|
886
887
|
|
887
888
|
results
|
888
889
|
end
|
890
|
+
|
891
|
+
private
|
892
|
+
|
893
|
+
if Ripper.method(:lex).parameters.assoc(:keyrest)
|
894
|
+
def lex(source)
|
895
|
+
Ripper.lex(source, raise_errors: true)
|
896
|
+
end
|
897
|
+
else
|
898
|
+
def lex(source)
|
899
|
+
ripper = Ripper::Lexer.new(source)
|
900
|
+
ripper.lex.tap do |result|
|
901
|
+
raise SyntaxError, ripper.errors.map(&:message).join(' ;') if ripper.errors.any?
|
902
|
+
end
|
903
|
+
end
|
904
|
+
end
|
889
905
|
end
|
890
906
|
|
891
907
|
private_constant :LexRipper
|