prism 0.21.0 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +40 -1
- data/README.md +2 -1
- data/docs/releasing.md +84 -16
- data/docs/ruby_parser_translation.md +19 -0
- data/docs/serialization.md +2 -0
- data/ext/prism/api_node.c +784 -785
- data/ext/prism/extension.c +56 -19
- data/ext/prism/extension.h +2 -2
- data/include/prism/diagnostic.h +11 -6
- data/include/prism/encoding.h +7 -0
- data/include/prism/util/pm_constant_pool.h +1 -1
- data/include/prism/util/pm_strpbrk.h +4 -1
- data/include/prism/version.h +2 -2
- data/lib/prism/ffi.rb +8 -3
- data/lib/prism/lex_compat.rb +17 -1
- data/lib/prism/node.rb +212 -32
- data/lib/prism/node_ext.rb +25 -2
- data/lib/prism/parse_result.rb +46 -16
- data/lib/prism/serialize.rb +14 -6
- data/lib/prism/translation/parser/compiler.rb +16 -6
- data/lib/prism/translation/parser.rb +19 -12
- data/lib/prism/translation/ripper.rb +577 -0
- data/lib/prism/translation/ruby_parser.rb +1521 -0
- data/lib/prism/translation.rb +3 -3
- data/lib/prism.rb +0 -1
- data/prism.gemspec +5 -3
- data/src/diagnostic.c +20 -15
- data/src/encoding.c +16 -17
- data/src/options.c +7 -2
- data/src/prism.c +145 -90
- data/src/serialize.c +24 -13
- data/src/token_type.c +3 -3
- data/src/util/pm_constant_pool.c +1 -1
- data/src/util/pm_string.c +0 -7
- data/src/util/pm_strpbrk.c +122 -14
- metadata +6 -4
- data/lib/prism/ripper_compat.rb +0 -207
data/ext/prism/extension.c
CHANGED
@@ -1,5 +1,9 @@
|
|
1
1
|
#include "prism/extension.h"
|
2
2
|
|
3
|
+
#ifdef _WIN32
|
4
|
+
#include <ruby/win32.h>
|
5
|
+
#endif
|
6
|
+
|
3
7
|
// NOTE: this file should contain only bindings. All non-trivial logic should be
|
4
8
|
// in libprism so it can be shared its the various callers.
|
5
9
|
|
@@ -212,20 +216,29 @@ string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options)
|
|
212
216
|
/**
|
213
217
|
* Read options for methods that look like (filepath, **options).
|
214
218
|
*/
|
215
|
-
static bool
|
219
|
+
static void
|
216
220
|
file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
|
217
221
|
VALUE filepath;
|
218
222
|
VALUE keywords;
|
219
223
|
rb_scan_args(argc, argv, "1:", &filepath, &keywords);
|
220
224
|
|
225
|
+
Check_Type(filepath, T_STRING);
|
226
|
+
|
221
227
|
extract_options(options, filepath, keywords);
|
222
228
|
|
223
|
-
|
229
|
+
const char * string_source = (const char *) pm_string_source(&options->filepath);
|
230
|
+
|
231
|
+
if (!pm_string_mapped_init(input, string_source)) {
|
224
232
|
pm_options_free(options);
|
225
|
-
return false;
|
226
|
-
}
|
227
233
|
|
228
|
-
|
234
|
+
#ifdef _WIN32
|
235
|
+
int e = rb_w32_map_errno(GetLastError());
|
236
|
+
#else
|
237
|
+
int e = errno;
|
238
|
+
#endif
|
239
|
+
|
240
|
+
rb_syserr_fail(e, string_source);
|
241
|
+
}
|
229
242
|
}
|
230
243
|
|
231
244
|
/******************************************************************************/
|
@@ -299,7 +312,8 @@ static VALUE
|
|
299
312
|
dump_file(int argc, VALUE *argv, VALUE self) {
|
300
313
|
pm_string_t input;
|
301
314
|
pm_options_t options = { 0 };
|
302
|
-
|
315
|
+
|
316
|
+
file_options(argc, argv, &input, &options);
|
303
317
|
|
304
318
|
VALUE value = dump_input(&input, &options);
|
305
319
|
pm_string_free(&input);
|
@@ -404,6 +418,9 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
|
|
404
418
|
case PM_ERROR_LEVEL_FATAL:
|
405
419
|
level = ID2SYM(rb_intern("fatal"));
|
406
420
|
break;
|
421
|
+
case PM_ERROR_LEVEL_ARGUMENT:
|
422
|
+
level = ID2SYM(rb_intern("argument"));
|
423
|
+
break;
|
407
424
|
default:
|
408
425
|
rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, error->level);
|
409
426
|
}
|
@@ -525,8 +542,9 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
|
|
525
542
|
pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
|
526
543
|
pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
|
527
544
|
|
545
|
+
VALUE source_string = rb_str_new((const char *) pm_string_source(input), pm_string_length(input));
|
528
546
|
VALUE offsets = rb_ary_new();
|
529
|
-
VALUE source_argv[] = {
|
547
|
+
VALUE source_argv[] = { source_string, LONG2NUM(parser.start_line), offsets };
|
530
548
|
VALUE source = rb_class_new_instance(3, source_argv, rb_cPrismSource);
|
531
549
|
|
532
550
|
parse_lex_data_t parse_lex_data = {
|
@@ -544,17 +562,21 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
|
|
544
562
|
parser.lex_callback = &lex_callback;
|
545
563
|
pm_node_t *node = pm_parse(&parser);
|
546
564
|
|
547
|
-
// Here we need to update the
|
548
|
-
//
|
549
|
-
// it
|
565
|
+
// Here we need to update the Source object to have the correct
|
566
|
+
// encoding for the source string and the correct newline offsets.
|
567
|
+
// We do it here because we've already created the Source object and given
|
568
|
+
// it over to all of the tokens, and both of these are only set after pm_parse().
|
569
|
+
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
|
570
|
+
rb_enc_associate(source_string, encoding);
|
571
|
+
|
550
572
|
for (size_t index = 0; index < parser.newline_list.size; index++) {
|
551
|
-
rb_ary_push(offsets,
|
573
|
+
rb_ary_push(offsets, ULONG2NUM(parser.newline_list.offsets[index]));
|
552
574
|
}
|
553
575
|
|
554
576
|
VALUE value;
|
555
577
|
if (return_nodes) {
|
556
578
|
value = rb_ary_new_capa(2);
|
557
|
-
rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding));
|
579
|
+
rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding, source));
|
558
580
|
rb_ary_push(value, parse_lex_data.tokens);
|
559
581
|
} else {
|
560
582
|
value = parse_lex_data.tokens;
|
@@ -606,7 +628,8 @@ static VALUE
|
|
606
628
|
lex_file(int argc, VALUE *argv, VALUE self) {
|
607
629
|
pm_string_t input;
|
608
630
|
pm_options_t options = { 0 };
|
609
|
-
|
631
|
+
|
632
|
+
file_options(argc, argv, &input, &options);
|
610
633
|
|
611
634
|
VALUE value = parse_lex_input(&input, &options, false);
|
612
635
|
pm_string_free(&input);
|
@@ -632,7 +655,7 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
|
|
632
655
|
|
633
656
|
VALUE source = pm_source_new(&parser, encoding);
|
634
657
|
VALUE result_argv[] = {
|
635
|
-
pm_ast_new(&parser, node, encoding),
|
658
|
+
pm_ast_new(&parser, node, encoding, source),
|
636
659
|
parser_comments(&parser, source),
|
637
660
|
parser_magic_comments(&parser, source),
|
638
661
|
parser_data_loc(&parser, source),
|
@@ -707,7 +730,8 @@ static VALUE
|
|
707
730
|
parse_file(int argc, VALUE *argv, VALUE self) {
|
708
731
|
pm_string_t input;
|
709
732
|
pm_options_t options = { 0 };
|
710
|
-
|
733
|
+
|
734
|
+
file_options(argc, argv, &input, &options);
|
711
735
|
|
712
736
|
VALUE value = parse_input(&input, &options);
|
713
737
|
pm_string_free(&input);
|
@@ -767,7 +791,8 @@ static VALUE
|
|
767
791
|
parse_file_comments(int argc, VALUE *argv, VALUE self) {
|
768
792
|
pm_string_t input;
|
769
793
|
pm_options_t options = { 0 };
|
770
|
-
|
794
|
+
|
795
|
+
file_options(argc, argv, &input, &options);
|
771
796
|
|
772
797
|
VALUE value = parse_input_comments(&input, &options);
|
773
798
|
pm_string_free(&input);
|
@@ -821,7 +846,8 @@ static VALUE
|
|
821
846
|
parse_lex_file(int argc, VALUE *argv, VALUE self) {
|
822
847
|
pm_string_t input;
|
823
848
|
pm_options_t options = { 0 };
|
824
|
-
|
849
|
+
|
850
|
+
file_options(argc, argv, &input, &options);
|
825
851
|
|
826
852
|
VALUE value = parse_lex_input(&input, &options, true);
|
827
853
|
pm_string_free(&input);
|
@@ -878,7 +904,8 @@ static VALUE
|
|
878
904
|
parse_file_success_p(int argc, VALUE *argv, VALUE self) {
|
879
905
|
pm_string_t input;
|
880
906
|
pm_options_t options = { 0 };
|
881
|
-
|
907
|
+
|
908
|
+
file_options(argc, argv, &input, &options);
|
882
909
|
|
883
910
|
VALUE result = parse_input_success_p(&input, &options);
|
884
911
|
pm_string_free(&input);
|
@@ -956,7 +983,17 @@ profile_file(VALUE self, VALUE filepath) {
|
|
956
983
|
pm_string_t input;
|
957
984
|
|
958
985
|
const char *checked = check_string(filepath);
|
959
|
-
|
986
|
+
Check_Type(filepath, T_STRING);
|
987
|
+
|
988
|
+
if (!pm_string_mapped_init(&input, checked)) {
|
989
|
+
#ifdef _WIN32
|
990
|
+
int e = rb_w32_map_errno(GetLastError());
|
991
|
+
#else
|
992
|
+
int e = errno;
|
993
|
+
#endif
|
994
|
+
|
995
|
+
rb_syserr_fail(e, checked);
|
996
|
+
}
|
960
997
|
|
961
998
|
pm_options_t options = { 0 };
|
962
999
|
pm_options_filepath_set(&options, checked);
|
data/ext/prism/extension.h
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#ifndef PRISM_EXT_NODE_H
|
2
2
|
#define PRISM_EXT_NODE_H
|
3
3
|
|
4
|
-
#define EXPECTED_PRISM_VERSION "0.21.0"
|
4
|
+
#define EXPECTED_PRISM_VERSION "0.23.0"
|
5
5
|
|
6
6
|
#include <ruby.h>
|
7
7
|
#include <ruby/encoding.h>
|
@@ -9,7 +9,7 @@
|
|
9
9
|
|
10
10
|
VALUE pm_source_new(pm_parser_t *parser, rb_encoding *encoding);
|
11
11
|
VALUE pm_token_new(pm_parser_t *parser, pm_token_t *token, rb_encoding *encoding, VALUE source);
|
12
|
-
VALUE pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding);
|
12
|
+
VALUE pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding, VALUE source);
|
13
13
|
|
14
14
|
void Init_prism_api_node(void);
|
15
15
|
void Init_prism_pack(void);
|
data/include/prism/diagnostic.h
CHANGED
@@ -19,7 +19,10 @@
|
|
19
19
|
*/
|
20
20
|
typedef enum {
|
21
21
|
/** For errors that cannot be recovered from. */
|
22
|
-
PM_ERROR_LEVEL_FATAL = 0
|
22
|
+
PM_ERROR_LEVEL_FATAL = 0,
|
23
|
+
|
24
|
+
/** For errors that should raise an argument error. */
|
25
|
+
PM_ERROR_LEVEL_ARGUMENT = 1
|
23
26
|
} pm_error_level_t;
|
24
27
|
|
25
28
|
/**
|
@@ -28,6 +31,7 @@ typedef enum {
|
|
28
31
|
typedef enum {
|
29
32
|
/** For warnings which should be emitted if $VERBOSE != nil. */
|
30
33
|
PM_WARNING_LEVEL_DEFAULT = 0,
|
34
|
+
|
31
35
|
/** For warnings which should be emitted if $VERBOSE == true. */
|
32
36
|
PM_WARNING_LEVEL_VERBOSE = 1
|
33
37
|
} pm_warning_level_t;
|
@@ -192,8 +196,10 @@ typedef enum {
|
|
192
196
|
PM_ERR_INVALID_NUMBER_HEXADECIMAL,
|
193
197
|
PM_ERR_INVALID_NUMBER_OCTAL,
|
194
198
|
PM_ERR_INVALID_NUMBER_UNDERSCORE,
|
199
|
+
PM_ERR_INVALID_CHARACTER,
|
200
|
+
PM_ERR_INVALID_MULTIBYTE_CHARACTER,
|
201
|
+
PM_ERR_INVALID_PRINTABLE_CHARACTER,
|
195
202
|
PM_ERR_INVALID_PERCENT,
|
196
|
-
PM_ERR_INVALID_TOKEN,
|
197
203
|
PM_ERR_INVALID_VARIABLE_GLOBAL,
|
198
204
|
PM_ERR_IT_NOT_ALLOWED,
|
199
205
|
PM_ERR_LAMBDA_OPEN,
|
@@ -213,6 +219,7 @@ typedef enum {
|
|
213
219
|
PM_ERR_MODULE_NAME,
|
214
220
|
PM_ERR_MODULE_TERM,
|
215
221
|
PM_ERR_MULTI_ASSIGN_MULTI_SPLATS,
|
222
|
+
PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST,
|
216
223
|
PM_ERR_NOT_EXPRESSION,
|
217
224
|
PM_ERR_NO_LOCAL_VARIABLE,
|
218
225
|
PM_ERR_NUMBER_LITERAL_UNDERSCORE,
|
@@ -266,6 +273,7 @@ typedef enum {
|
|
266
273
|
PM_ERR_STATEMENT_UNDEF,
|
267
274
|
PM_ERR_STRING_CONCATENATION,
|
268
275
|
PM_ERR_STRING_INTERPOLATED_TERM,
|
276
|
+
PM_ERR_STRING_LITERAL_EOF,
|
269
277
|
PM_ERR_STRING_LITERAL_TERM,
|
270
278
|
PM_ERR_SYMBOL_INVALID,
|
271
279
|
PM_ERR_SYMBOL_TERM_DYNAMIC,
|
@@ -273,10 +281,7 @@ typedef enum {
|
|
273
281
|
PM_ERR_TERNARY_COLON,
|
274
282
|
PM_ERR_TERNARY_EXPRESSION_FALSE,
|
275
283
|
PM_ERR_TERNARY_EXPRESSION_TRUE,
|
276
|
-
|
277
|
-
PM_ERR_UNARY_RECEIVER_MINUS,
|
278
|
-
PM_ERR_UNARY_RECEIVER_PLUS,
|
279
|
-
PM_ERR_UNARY_RECEIVER_TILDE,
|
284
|
+
PM_ERR_UNARY_RECEIVER,
|
280
285
|
PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT,
|
281
286
|
PM_ERR_UNEXPECTED_TOKEN_IGNORE,
|
282
287
|
PM_ERR_UNDEF_ARGUMENT,
|
data/include/prism/encoding.h
CHANGED
@@ -245,6 +245,13 @@ extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];
|
|
245
245
|
*/
|
246
246
|
#define PM_ENCODING_US_ASCII_ENTRY (&pm_encodings[PM_ENCODING_US_ASCII])
|
247
247
|
|
248
|
+
/**
|
249
|
+
* This is the ASCII-8BIT encoding. We need a reference to it so that pm_strpbrk
|
250
|
+
* can compare against it because invalid multibyte characters are not a thing
|
251
|
+
* in this encoding.
|
252
|
+
*/
|
253
|
+
#define PM_ENCODING_ASCII_8BIT_ENTRY (&pm_encodings[PM_ENCODING_ASCII_8BIT])
|
254
|
+
|
248
255
|
/**
|
249
256
|
* Parse the given name of an encoding and return a pointer to the corresponding
|
250
257
|
* encoding struct if one can be found, otherwise return NULL.
|
data/include/prism/util/pm_constant_pool.h
CHANGED
@@ -163,7 +163,7 @@ pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool,
|
|
163
163
|
* @param length The length of the constant.
|
164
164
|
* @return The id of the constant.
|
165
165
|
*/
|
166
|
-
pm_constant_id_t pm_constant_pool_find(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
|
166
|
+
pm_constant_id_t pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length);
|
167
167
|
|
168
168
|
/**
|
169
169
|
* Insert a constant into a constant pool that is a slice of a source string.
|
data/include/prism/util/pm_strpbrk.h
CHANGED
@@ -7,6 +7,7 @@
|
|
7
7
|
#define PRISM_STRPBRK_H
|
8
8
|
|
9
9
|
#include "prism/defines.h"
|
10
|
+
#include "prism/diagnostic.h"
|
10
11
|
#include "prism/parser.h"
|
11
12
|
|
12
13
|
#include <stddef.h>
|
@@ -35,9 +36,11 @@
|
|
35
36
|
* @param source The source to search.
|
36
37
|
* @param charset The charset to search for.
|
37
38
|
* @param length The maximum number of bytes to search.
|
39
|
+
* @param validate Whether to validate that the source string is valid in the
|
40
|
+
* current encoding of the parser.
|
38
41
|
* @return A pointer to the first character in the source string that is in the
|
39
42
|
* charset, or NULL if no such character exists.
|
40
43
|
*/
|
41
|
-
const uint8_t * pm_strpbrk(
|
44
|
+
const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate);
|
42
45
|
|
43
46
|
#endif
|
data/include/prism/version.h
CHANGED
@@ -14,7 +14,7 @@
|
|
14
14
|
/**
|
15
15
|
* The minor version of the Prism library as an int.
|
16
16
|
*/
|
17
|
-
#define PRISM_VERSION_MINOR 21
|
17
|
+
#define PRISM_VERSION_MINOR 23
|
18
18
|
|
19
19
|
/**
|
20
20
|
* The patch version of the Prism library as an int.
|
@@ -24,6 +24,6 @@
|
|
24
24
|
/**
|
25
25
|
* The version of the Prism library as a constant string.
|
26
26
|
*/
|
27
|
-
#define PRISM_VERSION "0.21.0"
|
27
|
+
#define PRISM_VERSION "0.23.0"
|
28
28
|
|
29
29
|
#endif
|
data/lib/prism/ffi.rb
CHANGED
@@ -160,8 +160,13 @@ module Prism
|
|
160
160
|
pointer = FFI::MemoryPointer.new(SIZEOF)
|
161
161
|
|
162
162
|
begin
|
163
|
-
raise unless
|
164
|
-
|
163
|
+
raise TypeError unless filepath.is_a?(String)
|
164
|
+
|
165
|
+
if LibRubyParser.pm_string_mapped_init(pointer, filepath)
|
166
|
+
yield new(pointer)
|
167
|
+
else
|
168
|
+
raise SystemCallError.new(filepath, FFI.errno)
|
169
|
+
end
|
165
170
|
ensure
|
166
171
|
LibRubyParser.pm_string_free(pointer)
|
167
172
|
pointer.free
|
@@ -312,7 +317,7 @@ module Prism
|
|
312
317
|
values << (options.fetch(:frozen_string_literal, false) ? 1 : 0)
|
313
318
|
|
314
319
|
template << "C"
|
315
|
-
values << { nil => 0, "3.3.0" => 1, "latest" => 0 }.fetch(options[:version])
|
320
|
+
values << { nil => 0, "3.3.0" => 1, "3.4.0" => 0, "latest" => 0 }.fetch(options[:version])
|
316
321
|
|
317
322
|
template << "L"
|
318
323
|
if (scopes = options[:scopes])
|
data/lib/prism/lex_compat.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require "delegate"
|
4
|
+
require "ripper"
|
4
5
|
|
5
6
|
module Prism
|
6
7
|
# This class is responsible for lexing the source using prism and then
|
@@ -860,7 +861,7 @@ module Prism
|
|
860
861
|
previous = []
|
861
862
|
results = []
|
862
863
|
|
863
|
-
|
864
|
+
lex(source).each do |token|
|
864
865
|
case token[1]
|
865
866
|
when :on_sp
|
866
867
|
# skip
|
@@ -886,6 +887,21 @@ module Prism
|
|
886
887
|
|
887
888
|
results
|
888
889
|
end
|
890
|
+
|
891
|
+
private
|
892
|
+
|
893
|
+
if Ripper.method(:lex).parameters.assoc(:keyrest)
|
894
|
+
def lex(source)
|
895
|
+
Ripper.lex(source, raise_errors: true)
|
896
|
+
end
|
897
|
+
else
|
898
|
+
def lex(source)
|
899
|
+
ripper = Ripper::Lexer.new(source)
|
900
|
+
ripper.lex.tap do |result|
|
901
|
+
raise SyntaxError, ripper.errors.map(&:message).join(' ;') if ripper.errors.any?
|
902
|
+
end
|
903
|
+
end
|
904
|
+
end
|
889
905
|
end
|
890
906
|
|
891
907
|
private_constant :LexRipper
|