prism 0.21.0 → 0.23.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,9 @@
1
1
  #include "prism/extension.h"
2
2
 
3
+ #ifdef _WIN32
4
+ #include <ruby/win32.h>
5
+ #endif
6
+
3
7
  // NOTE: this file should contain only bindings. All non-trivial logic should be
4
8
  // in libprism so it can be shared with its various callers.
5
9
 
@@ -212,20 +216,29 @@ string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options)
212
216
  /**
213
217
  * Read options for methods that look like (filepath, **options).
214
218
  */
215
- static bool
219
+ static void
216
220
  file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
217
221
  VALUE filepath;
218
222
  VALUE keywords;
219
223
  rb_scan_args(argc, argv, "1:", &filepath, &keywords);
220
224
 
225
+ Check_Type(filepath, T_STRING);
226
+
221
227
  extract_options(options, filepath, keywords);
222
228
 
223
- if (!pm_string_mapped_init(input, (const char *) pm_string_source(&options->filepath))) {
229
+ const char * string_source = (const char *) pm_string_source(&options->filepath);
230
+
231
+ if (!pm_string_mapped_init(input, string_source)) {
224
232
  pm_options_free(options);
225
- return false;
226
- }
227
233
 
228
- return true;
234
+ #ifdef _WIN32
235
+ int e = rb_w32_map_errno(GetLastError());
236
+ #else
237
+ int e = errno;
238
+ #endif
239
+
240
+ rb_syserr_fail(e, string_source);
241
+ }
229
242
  }
230
243
 
231
244
  /******************************************************************************/
@@ -299,7 +312,8 @@ static VALUE
299
312
  dump_file(int argc, VALUE *argv, VALUE self) {
300
313
  pm_string_t input;
301
314
  pm_options_t options = { 0 };
302
- if (!file_options(argc, argv, &input, &options)) return Qnil;
315
+
316
+ file_options(argc, argv, &input, &options);
303
317
 
304
318
  VALUE value = dump_input(&input, &options);
305
319
  pm_string_free(&input);
@@ -404,6 +418,9 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
404
418
  case PM_ERROR_LEVEL_FATAL:
405
419
  level = ID2SYM(rb_intern("fatal"));
406
420
  break;
421
+ case PM_ERROR_LEVEL_ARGUMENT:
422
+ level = ID2SYM(rb_intern("argument"));
423
+ break;
407
424
  default:
408
425
  rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, error->level);
409
426
  }
@@ -525,8 +542,9 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
525
542
  pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
526
543
  pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
527
544
 
545
+ VALUE source_string = rb_str_new((const char *) pm_string_source(input), pm_string_length(input));
528
546
  VALUE offsets = rb_ary_new();
529
- VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), ULONG2NUM(parser.start_line), offsets };
547
+ VALUE source_argv[] = { source_string, LONG2NUM(parser.start_line), offsets };
530
548
  VALUE source = rb_class_new_instance(3, source_argv, rb_cPrismSource);
531
549
 
532
550
  parse_lex_data_t parse_lex_data = {
@@ -544,17 +562,21 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
544
562
  parser.lex_callback = &lex_callback;
545
563
  pm_node_t *node = pm_parse(&parser);
546
564
 
547
- // Here we need to update the source range to have the correct newline
548
- // offsets. We do it here because we've already created the object and given
549
- // it over to all of the tokens.
565
+ // Here we need to update the Source object to have the correct
566
+ // encoding for the source string and the correct newline offsets.
567
+ // We do it here because we've already created the Source object and given
568
+ // it over to all of the tokens, and both of these are only set after pm_parse().
569
+ rb_encoding *encoding = rb_enc_find(parser.encoding->name);
570
+ rb_enc_associate(source_string, encoding);
571
+
550
572
  for (size_t index = 0; index < parser.newline_list.size; index++) {
551
- rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
573
+ rb_ary_push(offsets, ULONG2NUM(parser.newline_list.offsets[index]));
552
574
  }
553
575
 
554
576
  VALUE value;
555
577
  if (return_nodes) {
556
578
  value = rb_ary_new_capa(2);
557
- rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding));
579
+ rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding, source));
558
580
  rb_ary_push(value, parse_lex_data.tokens);
559
581
  } else {
560
582
  value = parse_lex_data.tokens;
@@ -606,7 +628,8 @@ static VALUE
606
628
  lex_file(int argc, VALUE *argv, VALUE self) {
607
629
  pm_string_t input;
608
630
  pm_options_t options = { 0 };
609
- if (!file_options(argc, argv, &input, &options)) return Qnil;
631
+
632
+ file_options(argc, argv, &input, &options);
610
633
 
611
634
  VALUE value = parse_lex_input(&input, &options, false);
612
635
  pm_string_free(&input);
@@ -632,7 +655,7 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
632
655
 
633
656
  VALUE source = pm_source_new(&parser, encoding);
634
657
  VALUE result_argv[] = {
635
- pm_ast_new(&parser, node, encoding),
658
+ pm_ast_new(&parser, node, encoding, source),
636
659
  parser_comments(&parser, source),
637
660
  parser_magic_comments(&parser, source),
638
661
  parser_data_loc(&parser, source),
@@ -707,7 +730,8 @@ static VALUE
707
730
  parse_file(int argc, VALUE *argv, VALUE self) {
708
731
  pm_string_t input;
709
732
  pm_options_t options = { 0 };
710
- if (!file_options(argc, argv, &input, &options)) return Qnil;
733
+
734
+ file_options(argc, argv, &input, &options);
711
735
 
712
736
  VALUE value = parse_input(&input, &options);
713
737
  pm_string_free(&input);
@@ -767,7 +791,8 @@ static VALUE
767
791
  parse_file_comments(int argc, VALUE *argv, VALUE self) {
768
792
  pm_string_t input;
769
793
  pm_options_t options = { 0 };
770
- if (!file_options(argc, argv, &input, &options)) return Qnil;
794
+
795
+ file_options(argc, argv, &input, &options);
771
796
 
772
797
  VALUE value = parse_input_comments(&input, &options);
773
798
  pm_string_free(&input);
@@ -821,7 +846,8 @@ static VALUE
821
846
  parse_lex_file(int argc, VALUE *argv, VALUE self) {
822
847
  pm_string_t input;
823
848
  pm_options_t options = { 0 };
824
- if (!file_options(argc, argv, &input, &options)) return Qnil;
849
+
850
+ file_options(argc, argv, &input, &options);
825
851
 
826
852
  VALUE value = parse_lex_input(&input, &options, true);
827
853
  pm_string_free(&input);
@@ -878,7 +904,8 @@ static VALUE
878
904
  parse_file_success_p(int argc, VALUE *argv, VALUE self) {
879
905
  pm_string_t input;
880
906
  pm_options_t options = { 0 };
881
- if (!file_options(argc, argv, &input, &options)) return Qnil;
907
+
908
+ file_options(argc, argv, &input, &options);
882
909
 
883
910
  VALUE result = parse_input_success_p(&input, &options);
884
911
  pm_string_free(&input);
@@ -956,7 +983,17 @@ profile_file(VALUE self, VALUE filepath) {
956
983
  pm_string_t input;
957
984
 
958
985
  const char *checked = check_string(filepath);
959
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
986
+ Check_Type(filepath, T_STRING);
987
+
988
+ if (!pm_string_mapped_init(&input, checked)) {
989
+ #ifdef _WIN32
990
+ int e = rb_w32_map_errno(GetLastError());
991
+ #else
992
+ int e = errno;
993
+ #endif
994
+
995
+ rb_syserr_fail(e, checked);
996
+ }
960
997
 
961
998
  pm_options_t options = { 0 };
962
999
  pm_options_filepath_set(&options, checked);
@@ -1,7 +1,7 @@
1
1
  #ifndef PRISM_EXT_NODE_H
2
2
  #define PRISM_EXT_NODE_H
3
3
 
4
- #define EXPECTED_PRISM_VERSION "0.21.0"
4
+ #define EXPECTED_PRISM_VERSION "0.23.0"
5
5
 
6
6
  #include <ruby.h>
7
7
  #include <ruby/encoding.h>
@@ -9,7 +9,7 @@
9
9
 
10
10
  VALUE pm_source_new(pm_parser_t *parser, rb_encoding *encoding);
11
11
  VALUE pm_token_new(pm_parser_t *parser, pm_token_t *token, rb_encoding *encoding, VALUE source);
12
- VALUE pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding);
12
+ VALUE pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding, VALUE source);
13
13
 
14
14
  void Init_prism_api_node(void);
15
15
  void Init_prism_pack(void);
@@ -19,7 +19,10 @@
19
19
  */
20
20
  typedef enum {
21
21
  /** For errors that cannot be recovered from. */
22
- PM_ERROR_LEVEL_FATAL = 0
22
+ PM_ERROR_LEVEL_FATAL = 0,
23
+
24
+ /** For errors that should raise an argument error. */
25
+ PM_ERROR_LEVEL_ARGUMENT = 1
23
26
  } pm_error_level_t;
24
27
 
25
28
  /**
@@ -28,6 +31,7 @@ typedef enum {
28
31
  typedef enum {
29
32
  /** For warnings which should be emitted if $VERBOSE != nil. */
30
33
  PM_WARNING_LEVEL_DEFAULT = 0,
34
+
31
35
  /** For warnings which should be emitted if $VERBOSE == true. */
32
36
  PM_WARNING_LEVEL_VERBOSE = 1
33
37
  } pm_warning_level_t;
@@ -192,8 +196,10 @@ typedef enum {
192
196
  PM_ERR_INVALID_NUMBER_HEXADECIMAL,
193
197
  PM_ERR_INVALID_NUMBER_OCTAL,
194
198
  PM_ERR_INVALID_NUMBER_UNDERSCORE,
199
+ PM_ERR_INVALID_CHARACTER,
200
+ PM_ERR_INVALID_MULTIBYTE_CHARACTER,
201
+ PM_ERR_INVALID_PRINTABLE_CHARACTER,
195
202
  PM_ERR_INVALID_PERCENT,
196
- PM_ERR_INVALID_TOKEN,
197
203
  PM_ERR_INVALID_VARIABLE_GLOBAL,
198
204
  PM_ERR_IT_NOT_ALLOWED,
199
205
  PM_ERR_LAMBDA_OPEN,
@@ -213,6 +219,7 @@ typedef enum {
213
219
  PM_ERR_MODULE_NAME,
214
220
  PM_ERR_MODULE_TERM,
215
221
  PM_ERR_MULTI_ASSIGN_MULTI_SPLATS,
222
+ PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST,
216
223
  PM_ERR_NOT_EXPRESSION,
217
224
  PM_ERR_NO_LOCAL_VARIABLE,
218
225
  PM_ERR_NUMBER_LITERAL_UNDERSCORE,
@@ -266,6 +273,7 @@ typedef enum {
266
273
  PM_ERR_STATEMENT_UNDEF,
267
274
  PM_ERR_STRING_CONCATENATION,
268
275
  PM_ERR_STRING_INTERPOLATED_TERM,
276
+ PM_ERR_STRING_LITERAL_EOF,
269
277
  PM_ERR_STRING_LITERAL_TERM,
270
278
  PM_ERR_SYMBOL_INVALID,
271
279
  PM_ERR_SYMBOL_TERM_DYNAMIC,
@@ -273,10 +281,7 @@ typedef enum {
273
281
  PM_ERR_TERNARY_COLON,
274
282
  PM_ERR_TERNARY_EXPRESSION_FALSE,
275
283
  PM_ERR_TERNARY_EXPRESSION_TRUE,
276
- PM_ERR_UNARY_RECEIVER_BANG,
277
- PM_ERR_UNARY_RECEIVER_MINUS,
278
- PM_ERR_UNARY_RECEIVER_PLUS,
279
- PM_ERR_UNARY_RECEIVER_TILDE,
284
+ PM_ERR_UNARY_RECEIVER,
280
285
  PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT,
281
286
  PM_ERR_UNEXPECTED_TOKEN_IGNORE,
282
287
  PM_ERR_UNDEF_ARGUMENT,
@@ -245,6 +245,13 @@ extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];
245
245
  */
246
246
  #define PM_ENCODING_US_ASCII_ENTRY (&pm_encodings[PM_ENCODING_US_ASCII])
247
247
 
248
+ /**
249
+ * This is the ASCII-8BIT encoding. We need a reference to it so that pm_strpbrk
250
+ * can compare against it because invalid multibyte characters are not a thing
251
+ * in this encoding.
252
+ */
253
+ #define PM_ENCODING_ASCII_8BIT_ENTRY (&pm_encodings[PM_ENCODING_ASCII_8BIT])
254
+
248
255
  /**
249
256
  * Parse the given name of an encoding and return a pointer to the corresponding
250
257
  * encoding struct if one can be found, otherwise return NULL.
@@ -163,7 +163,7 @@ pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool,
163
163
  * @param length The length of the constant.
164
164
  * @return The id of the constant.
165
165
  */
166
- pm_constant_id_t pm_constant_pool_find(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
166
+ pm_constant_id_t pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length);
167
167
 
168
168
  /**
169
169
  * Insert a constant into a constant pool that is a slice of a source string.
@@ -7,6 +7,7 @@
7
7
  #define PRISM_STRPBRK_H
8
8
 
9
9
  #include "prism/defines.h"
10
+ #include "prism/diagnostic.h"
10
11
  #include "prism/parser.h"
11
12
 
12
13
  #include <stddef.h>
@@ -35,9 +36,11 @@
35
36
  * @param source The source to search.
36
37
  * @param charset The charset to search for.
37
38
  * @param length The maximum number of bytes to search.
39
+ * @param validate Whether to validate that the source string is valid in the
40
+ * current encoding of the parser.
38
41
  * @return A pointer to the first character in the source string that is in the
39
42
  * charset, or NULL if no such character exists.
40
43
  */
41
- const uint8_t * pm_strpbrk(const pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length);
44
+ const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate);
42
45
 
43
46
  #endif
@@ -14,7 +14,7 @@
14
14
  /**
15
15
  * The minor version of the Prism library as an int.
16
16
  */
17
- #define PRISM_VERSION_MINOR 21
17
+ #define PRISM_VERSION_MINOR 23
18
18
 
19
19
  /**
20
20
  * The patch version of the Prism library as an int.
@@ -24,6 +24,6 @@
24
24
  /**
25
25
  * The version of the Prism library as a constant string.
26
26
  */
27
- #define PRISM_VERSION "0.21.0"
27
+ #define PRISM_VERSION "0.23.0"
28
28
 
29
29
  #endif
data/lib/prism/ffi.rb CHANGED
@@ -160,8 +160,13 @@ module Prism
160
160
  pointer = FFI::MemoryPointer.new(SIZEOF)
161
161
 
162
162
  begin
163
- raise unless LibRubyParser.pm_string_mapped_init(pointer, filepath)
164
- yield new(pointer)
163
+ raise TypeError unless filepath.is_a?(String)
164
+
165
+ if LibRubyParser.pm_string_mapped_init(pointer, filepath)
166
+ yield new(pointer)
167
+ else
168
+ raise SystemCallError.new(filepath, FFI.errno)
169
+ end
165
170
  ensure
166
171
  LibRubyParser.pm_string_free(pointer)
167
172
  pointer.free
@@ -312,7 +317,7 @@ module Prism
312
317
  values << (options.fetch(:frozen_string_literal, false) ? 1 : 0)
313
318
 
314
319
  template << "C"
315
- values << { nil => 0, "3.3.0" => 1, "latest" => 0 }.fetch(options[:version])
320
+ values << { nil => 0, "3.3.0" => 1, "3.4.0" => 0, "latest" => 0 }.fetch(options[:version])
316
321
 
317
322
  template << "L"
318
323
  if (scopes = options[:scopes])
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "delegate"
4
+ require "ripper"
4
5
 
5
6
  module Prism
6
7
  # This class is responsible for lexing the source using prism and then
@@ -860,7 +861,7 @@ module Prism
860
861
  previous = []
861
862
  results = []
862
863
 
863
- Ripper.lex(source, raise_errors: true).each do |token|
864
+ lex(source).each do |token|
864
865
  case token[1]
865
866
  when :on_sp
866
867
  # skip
@@ -886,6 +887,21 @@ module Prism
886
887
 
887
888
  results
888
889
  end
890
+
891
+ private
892
+
893
+ if Ripper.method(:lex).parameters.assoc(:keyrest)
894
+ def lex(source)
895
+ Ripper.lex(source, raise_errors: true)
896
+ end
897
+ else
898
+ def lex(source)
899
+ ripper = Ripper::Lexer.new(source)
900
+ ripper.lex.tap do |result|
901
+ raise SyntaxError, ripper.errors.map(&:message).join(' ;') if ripper.errors.any?
902
+ end
903
+ end
904
+ end
889
905
  end
890
906
 
891
907
  private_constant :LexRipper