prism 0.21.0 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,9 @@
1
1
  #include "prism/extension.h"
2
2
 
3
+ #ifdef _WIN32
4
+ #include <ruby/win32.h>
5
+ #endif
6
+
3
7
  // NOTE: this file should contain only bindings. All non-trivial logic should be
4
8
  // in libprism so it can be shared with its various callers.
5
9
 
@@ -212,20 +216,29 @@ string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options)
212
216
  /**
213
217
  * Read options for methods that look like (filepath, **options).
214
218
  */
215
- static bool
219
+ static void
216
220
  file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
217
221
  VALUE filepath;
218
222
  VALUE keywords;
219
223
  rb_scan_args(argc, argv, "1:", &filepath, &keywords);
220
224
 
225
+ Check_Type(filepath, T_STRING);
226
+
221
227
  extract_options(options, filepath, keywords);
222
228
 
223
- if (!pm_string_mapped_init(input, (const char *) pm_string_source(&options->filepath))) {
229
+ const char * string_source = (const char *) pm_string_source(&options->filepath);
230
+
231
+ if (!pm_string_mapped_init(input, string_source)) {
224
232
  pm_options_free(options);
225
- return false;
226
- }
227
233
 
228
- return true;
234
+ #ifdef _WIN32
235
+ int e = rb_w32_map_errno(GetLastError());
236
+ #else
237
+ int e = errno;
238
+ #endif
239
+
240
+ rb_syserr_fail(e, string_source);
241
+ }
229
242
  }
230
243
 
231
244
  /******************************************************************************/
@@ -299,7 +312,8 @@ static VALUE
299
312
  dump_file(int argc, VALUE *argv, VALUE self) {
300
313
  pm_string_t input;
301
314
  pm_options_t options = { 0 };
302
- if (!file_options(argc, argv, &input, &options)) return Qnil;
315
+
316
+ file_options(argc, argv, &input, &options);
303
317
 
304
318
  VALUE value = dump_input(&input, &options);
305
319
  pm_string_free(&input);
@@ -404,6 +418,9 @@ parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
404
418
  case PM_ERROR_LEVEL_FATAL:
405
419
  level = ID2SYM(rb_intern("fatal"));
406
420
  break;
421
+ case PM_ERROR_LEVEL_ARGUMENT:
422
+ level = ID2SYM(rb_intern("argument"));
423
+ break;
407
424
  default:
408
425
  rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, error->level);
409
426
  }
@@ -525,8 +542,9 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
525
542
  pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
526
543
  pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
527
544
 
545
+ VALUE source_string = rb_str_new((const char *) pm_string_source(input), pm_string_length(input));
528
546
  VALUE offsets = rb_ary_new();
529
- VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), ULONG2NUM(parser.start_line), offsets };
547
+ VALUE source_argv[] = { source_string, LONG2NUM(parser.start_line), offsets };
530
548
  VALUE source = rb_class_new_instance(3, source_argv, rb_cPrismSource);
531
549
 
532
550
  parse_lex_data_t parse_lex_data = {
@@ -544,17 +562,21 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
544
562
  parser.lex_callback = &lex_callback;
545
563
  pm_node_t *node = pm_parse(&parser);
546
564
 
547
- // Here we need to update the source range to have the correct newline
548
- // offsets. We do it here because we've already created the object and given
549
- // it over to all of the tokens.
565
+ // Here we need to update the Source object to have the correct
566
+ // encoding for the source string and the correct newline offsets.
567
+ // We do it here because we've already created the Source object and given
568
+ // it over to all of the tokens, and both of these are only set after pm_parse().
569
+ rb_encoding *encoding = rb_enc_find(parser.encoding->name);
570
+ rb_enc_associate(source_string, encoding);
571
+
550
572
  for (size_t index = 0; index < parser.newline_list.size; index++) {
551
- rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
573
+ rb_ary_push(offsets, ULONG2NUM(parser.newline_list.offsets[index]));
552
574
  }
553
575
 
554
576
  VALUE value;
555
577
  if (return_nodes) {
556
578
  value = rb_ary_new_capa(2);
557
- rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding));
579
+ rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding, source));
558
580
  rb_ary_push(value, parse_lex_data.tokens);
559
581
  } else {
560
582
  value = parse_lex_data.tokens;
@@ -606,7 +628,8 @@ static VALUE
606
628
  lex_file(int argc, VALUE *argv, VALUE self) {
607
629
  pm_string_t input;
608
630
  pm_options_t options = { 0 };
609
- if (!file_options(argc, argv, &input, &options)) return Qnil;
631
+
632
+ file_options(argc, argv, &input, &options);
610
633
 
611
634
  VALUE value = parse_lex_input(&input, &options, false);
612
635
  pm_string_free(&input);
@@ -632,7 +655,7 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
632
655
 
633
656
  VALUE source = pm_source_new(&parser, encoding);
634
657
  VALUE result_argv[] = {
635
- pm_ast_new(&parser, node, encoding),
658
+ pm_ast_new(&parser, node, encoding, source),
636
659
  parser_comments(&parser, source),
637
660
  parser_magic_comments(&parser, source),
638
661
  parser_data_loc(&parser, source),
@@ -707,7 +730,8 @@ static VALUE
707
730
  parse_file(int argc, VALUE *argv, VALUE self) {
708
731
  pm_string_t input;
709
732
  pm_options_t options = { 0 };
710
- if (!file_options(argc, argv, &input, &options)) return Qnil;
733
+
734
+ file_options(argc, argv, &input, &options);
711
735
 
712
736
  VALUE value = parse_input(&input, &options);
713
737
  pm_string_free(&input);
@@ -767,7 +791,8 @@ static VALUE
767
791
  parse_file_comments(int argc, VALUE *argv, VALUE self) {
768
792
  pm_string_t input;
769
793
  pm_options_t options = { 0 };
770
- if (!file_options(argc, argv, &input, &options)) return Qnil;
794
+
795
+ file_options(argc, argv, &input, &options);
771
796
 
772
797
  VALUE value = parse_input_comments(&input, &options);
773
798
  pm_string_free(&input);
@@ -821,7 +846,8 @@ static VALUE
821
846
  parse_lex_file(int argc, VALUE *argv, VALUE self) {
822
847
  pm_string_t input;
823
848
  pm_options_t options = { 0 };
824
- if (!file_options(argc, argv, &input, &options)) return Qnil;
849
+
850
+ file_options(argc, argv, &input, &options);
825
851
 
826
852
  VALUE value = parse_lex_input(&input, &options, true);
827
853
  pm_string_free(&input);
@@ -878,7 +904,8 @@ static VALUE
878
904
  parse_file_success_p(int argc, VALUE *argv, VALUE self) {
879
905
  pm_string_t input;
880
906
  pm_options_t options = { 0 };
881
- if (!file_options(argc, argv, &input, &options)) return Qnil;
907
+
908
+ file_options(argc, argv, &input, &options);
882
909
 
883
910
  VALUE result = parse_input_success_p(&input, &options);
884
911
  pm_string_free(&input);
@@ -956,7 +983,17 @@ profile_file(VALUE self, VALUE filepath) {
956
983
  pm_string_t input;
957
984
 
958
985
  const char *checked = check_string(filepath);
959
- if (!pm_string_mapped_init(&input, checked)) return Qnil;
986
+ Check_Type(filepath, T_STRING);
987
+
988
+ if (!pm_string_mapped_init(&input, checked)) {
989
+ #ifdef _WIN32
990
+ int e = rb_w32_map_errno(GetLastError());
991
+ #else
992
+ int e = errno;
993
+ #endif
994
+
995
+ rb_syserr_fail(e, checked);
996
+ }
960
997
 
961
998
  pm_options_t options = { 0 };
962
999
  pm_options_filepath_set(&options, checked);
@@ -1,7 +1,7 @@
1
1
  #ifndef PRISM_EXT_NODE_H
2
2
  #define PRISM_EXT_NODE_H
3
3
 
4
- #define EXPECTED_PRISM_VERSION "0.21.0"
4
+ #define EXPECTED_PRISM_VERSION "0.23.0"
5
5
 
6
6
  #include <ruby.h>
7
7
  #include <ruby/encoding.h>
@@ -9,7 +9,7 @@
9
9
 
10
10
  VALUE pm_source_new(pm_parser_t *parser, rb_encoding *encoding);
11
11
  VALUE pm_token_new(pm_parser_t *parser, pm_token_t *token, rb_encoding *encoding, VALUE source);
12
- VALUE pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding);
12
+ VALUE pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding, VALUE source);
13
13
 
14
14
  void Init_prism_api_node(void);
15
15
  void Init_prism_pack(void);
@@ -19,7 +19,10 @@
19
19
  */
20
20
  typedef enum {
21
21
  /** For errors that cannot be recovered from. */
22
- PM_ERROR_LEVEL_FATAL = 0
22
+ PM_ERROR_LEVEL_FATAL = 0,
23
+
24
+ /** For errors that should raise an argument error. */
25
+ PM_ERROR_LEVEL_ARGUMENT = 1
23
26
  } pm_error_level_t;
24
27
 
25
28
  /**
@@ -28,6 +31,7 @@ typedef enum {
28
31
  typedef enum {
29
32
  /** For warnings which should be emitted if $VERBOSE != nil. */
30
33
  PM_WARNING_LEVEL_DEFAULT = 0,
34
+
31
35
  /** For warnings which should be emitted if $VERBOSE == true. */
32
36
  PM_WARNING_LEVEL_VERBOSE = 1
33
37
  } pm_warning_level_t;
@@ -192,8 +196,10 @@ typedef enum {
192
196
  PM_ERR_INVALID_NUMBER_HEXADECIMAL,
193
197
  PM_ERR_INVALID_NUMBER_OCTAL,
194
198
  PM_ERR_INVALID_NUMBER_UNDERSCORE,
199
+ PM_ERR_INVALID_CHARACTER,
200
+ PM_ERR_INVALID_MULTIBYTE_CHARACTER,
201
+ PM_ERR_INVALID_PRINTABLE_CHARACTER,
195
202
  PM_ERR_INVALID_PERCENT,
196
- PM_ERR_INVALID_TOKEN,
197
203
  PM_ERR_INVALID_VARIABLE_GLOBAL,
198
204
  PM_ERR_IT_NOT_ALLOWED,
199
205
  PM_ERR_LAMBDA_OPEN,
@@ -213,6 +219,7 @@ typedef enum {
213
219
  PM_ERR_MODULE_NAME,
214
220
  PM_ERR_MODULE_TERM,
215
221
  PM_ERR_MULTI_ASSIGN_MULTI_SPLATS,
222
+ PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST,
216
223
  PM_ERR_NOT_EXPRESSION,
217
224
  PM_ERR_NO_LOCAL_VARIABLE,
218
225
  PM_ERR_NUMBER_LITERAL_UNDERSCORE,
@@ -266,6 +273,7 @@ typedef enum {
266
273
  PM_ERR_STATEMENT_UNDEF,
267
274
  PM_ERR_STRING_CONCATENATION,
268
275
  PM_ERR_STRING_INTERPOLATED_TERM,
276
+ PM_ERR_STRING_LITERAL_EOF,
269
277
  PM_ERR_STRING_LITERAL_TERM,
270
278
  PM_ERR_SYMBOL_INVALID,
271
279
  PM_ERR_SYMBOL_TERM_DYNAMIC,
@@ -273,10 +281,7 @@ typedef enum {
273
281
  PM_ERR_TERNARY_COLON,
274
282
  PM_ERR_TERNARY_EXPRESSION_FALSE,
275
283
  PM_ERR_TERNARY_EXPRESSION_TRUE,
276
- PM_ERR_UNARY_RECEIVER_BANG,
277
- PM_ERR_UNARY_RECEIVER_MINUS,
278
- PM_ERR_UNARY_RECEIVER_PLUS,
279
- PM_ERR_UNARY_RECEIVER_TILDE,
284
+ PM_ERR_UNARY_RECEIVER,
280
285
  PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT,
281
286
  PM_ERR_UNEXPECTED_TOKEN_IGNORE,
282
287
  PM_ERR_UNDEF_ARGUMENT,
@@ -245,6 +245,13 @@ extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];
245
245
  */
246
246
  #define PM_ENCODING_US_ASCII_ENTRY (&pm_encodings[PM_ENCODING_US_ASCII])
247
247
 
248
+ /**
249
+ * This is the ASCII-8BIT encoding. We need a reference to it so that pm_strpbrk
250
+ * can compare against it because invalid multibyte characters are not a thing
251
+ * in this encoding.
252
+ */
253
+ #define PM_ENCODING_ASCII_8BIT_ENTRY (&pm_encodings[PM_ENCODING_ASCII_8BIT])
254
+
248
255
  /**
249
256
  * Parse the given name of an encoding and return a pointer to the corresponding
250
257
  * encoding struct if one can be found, otherwise return NULL.
@@ -163,7 +163,7 @@ pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool,
163
163
  * @param length The length of the constant.
164
164
  * @return The id of the constant.
165
165
  */
166
- pm_constant_id_t pm_constant_pool_find(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
166
+ pm_constant_id_t pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length);
167
167
 
168
168
  /**
169
169
  * Insert a constant into a constant pool that is a slice of a source string.
@@ -7,6 +7,7 @@
7
7
  #define PRISM_STRPBRK_H
8
8
 
9
9
  #include "prism/defines.h"
10
+ #include "prism/diagnostic.h"
10
11
  #include "prism/parser.h"
11
12
 
12
13
  #include <stddef.h>
@@ -35,9 +36,11 @@
35
36
  * @param source The source to search.
36
37
  * @param charset The charset to search for.
37
38
  * @param length The maximum number of bytes to search.
39
+ * @param validate Whether to validate that the source string is valid in the
40
+ * current encoding of the parser.
38
41
  * @return A pointer to the first character in the source string that is in the
39
42
  * charset, or NULL if no such character exists.
40
43
  */
41
- const uint8_t * pm_strpbrk(const pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length);
44
+ const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate);
42
45
 
43
46
  #endif
@@ -14,7 +14,7 @@
14
14
  /**
15
15
  * The minor version of the Prism library as an int.
16
16
  */
17
- #define PRISM_VERSION_MINOR 21
17
+ #define PRISM_VERSION_MINOR 23
18
18
 
19
19
  /**
20
20
  * The patch version of the Prism library as an int.
@@ -24,6 +24,6 @@
24
24
  /**
25
25
  * The version of the Prism library as a constant string.
26
26
  */
27
- #define PRISM_VERSION "0.21.0"
27
+ #define PRISM_VERSION "0.23.0"
28
28
 
29
29
  #endif
data/lib/prism/ffi.rb CHANGED
@@ -160,8 +160,13 @@ module Prism
160
160
  pointer = FFI::MemoryPointer.new(SIZEOF)
161
161
 
162
162
  begin
163
- raise unless LibRubyParser.pm_string_mapped_init(pointer, filepath)
164
- yield new(pointer)
163
+ raise TypeError unless filepath.is_a?(String)
164
+
165
+ if LibRubyParser.pm_string_mapped_init(pointer, filepath)
166
+ yield new(pointer)
167
+ else
168
+ raise SystemCallError.new(filepath, FFI.errno)
169
+ end
165
170
  ensure
166
171
  LibRubyParser.pm_string_free(pointer)
167
172
  pointer.free
@@ -312,7 +317,7 @@ module Prism
312
317
  values << (options.fetch(:frozen_string_literal, false) ? 1 : 0)
313
318
 
314
319
  template << "C"
315
- values << { nil => 0, "3.3.0" => 1, "latest" => 0 }.fetch(options[:version])
320
+ values << { nil => 0, "3.3.0" => 1, "3.4.0" => 0, "latest" => 0 }.fetch(options[:version])
316
321
 
317
322
  template << "L"
318
323
  if (scopes = options[:scopes])
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "delegate"
4
+ require "ripper"
4
5
 
5
6
  module Prism
6
7
  # This class is responsible for lexing the source using prism and then
@@ -860,7 +861,7 @@ module Prism
860
861
  previous = []
861
862
  results = []
862
863
 
863
- Ripper.lex(source, raise_errors: true).each do |token|
864
+ lex(source).each do |token|
864
865
  case token[1]
865
866
  when :on_sp
866
867
  # skip
@@ -886,6 +887,21 @@ module Prism
886
887
 
887
888
  results
888
889
  end
890
+
891
+ private
892
+
893
+ if Ripper.method(:lex).parameters.assoc(:keyrest)
894
+ def lex(source)
895
+ Ripper.lex(source, raise_errors: true)
896
+ end
897
+ else
898
+ def lex(source)
899
+ ripper = Ripper::Lexer.new(source)
900
+ ripper.lex.tap do |result|
901
+ raise SyntaxError, ripper.errors.map(&:message).join(' ;') if ripper.errors.any?
902
+ end
903
+ end
904
+ end
889
905
  end
890
906
 
891
907
  private_constant :LexRipper