prism 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,6 +25,15 @@
25
25
  #define __STDC_FORMAT_MACROS
26
26
  #include <inttypes.h>
27
27
 
28
+ /**
29
+ * When we are parsing using recursive descent, we want to protect against
30
+ * malicious payloads that could attempt to crash our parser. We do this by
31
+ * specifying a maximum depth to which we are allowed to recurse.
32
+ */
33
+ #ifndef PRISM_DEPTH_MAXIMUM
34
+ #define PRISM_DEPTH_MAXIMUM 1000
35
+ #endif
36
+
28
37
  /**
29
38
  * By default, we compile with -fvisibility=hidden. When this is enabled, we
30
39
  * need to mark certain functions as being publicly-visible. This macro does
@@ -212,4 +221,22 @@
212
221
  #define PRISM_ENCODING_EXCLUDE_FULL
213
222
  #endif
214
223
 
224
+ /**
225
+ * Support PRISM_LIKELY and PRISM_UNLIKELY to help the compiler optimize its
226
+ * branch prediction.
227
+ */
228
+ #if defined(__GNUC__) || defined(__clang__)
229
+ /** The compiler should predict that this branch will be taken. */
230
+ #define PRISM_LIKELY(x) __builtin_expect(!!(x), 1)
231
+
232
+ /** The compiler should predict that this branch will not be taken. */
233
+ #define PRISM_UNLIKELY(x) __builtin_expect(!!(x), 0)
234
+ #else
235
+ /** Void because this platform does not support branch prediction hints. */
236
+ #define PRISM_LIKELY(x) (x)
237
+
238
+ /** Void because this platform does not support branch prediction hints. */
239
+ #define PRISM_UNLIKELY(x) (x)
240
+ #endif
241
+
215
242
  #endif
@@ -44,7 +44,6 @@ typedef enum {
44
44
  PM_ERR_ARGUMENT_FORMAL_GLOBAL,
45
45
  PM_ERR_ARGUMENT_FORMAL_IVAR,
46
46
  PM_ERR_ARGUMENT_FORWARDING_UNBOUND,
47
- PM_ERR_ARGUMENT_IN,
48
47
  PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND,
49
48
  PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
50
49
  PM_ERR_ARGUMENT_NO_FORWARDING_STAR,
@@ -128,6 +127,7 @@ typedef enum {
128
127
  PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT,
129
128
  PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH,
130
129
  PM_ERR_EXPECT_EXPRESSION_AFTER_STAR,
130
+ PM_ERR_EXPECT_FOR_DELIMITER,
131
131
  PM_ERR_EXPECT_IDENT_REQ_PARAMETER,
132
132
  PM_ERR_EXPECT_IN_DELIMITER,
133
133
  PM_ERR_EXPECT_LPAREN_REQ_PARAMETER,
@@ -136,6 +136,7 @@ typedef enum {
136
136
  PM_ERR_EXPECT_RPAREN,
137
137
  PM_ERR_EXPECT_RPAREN_AFTER_MULTI,
138
138
  PM_ERR_EXPECT_RPAREN_REQ_PARAMETER,
139
+ PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER,
139
140
  PM_ERR_EXPECT_STRING_CONTENT,
140
141
  PM_ERR_EXPECT_WHEN_DELIMITER,
141
142
  PM_ERR_EXPRESSION_BARE_HASH,
@@ -214,7 +215,9 @@ typedef enum {
214
215
  PM_ERR_MODULE_TERM,
215
216
  PM_ERR_MULTI_ASSIGN_MULTI_SPLATS,
216
217
  PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST,
218
+ PM_ERR_NESTING_TOO_DEEP,
217
219
  PM_ERR_NO_LOCAL_VARIABLE,
220
+ PM_ERR_NON_ASSOCIATIVE_OPERATOR,
218
221
  PM_ERR_NOT_EXPRESSION,
219
222
  PM_ERR_NUMBER_LITERAL_UNDERSCORE,
220
223
  PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK,
@@ -300,6 +303,7 @@ typedef enum {
300
303
  PM_ERR_UNEXPECTED_BLOCK_ARGUMENT,
301
304
  PM_ERR_UNEXPECTED_INDEX_BLOCK,
302
305
  PM_ERR_UNEXPECTED_INDEX_KEYWORDS,
306
+ PM_ERR_UNEXPECTED_LABEL,
303
307
  PM_ERR_UNEXPECTED_MULTI_WRITE,
304
308
  PM_ERR_UNEXPECTED_RANGE_OPERATOR,
305
309
  PM_ERR_UNEXPECTED_SAFE_NAVIGATION,
@@ -7,6 +7,7 @@
7
7
  #define PRISM_OPTIONS_H
8
8
 
9
9
  #include "prism/defines.h"
10
+ #include "prism/util/pm_char.h"
10
11
  #include "prism/util/pm_string.h"
11
12
 
12
13
  #include <stdbool.h>
@@ -139,6 +140,23 @@ typedef struct pm_options {
139
140
  * but ignore any encoding magic comments at the top of the file.
140
141
  */
141
142
  bool encoding_locked;
143
+
144
+ /**
145
+ * When the file being parsed is the main script, the shebang will be
146
+ * considered for command-line flags (or for implicit -x). The caller needs
147
+ * to pass this information to the parser so that it can behave correctly.
148
+ */
149
+ bool main_script;
150
+
151
+ /**
152
+ * When the file being parsed is considered a "partial" script, jumps will
153
+ * not be marked as errors if they are not contained within loops/blocks.
154
+ * This is used in the case that you're parsing a script that you know will
155
+ * be embedded inside another script later, but you do not have that context
156
+ * yet. For example, when parsing an ERB template that will be evaluated
157
+ * inside another script.
158
+ */
159
+ bool partial_script;
142
160
  } pm_options_t;
143
161
 
144
162
  /**
@@ -248,6 +266,22 @@ PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options,
248
266
  */
249
267
  PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const char *version, size_t length);
250
268
 
269
+ /**
270
+ * Set the main script option on the given options struct.
271
+ *
272
+ * @param options The options struct to set the main script value on.
273
+ * @param main_script The main script value to set.
274
+ */
275
+ PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, bool main_script);
276
+
277
+ /**
278
+ * Set the partial script option on the given options struct.
279
+ *
280
+ * @param options The options struct to set the partial script value on.
281
+ * @param partial_script The partial script value to set.
282
+ */
283
+ PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options, bool partial_script);
284
+
251
285
  /**
252
286
  * Allocate and zero out the scopes array on the given options struct.
253
287
  *
@@ -315,6 +349,9 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
315
349
  * | `1` | -l command line option |
316
350
  * | `1` | -a command line option |
317
351
  * | `1` | the version |
352
+ * | `1` | encoding locked |
353
+ * | `1` | main script |
354
+ * | `1` | partial script |
318
355
  * | `4` | the number of scopes |
319
356
  * | ... | the scopes |
320
357
  *
@@ -347,8 +384,8 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
347
384
  * * The encoding can have a length of 0, in which case we'll use the default
348
385
  * encoding (UTF-8). If it's not 0, it should correspond to a name of an
349
386
  * encoding that can be passed to `Encoding.find` in Ruby.
350
- * * The frozen string literal and suppress warnings fields are booleans, so
351
- * their values should be either 0 or 1.
387
+ * * The frozen string literal, encoding locked, main script, and partial script
388
+ * fields are booleans, so their values should be either 0 or 1.
352
389
  * * The number of scopes can be 0.
353
390
  *
354
391
  * @param options The options struct to deserialize into.
@@ -861,6 +861,13 @@ struct pm_parser {
861
861
  */
862
862
  bool parsing_eval;
863
863
 
864
+ /**
865
+ * Whether or not we are parsing a "partial" script, which is a script that
866
+ * will be evaluated in the context of another script, so we should not
867
+ * check jumps (next/break/etc.) for validity.
868
+ */
869
+ bool partial_script;
870
+
864
871
  /** Whether or not we're at the beginning of a command. */
865
872
  bool command_start;
866
873
 
@@ -22,6 +22,9 @@
22
22
  #include <fcntl.h>
23
23
  #include <sys/mman.h>
24
24
  #include <sys/stat.h>
25
+ #elif defined(PRISM_HAS_FILESYSTEM)
26
+ #include <fcntl.h>
27
+ #include <sys/stat.h>
25
28
  #endif
26
29
 
27
30
  /**
@@ -93,6 +96,26 @@ void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length);
93
96
  */
94
97
  void pm_string_constant_init(pm_string_t *string, const char *source, size_t length);
95
98
 
99
+ /**
100
+ * Represents the result of calling pm_string_mapped_init or
101
+ * pm_string_file_init. We need this additional information because there is
102
+ * not a platform-agnostic way to indicate that the file that was attempted to
103
+ * be opened was a directory.
104
+ */
105
+ typedef enum {
106
+ /** Indicates that the string was successfully initialized. */
107
+ PM_STRING_INIT_SUCCESS = 0,
108
+ /**
109
+ * Indicates a generic error from a string_*_init function, where the type
110
+ * of error should be read from `errno` or `GetLastError()`.
111
+ */
112
+ PM_STRING_INIT_ERROR_GENERIC = 1,
113
+ /**
114
+ * Indicates that the file that was attempted to be opened was a directory.
115
+ */
116
+ PM_STRING_INIT_ERROR_DIRECTORY = 2
117
+ } pm_string_init_result_t;
118
+
96
119
  /**
97
120
  * Read the file indicated by the filepath parameter into source and load its
98
121
  * contents and size into the given `pm_string_t`. The given `pm_string_t`
@@ -106,9 +129,9 @@ void pm_string_constant_init(pm_string_t *string, const char *source, size_t len
106
129
  *
107
130
  * @param string The string to initialize.
108
131
  * @param filepath The filepath to read.
109
- * @return Whether or not the file was successfully mapped.
132
+ * @return The success of the read, indicated by the value of the enum.
110
133
  */
111
- PRISM_EXPORTED_FUNCTION bool pm_string_mapped_init(pm_string_t *string, const char *filepath);
134
+ PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_mapped_init(pm_string_t *string, const char *filepath);
112
135
 
113
136
  /**
114
137
  * Read the file indicated by the filepath parameter into source and load its
@@ -117,9 +140,9 @@ PRISM_EXPORTED_FUNCTION bool pm_string_mapped_init(pm_string_t *string, const ch
117
140
  *
118
141
  * @param string The string to initialize.
119
142
  * @param filepath The filepath to read.
120
- * @return Whether or not the file was successfully read.
143
+ * @return The success of the read, indicated by the value of the enum.
121
144
  */
122
- PRISM_EXPORTED_FUNCTION bool pm_string_file_init(pm_string_t *string, const char *filepath);
145
+ PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_file_init(pm_string_t *string, const char *filepath);
123
146
 
124
147
  /**
125
148
  * Ensure the string is owned. If it is not, then reinitialize it as owned and
@@ -14,7 +14,7 @@
14
14
  /**
15
15
  * The minor version of the Prism library as an int.
16
16
  */
17
- #define PRISM_VERSION_MINOR 0
17
+ #define PRISM_VERSION_MINOR 1
18
18
 
19
19
  /**
20
20
  * The patch version of the Prism library as an int.
@@ -24,6 +24,6 @@
24
24
  /**
25
25
  * The version of the Prism library as a constant string.
26
26
  */
27
- #define PRISM_VERSION "1.0.0"
27
+ #define PRISM_VERSION "1.1.0"
28
28
 
29
29
  #endif
@@ -4592,9 +4592,11 @@ module Prism
4592
4592
  # comma-separated list.
4593
4593
  def arguments_node_flags_inspect(node)
4594
4594
  flags = [] #: Array[String]
4595
+ flags << "contains_forwarding" if node.contains_forwarding?
4595
4596
  flags << "contains_keywords" if node.contains_keywords?
4596
4597
  flags << "contains_keyword_splat" if node.contains_keyword_splat?
4597
4598
  flags << "contains_splat" if node.contains_splat?
4599
+ flags << "contains_multiple_splats" if node.contains_multiple_splats?
4598
4600
  flags.join(", ")
4599
4601
  end
4600
4602
 
data/lib/prism/dsl.rb CHANGED
@@ -184,7 +184,7 @@ module Prism
184
184
  end
185
185
 
186
186
  # Create a new CapturePatternNode node.
187
- def capture_pattern_node(source: default_source, node_id: 0, location: default_location, flags: 0, value: default_node(source, location), target: default_node(source, location), operator_loc: location)
187
+ def capture_pattern_node(source: default_source, node_id: 0, location: default_location, flags: 0, value: default_node(source, location), target: local_variable_target_node(source: source), operator_loc: location)
188
188
  CapturePatternNode.new(source, node_id, location, flags, value, target, operator_loc)
189
189
  end
190
190
 
@@ -199,7 +199,7 @@ module Prism
199
199
  end
200
200
 
201
201
  # Create a new ClassNode node.
202
- def class_node(source: default_source, node_id: 0, location: default_location, flags: 0, locals: [], class_keyword_loc: location, constant_path: default_node(source, location), inheritance_operator_loc: nil, superclass: nil, body: nil, end_keyword_loc: location, name: :"")
202
+ def class_node(source: default_source, node_id: 0, location: default_location, flags: 0, locals: [], class_keyword_loc: location, constant_path: constant_read_node(source: source), inheritance_operator_loc: nil, superclass: nil, body: nil, end_keyword_loc: location, name: :"")
203
203
  ClassNode.new(source, node_id, location, flags, locals, class_keyword_loc, constant_path, inheritance_operator_loc, superclass, body, end_keyword_loc, name)
204
204
  end
205
205
 
@@ -314,7 +314,7 @@ module Prism
314
314
  end
315
315
 
316
316
  # Create a new EmbeddedVariableNode node.
317
- def embedded_variable_node(source: default_source, node_id: 0, location: default_location, flags: 0, operator_loc: location, variable: default_node(source, location))
317
+ def embedded_variable_node(source: default_source, node_id: 0, location: default_location, flags: 0, operator_loc: location, variable: instance_variable_read_node(source: source))
318
318
  EmbeddedVariableNode.new(source, node_id, location, flags, operator_loc, variable)
319
319
  end
320
320
 
@@ -329,7 +329,7 @@ module Prism
329
329
  end
330
330
 
331
331
  # Create a new FindPatternNode node.
332
- def find_pattern_node(source: default_source, node_id: 0, location: default_location, flags: 0, constant: nil, left: default_node(source, location), requireds: [], right: default_node(source, location), opening_loc: nil, closing_loc: nil)
332
+ def find_pattern_node(source: default_source, node_id: 0, location: default_location, flags: 0, constant: nil, left: splat_node(source: source), requireds: [], right: splat_node(source: source), opening_loc: nil, closing_loc: nil)
333
333
  FindPatternNode.new(source, node_id, location, flags, constant, left, requireds, right, opening_loc, closing_loc)
334
334
  end
335
335
 
@@ -344,7 +344,7 @@ module Prism
344
344
  end
345
345
 
346
346
  # Create a new ForNode node.
347
- def for_node(source: default_source, node_id: 0, location: default_location, flags: 0, index: default_node(source, location), collection: default_node(source, location), statements: nil, for_keyword_loc: location, in_keyword_loc: location, do_keyword_loc: nil, end_keyword_loc: location)
347
+ def for_node(source: default_source, node_id: 0, location: default_location, flags: 0, index: local_variable_target_node(source: source), collection: default_node(source, location), statements: nil, for_keyword_loc: location, in_keyword_loc: location, do_keyword_loc: nil, end_keyword_loc: location)
348
348
  ForNode.new(source, node_id, location, flags, index, collection, statements, for_keyword_loc, in_keyword_loc, do_keyword_loc, end_keyword_loc)
349
349
  end
350
350
 
@@ -414,7 +414,7 @@ module Prism
414
414
  end
415
415
 
416
416
  # Create a new ImplicitNode node.
417
- def implicit_node(source: default_source, node_id: 0, location: default_location, flags: 0, value: default_node(source, location))
417
+ def implicit_node(source: default_source, node_id: 0, location: default_location, flags: 0, value: local_variable_read_node(source: source))
418
418
  ImplicitNode.new(source, node_id, location, flags, value)
419
419
  end
420
420
 
@@ -589,7 +589,7 @@ module Prism
589
589
  end
590
590
 
591
591
  # Create a new ModuleNode node.
592
- def module_node(source: default_source, node_id: 0, location: default_location, flags: 0, locals: [], module_keyword_loc: location, constant_path: default_node(source, location), body: nil, end_keyword_loc: location, name: :"")
592
+ def module_node(source: default_source, node_id: 0, location: default_location, flags: 0, locals: [], module_keyword_loc: location, constant_path: constant_read_node(source: source), body: nil, end_keyword_loc: location, name: :"")
593
593
  ModuleNode.new(source, node_id, location, flags, locals, module_keyword_loc, constant_path, body, end_keyword_loc, name)
594
594
  end
595
595
 
@@ -659,7 +659,7 @@ module Prism
659
659
  end
660
660
 
661
661
  # Create a new PinnedVariableNode node.
662
- def pinned_variable_node(source: default_source, node_id: 0, location: default_location, flags: 0, variable: default_node(source, location), operator_loc: location)
662
+ def pinned_variable_node(source: default_source, node_id: 0, location: default_location, flags: 0, variable: local_variable_read_node(source: source), operator_loc: location)
663
663
  PinnedVariableNode.new(source, node_id, location, flags, variable, operator_loc)
664
664
  end
665
665
 
@@ -831,9 +831,11 @@ module Prism
831
831
  # Retrieve the value of one of the ArgumentsNodeFlags flags.
832
832
  def arguments_node_flag(name)
833
833
  case name
834
+ when :contains_forwarding then ArgumentsNodeFlags::CONTAINS_FORWARDING
834
835
  when :contains_keywords then ArgumentsNodeFlags::CONTAINS_KEYWORDS
835
836
  when :contains_keyword_splat then ArgumentsNodeFlags::CONTAINS_KEYWORD_SPLAT
836
837
  when :contains_splat then ArgumentsNodeFlags::CONTAINS_SPLAT
838
+ when :contains_multiple_splats then ArgumentsNodeFlags::CONTAINS_MULTIPLE_SPLATS
837
839
  else Kernel.raise ArgumentError, "invalid ArgumentsNodeFlags flag: #{name.inspect}"
838
840
  end
839
841
  end
data/lib/prism/ffi.rb CHANGED
@@ -72,6 +72,7 @@ module Prism
72
72
  end
73
73
 
74
74
  callback :pm_parse_stream_fgets_t, [:pointer, :int, :pointer], :pointer
75
+ enum :pm_string_init_result_t, %i[PM_STRING_INIT_SUCCESS PM_STRING_INIT_ERROR_GENERIC PM_STRING_INIT_ERROR_DIRECTORY]
75
76
 
76
77
  load_exported_functions_from(
77
78
  "prism.h",
@@ -176,13 +177,26 @@ module Prism
176
177
  def self.with_file(filepath)
177
178
  raise TypeError unless filepath.is_a?(String)
178
179
 
180
+ # On Windows and Mac, it's expected that filepaths will be encoded in
181
+ # UTF-8. If they are not, we need to convert them to UTF-8 before
182
+ # passing them into pm_string_mapped_init.
183
+ if RbConfig::CONFIG["host_os"].match?(/bccwin|cygwin|djgpp|mingw|mswin|wince|darwin/i) &&
184
+ (encoding = filepath.encoding) != Encoding::ASCII_8BIT && encoding != Encoding::UTF_8
185
+ filepath = filepath.encode(Encoding::UTF_8)
186
+ end
187
+
179
188
  FFI::MemoryPointer.new(SIZEOF) do |pm_string|
180
- if LibRubyParser.pm_string_mapped_init(pm_string, filepath)
189
+ case (result = LibRubyParser.pm_string_mapped_init(pm_string, filepath))
190
+ when :PM_STRING_INIT_SUCCESS
181
191
  pointer = LibRubyParser.pm_string_source(pm_string)
182
192
  length = LibRubyParser.pm_string_length(pm_string)
183
193
  return yield new(pointer, length, false)
184
- else
194
+ when :PM_STRING_INIT_ERROR_GENERIC
185
195
  raise SystemCallError.new(filepath, FFI.errno)
196
+ when :PM_STRING_INIT_ERROR_DIRECTORY
197
+ raise Errno::EISDIR.new(filepath)
198
+ else
199
+ raise "Unknown error initializing pm_string_t: #{result.inspect}"
186
200
  end
187
201
  ensure
188
202
  LibRubyParser.pm_string_free(pm_string)
@@ -397,6 +411,20 @@ module Prism
397
411
  end
398
412
  end
399
413
 
414
+ # Return the value that should be dumped for the version option.
415
+ def dump_options_version(version)
416
+ case version
417
+ when nil, "latest"
418
+ 0
419
+ when /\A3\.3(\.\d+)?\z/
420
+ 1
421
+ when /\A3\.4(\.\d+)?\z/
422
+ 0
423
+ else
424
+ raise ArgumentError, "invalid version: #{version}"
425
+ end
426
+ end
427
+
400
428
  # Convert the given options into a serialized options string.
401
429
  def dump_options(options)
402
430
  template = +""
@@ -429,11 +457,17 @@ module Prism
429
457
  values << dump_options_command_line(options)
430
458
 
431
459
  template << "C"
432
- values << { nil => 0, "3.3.0" => 1, "3.3.1" => 1, "3.4.0" => 0, "latest" => 0 }.fetch(options[:version])
460
+ values << dump_options_version(options[:version])
433
461
 
434
462
  template << "C"
435
463
  values << (options[:encoding] == false ? 1 : 0)
436
464
 
465
+ template << "C"
466
+ values << (options.fetch(:main_script, false) ? 1 : 0)
467
+
468
+ template << "C"
469
+ values << (options.fetch(:partial_script, false) ? 1 : 0)
470
+
437
471
  template << "L"
438
472
  if (scopes = options[:scopes])
439
473
  values << scopes.length
@@ -124,7 +124,7 @@ module Prism
124
124
  # Inspect a ArgumentsNode node.
125
125
  def visit_arguments_node(node)
126
126
  commands << [inspect_node("ArgumentsNode", node), indent]
127
- flags = [("newline" if node.newline?), ("static_literal" if node.static_literal?), ("contains_keywords" if node.contains_keywords?), ("contains_keyword_splat" if node.contains_keyword_splat?), ("contains_splat" if node.contains_splat?)].compact
127
+ flags = [("newline" if node.newline?), ("static_literal" if node.static_literal?), ("contains_forwarding" if node.contains_forwarding?), ("contains_keywords" if node.contains_keywords?), ("contains_keyword_splat" if node.contains_keyword_splat?), ("contains_splat" if node.contains_splat?), ("contains_multiple_splats" if node.contains_multiple_splats?)].compact
128
128
  commands << ["├── flags: #{flags.empty? ? "∅" : flags.join(", ")}\n", indent]
129
129
  commands << ["└── arguments: (length: #{(arguments = node.arguments).length})\n", indent]
130
130
  if arguments.any?
@@ -481,7 +481,7 @@ module Prism
481
481
  embexpr_balance -= 1
482
482
  when :on_tstring_content
483
483
  if embexpr_balance == 0
484
- while index < max_index && tokens[index].event == :on_tstring_content
484
+ while index < max_index && tokens[index].event == :on_tstring_content && !token.value.match?(/\\\r?\n\z/)
485
485
  token.value << tokens[index].value
486
486
  index += 1
487
487
  end