prism 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -25,6 +25,15 @@
25
25
  #define __STDC_FORMAT_MACROS
26
26
  #include <inttypes.h>
27
27
 
28
+ /**
29
+ * When we are parsing using recursive descent, we want to protect against
30
+ * malicious payloads that could attempt to crash our parser. We do this by
31
+ * specifying a maximum depth to which we are allowed to recurse.
32
+ */
33
+ #ifndef PRISM_DEPTH_MAXIMUM
34
+ #define PRISM_DEPTH_MAXIMUM 1000
35
+ #endif
36
+
28
37
  /**
29
38
  * By default, we compile with -fvisibility=hidden. When this is enabled, we
30
39
  * need to mark certain functions as being publicly-visible. This macro does
@@ -212,4 +221,22 @@
212
221
  #define PRISM_ENCODING_EXCLUDE_FULL
213
222
  #endif
214
223
 
224
+ /**
225
+ * Support PRISM_LIKELY and PRISM_UNLIKELY to help the compiler optimize its
226
+ * branch prediction.
227
+ */
228
+ #if defined(__GNUC__) || defined(__clang__)
229
+ /** The compiler should predict that this branch will be taken. */
230
+ #define PRISM_LIKELY(x) __builtin_expect(!!(x), 1)
231
+
232
+ /** The compiler should predict that this branch will not be taken. */
233
+ #define PRISM_UNLIKELY(x) __builtin_expect(!!(x), 0)
234
+ #else
235
+ /** Void because this platform does not support branch prediction hints. */
236
+ #define PRISM_LIKELY(x) (x)
237
+
238
+ /** Void because this platform does not support branch prediction hints. */
239
+ #define PRISM_UNLIKELY(x) (x)
240
+ #endif
241
+
215
242
  #endif
@@ -44,7 +44,6 @@ typedef enum {
44
44
  PM_ERR_ARGUMENT_FORMAL_GLOBAL,
45
45
  PM_ERR_ARGUMENT_FORMAL_IVAR,
46
46
  PM_ERR_ARGUMENT_FORWARDING_UNBOUND,
47
- PM_ERR_ARGUMENT_IN,
48
47
  PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND,
49
48
  PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
50
49
  PM_ERR_ARGUMENT_NO_FORWARDING_STAR,
@@ -128,6 +127,7 @@ typedef enum {
128
127
  PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT,
129
128
  PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH,
130
129
  PM_ERR_EXPECT_EXPRESSION_AFTER_STAR,
130
+ PM_ERR_EXPECT_FOR_DELIMITER,
131
131
  PM_ERR_EXPECT_IDENT_REQ_PARAMETER,
132
132
  PM_ERR_EXPECT_IN_DELIMITER,
133
133
  PM_ERR_EXPECT_LPAREN_REQ_PARAMETER,
@@ -136,6 +136,7 @@ typedef enum {
136
136
  PM_ERR_EXPECT_RPAREN,
137
137
  PM_ERR_EXPECT_RPAREN_AFTER_MULTI,
138
138
  PM_ERR_EXPECT_RPAREN_REQ_PARAMETER,
139
+ PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER,
139
140
  PM_ERR_EXPECT_STRING_CONTENT,
140
141
  PM_ERR_EXPECT_WHEN_DELIMITER,
141
142
  PM_ERR_EXPRESSION_BARE_HASH,
@@ -214,7 +215,9 @@ typedef enum {
214
215
  PM_ERR_MODULE_TERM,
215
216
  PM_ERR_MULTI_ASSIGN_MULTI_SPLATS,
216
217
  PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST,
218
+ PM_ERR_NESTING_TOO_DEEP,
217
219
  PM_ERR_NO_LOCAL_VARIABLE,
220
+ PM_ERR_NON_ASSOCIATIVE_OPERATOR,
218
221
  PM_ERR_NOT_EXPRESSION,
219
222
  PM_ERR_NUMBER_LITERAL_UNDERSCORE,
220
223
  PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK,
@@ -300,6 +303,7 @@ typedef enum {
300
303
  PM_ERR_UNEXPECTED_BLOCK_ARGUMENT,
301
304
  PM_ERR_UNEXPECTED_INDEX_BLOCK,
302
305
  PM_ERR_UNEXPECTED_INDEX_KEYWORDS,
306
+ PM_ERR_UNEXPECTED_LABEL,
303
307
  PM_ERR_UNEXPECTED_MULTI_WRITE,
304
308
  PM_ERR_UNEXPECTED_RANGE_OPERATOR,
305
309
  PM_ERR_UNEXPECTED_SAFE_NAVIGATION,
@@ -7,6 +7,7 @@
7
7
  #define PRISM_OPTIONS_H
8
8
 
9
9
  #include "prism/defines.h"
10
+ #include "prism/util/pm_char.h"
10
11
  #include "prism/util/pm_string.h"
11
12
 
12
13
  #include <stdbool.h>
@@ -139,6 +140,23 @@ typedef struct pm_options {
139
140
  * but ignore any encoding magic comments at the top of the file.
140
141
  */
141
142
  bool encoding_locked;
143
+
144
+ /**
145
+ * When the file being parsed is the main script, the shebang will be
146
+ * considered for command-line flags (or for implicit -x). The caller needs
147
+ * to pass this information to the parser so that it can behave correctly.
148
+ */
149
+ bool main_script;
150
+
151
+ /**
152
+ * When the file being parsed is considered a "partial" script, jumps will
153
+ * not be marked as errors if they are not contained within loops/blocks.
154
+ * This is used in the case that you're parsing a script that you know will
155
+ * be embedded inside another script later, but you do not have that context
156
+ * yet. For example, when parsing an ERB template that will be evaluated
157
+ * inside another script.
158
+ */
159
+ bool partial_script;
142
160
  } pm_options_t;
143
161
 
144
162
  /**
@@ -248,6 +266,22 @@ PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options,
248
266
  */
249
267
  PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const char *version, size_t length);
250
268
 
269
+ /**
270
+ * Set the main script option on the given options struct.
271
+ *
272
+ * @param options The options struct to set the main script value on.
273
+ * @param main_script The main script value to set.
274
+ */
275
+ PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, bool main_script);
276
+
277
+ /**
278
+ * Set the partial script option on the given options struct.
279
+ *
280
+ * @param options The options struct to set the partial script value on.
281
+ * @param partial_script The partial script value to set.
282
+ */
283
+ PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options, bool partial_script);
284
+
251
285
  /**
252
286
  * Allocate and zero out the scopes array on the given options struct.
253
287
  *
@@ -315,6 +349,9 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
315
349
  * | `1` | -l command line option |
316
350
  * | `1` | -a command line option |
317
351
  * | `1` | the version |
352
+ * | `1` | encoding locked |
353
+ * | `1` | main script |
354
+ * | `1` | partial script |
318
355
  * | `4` | the number of scopes |
319
356
  * | ... | the scopes |
320
357
  *
@@ -347,8 +384,8 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
347
384
  * * The encoding can have a length of 0, in which case we'll use the default
348
385
  * encoding (UTF-8). If it's not 0, it should correspond to a name of an
349
386
  * encoding that can be passed to `Encoding.find` in Ruby.
350
- * * The frozen string literal and suppress warnings fields are booleans, so
351
- * their values should be either 0 or 1.
387
+ * * The frozen string literal, encoding locked, main script, and partial script
388
+ * fields are booleans, so their values should be either 0 or 1.
352
389
  * * The number of scopes can be 0.
353
390
  *
354
391
  * @param options The options struct to deserialize into.
@@ -861,6 +861,13 @@ struct pm_parser {
861
861
  */
862
862
  bool parsing_eval;
863
863
 
864
+ /**
865
+ * Whether or not we are parsing a "partial" script, which is a script that
866
+ * will be evaluated in the context of another script, so we should not
867
+ * check jumps (next/break/etc.) for validity.
868
+ */
869
+ bool partial_script;
870
+
864
871
  /** Whether or not we're at the beginning of a command. */
865
872
  bool command_start;
866
873
 
@@ -22,6 +22,9 @@
22
22
  #include <fcntl.h>
23
23
  #include <sys/mman.h>
24
24
  #include <sys/stat.h>
25
+ #elif defined(PRISM_HAS_FILESYSTEM)
26
+ #include <fcntl.h>
27
+ #include <sys/stat.h>
25
28
  #endif
26
29
 
27
30
  /**
@@ -93,6 +96,26 @@ void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length);
93
96
  */
94
97
  void pm_string_constant_init(pm_string_t *string, const char *source, size_t length);
95
98
 
99
+ /**
100
+ * Represents the result of calling pm_string_mapped_init or
101
+ * pm_string_file_init. We need this additional information because there is
102
+ * not a platform-agnostic way to indicate that the file that was attempted to
103
+ * be opened was a directory.
104
+ */
105
+ typedef enum {
106
+ /** Indicates that the string was successfully initialized. */
107
+ PM_STRING_INIT_SUCCESS = 0,
108
+ /**
109
+ * Indicates a generic error from a string_*_init function, where the type
110
+ * of error should be read from `errno` or `GetLastError()`.
111
+ */
112
+ PM_STRING_INIT_ERROR_GENERIC = 1,
113
+ /**
114
+ * Indicates that the file that was attempted to be opened was a directory.
115
+ */
116
+ PM_STRING_INIT_ERROR_DIRECTORY = 2
117
+ } pm_string_init_result_t;
118
+
96
119
  /**
97
120
  * Read the file indicated by the filepath parameter into source and load its
98
121
  * contents and size into the given `pm_string_t`. The given `pm_string_t`
@@ -106,9 +129,9 @@ void pm_string_constant_init(pm_string_t *string, const char *source, size_t len
106
129
  *
107
130
  * @param string The string to initialize.
108
131
  * @param filepath The filepath to read.
109
- * @return Whether or not the file was successfully mapped.
132
+ * @return The success of the read, indicated by the value of the enum.
110
133
  */
111
- PRISM_EXPORTED_FUNCTION bool pm_string_mapped_init(pm_string_t *string, const char *filepath);
134
+ PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_mapped_init(pm_string_t *string, const char *filepath);
112
135
 
113
136
  /**
114
137
  * Read the file indicated by the filepath parameter into source and load its
@@ -117,9 +140,9 @@ PRISM_EXPORTED_FUNCTION bool pm_string_mapped_init(pm_string_t *string, const ch
117
140
  *
118
141
  * @param string The string to initialize.
119
142
  * @param filepath The filepath to read.
120
- * @return Whether or not the file was successfully read.
143
+ * @return The success of the read, indicated by the value of the enum.
121
144
  */
122
- PRISM_EXPORTED_FUNCTION bool pm_string_file_init(pm_string_t *string, const char *filepath);
145
+ PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_file_init(pm_string_t *string, const char *filepath);
123
146
 
124
147
  /**
125
148
  * Ensure the string is owned. If it is not, then reinitialize it as owned and
@@ -14,7 +14,7 @@
14
14
  /**
15
15
  * The minor version of the Prism library as an int.
16
16
  */
17
- #define PRISM_VERSION_MINOR 0
17
+ #define PRISM_VERSION_MINOR 1
18
18
 
19
19
  /**
20
20
  * The patch version of the Prism library as an int.
@@ -24,6 +24,6 @@
24
24
  /**
25
25
  * The version of the Prism library as a constant string.
26
26
  */
27
- #define PRISM_VERSION "1.0.0"
27
+ #define PRISM_VERSION "1.1.0"
28
28
 
29
29
  #endif
@@ -4592,9 +4592,11 @@ module Prism
4592
4592
  # comma-separated list.
4593
4593
  def arguments_node_flags_inspect(node)
4594
4594
  flags = [] #: Array[String]
4595
+ flags << "contains_forwarding" if node.contains_forwarding?
4595
4596
  flags << "contains_keywords" if node.contains_keywords?
4596
4597
  flags << "contains_keyword_splat" if node.contains_keyword_splat?
4597
4598
  flags << "contains_splat" if node.contains_splat?
4599
+ flags << "contains_multiple_splats" if node.contains_multiple_splats?
4598
4600
  flags.join(", ")
4599
4601
  end
4600
4602
 
data/lib/prism/dsl.rb CHANGED
@@ -184,7 +184,7 @@ module Prism
184
184
  end
185
185
 
186
186
  # Create a new CapturePatternNode node.
187
- def capture_pattern_node(source: default_source, node_id: 0, location: default_location, flags: 0, value: default_node(source, location), target: default_node(source, location), operator_loc: location)
187
+ def capture_pattern_node(source: default_source, node_id: 0, location: default_location, flags: 0, value: default_node(source, location), target: local_variable_target_node(source: source), operator_loc: location)
188
188
  CapturePatternNode.new(source, node_id, location, flags, value, target, operator_loc)
189
189
  end
190
190
 
@@ -199,7 +199,7 @@ module Prism
199
199
  end
200
200
 
201
201
  # Create a new ClassNode node.
202
- def class_node(source: default_source, node_id: 0, location: default_location, flags: 0, locals: [], class_keyword_loc: location, constant_path: default_node(source, location), inheritance_operator_loc: nil, superclass: nil, body: nil, end_keyword_loc: location, name: :"")
202
+ def class_node(source: default_source, node_id: 0, location: default_location, flags: 0, locals: [], class_keyword_loc: location, constant_path: constant_read_node(source: source), inheritance_operator_loc: nil, superclass: nil, body: nil, end_keyword_loc: location, name: :"")
203
203
  ClassNode.new(source, node_id, location, flags, locals, class_keyword_loc, constant_path, inheritance_operator_loc, superclass, body, end_keyword_loc, name)
204
204
  end
205
205
 
@@ -314,7 +314,7 @@ module Prism
314
314
  end
315
315
 
316
316
  # Create a new EmbeddedVariableNode node.
317
- def embedded_variable_node(source: default_source, node_id: 0, location: default_location, flags: 0, operator_loc: location, variable: default_node(source, location))
317
+ def embedded_variable_node(source: default_source, node_id: 0, location: default_location, flags: 0, operator_loc: location, variable: instance_variable_read_node(source: source))
318
318
  EmbeddedVariableNode.new(source, node_id, location, flags, operator_loc, variable)
319
319
  end
320
320
 
@@ -329,7 +329,7 @@ module Prism
329
329
  end
330
330
 
331
331
  # Create a new FindPatternNode node.
332
- def find_pattern_node(source: default_source, node_id: 0, location: default_location, flags: 0, constant: nil, left: default_node(source, location), requireds: [], right: default_node(source, location), opening_loc: nil, closing_loc: nil)
332
+ def find_pattern_node(source: default_source, node_id: 0, location: default_location, flags: 0, constant: nil, left: splat_node(source: source), requireds: [], right: splat_node(source: source), opening_loc: nil, closing_loc: nil)
333
333
  FindPatternNode.new(source, node_id, location, flags, constant, left, requireds, right, opening_loc, closing_loc)
334
334
  end
335
335
 
@@ -344,7 +344,7 @@ module Prism
344
344
  end
345
345
 
346
346
  # Create a new ForNode node.
347
- def for_node(source: default_source, node_id: 0, location: default_location, flags: 0, index: default_node(source, location), collection: default_node(source, location), statements: nil, for_keyword_loc: location, in_keyword_loc: location, do_keyword_loc: nil, end_keyword_loc: location)
347
+ def for_node(source: default_source, node_id: 0, location: default_location, flags: 0, index: local_variable_target_node(source: source), collection: default_node(source, location), statements: nil, for_keyword_loc: location, in_keyword_loc: location, do_keyword_loc: nil, end_keyword_loc: location)
348
348
  ForNode.new(source, node_id, location, flags, index, collection, statements, for_keyword_loc, in_keyword_loc, do_keyword_loc, end_keyword_loc)
349
349
  end
350
350
 
@@ -414,7 +414,7 @@ module Prism
414
414
  end
415
415
 
416
416
  # Create a new ImplicitNode node.
417
- def implicit_node(source: default_source, node_id: 0, location: default_location, flags: 0, value: default_node(source, location))
417
+ def implicit_node(source: default_source, node_id: 0, location: default_location, flags: 0, value: local_variable_read_node(source: source))
418
418
  ImplicitNode.new(source, node_id, location, flags, value)
419
419
  end
420
420
 
@@ -589,7 +589,7 @@ module Prism
589
589
  end
590
590
 
591
591
  # Create a new ModuleNode node.
592
- def module_node(source: default_source, node_id: 0, location: default_location, flags: 0, locals: [], module_keyword_loc: location, constant_path: default_node(source, location), body: nil, end_keyword_loc: location, name: :"")
592
+ def module_node(source: default_source, node_id: 0, location: default_location, flags: 0, locals: [], module_keyword_loc: location, constant_path: constant_read_node(source: source), body: nil, end_keyword_loc: location, name: :"")
593
593
  ModuleNode.new(source, node_id, location, flags, locals, module_keyword_loc, constant_path, body, end_keyword_loc, name)
594
594
  end
595
595
 
@@ -659,7 +659,7 @@ module Prism
659
659
  end
660
660
 
661
661
  # Create a new PinnedVariableNode node.
662
- def pinned_variable_node(source: default_source, node_id: 0, location: default_location, flags: 0, variable: default_node(source, location), operator_loc: location)
662
+ def pinned_variable_node(source: default_source, node_id: 0, location: default_location, flags: 0, variable: local_variable_read_node(source: source), operator_loc: location)
663
663
  PinnedVariableNode.new(source, node_id, location, flags, variable, operator_loc)
664
664
  end
665
665
 
@@ -831,9 +831,11 @@ module Prism
831
831
  # Retrieve the value of one of the ArgumentsNodeFlags flags.
832
832
  def arguments_node_flag(name)
833
833
  case name
834
+ when :contains_forwarding then ArgumentsNodeFlags::CONTAINS_FORWARDING
834
835
  when :contains_keywords then ArgumentsNodeFlags::CONTAINS_KEYWORDS
835
836
  when :contains_keyword_splat then ArgumentsNodeFlags::CONTAINS_KEYWORD_SPLAT
836
837
  when :contains_splat then ArgumentsNodeFlags::CONTAINS_SPLAT
838
+ when :contains_multiple_splats then ArgumentsNodeFlags::CONTAINS_MULTIPLE_SPLATS
837
839
  else Kernel.raise ArgumentError, "invalid ArgumentsNodeFlags flag: #{name.inspect}"
838
840
  end
839
841
  end
data/lib/prism/ffi.rb CHANGED
@@ -72,6 +72,7 @@ module Prism
72
72
  end
73
73
 
74
74
  callback :pm_parse_stream_fgets_t, [:pointer, :int, :pointer], :pointer
75
+ enum :pm_string_init_result_t, %i[PM_STRING_INIT_SUCCESS PM_STRING_INIT_ERROR_GENERIC PM_STRING_INIT_ERROR_DIRECTORY]
75
76
 
76
77
  load_exported_functions_from(
77
78
  "prism.h",
@@ -176,13 +177,26 @@ module Prism
176
177
  def self.with_file(filepath)
177
178
  raise TypeError unless filepath.is_a?(String)
178
179
 
180
+ # On Windows and Mac, it's expected that filepaths will be encoded in
181
+ # UTF-8. If they are not, we need to convert them to UTF-8 before
182
+ # passing them into pm_string_mapped_init.
183
+ if RbConfig::CONFIG["host_os"].match?(/bccwin|cygwin|djgpp|mingw|mswin|wince|darwin/i) &&
184
+ (encoding = filepath.encoding) != Encoding::ASCII_8BIT && encoding != Encoding::UTF_8
185
+ filepath = filepath.encode(Encoding::UTF_8)
186
+ end
187
+
179
188
  FFI::MemoryPointer.new(SIZEOF) do |pm_string|
180
- if LibRubyParser.pm_string_mapped_init(pm_string, filepath)
189
+ case (result = LibRubyParser.pm_string_mapped_init(pm_string, filepath))
190
+ when :PM_STRING_INIT_SUCCESS
181
191
  pointer = LibRubyParser.pm_string_source(pm_string)
182
192
  length = LibRubyParser.pm_string_length(pm_string)
183
193
  return yield new(pointer, length, false)
184
- else
194
+ when :PM_STRING_INIT_ERROR_GENERIC
185
195
  raise SystemCallError.new(filepath, FFI.errno)
196
+ when :PM_STRING_INIT_ERROR_DIRECTORY
197
+ raise Errno::EISDIR.new(filepath)
198
+ else
199
+ raise "Unknown error initializing pm_string_t: #{result.inspect}"
186
200
  end
187
201
  ensure
188
202
  LibRubyParser.pm_string_free(pm_string)
@@ -397,6 +411,20 @@ module Prism
397
411
  end
398
412
  end
399
413
 
414
+ # Return the value that should be dumped for the version option.
415
+ def dump_options_version(version)
416
+ case version
417
+ when nil, "latest"
418
+ 0
419
+ when /\A3\.3(\.\d+)?\z/
420
+ 1
421
+ when /\A3\.4(\.\d+)?\z/
422
+ 0
423
+ else
424
+ raise ArgumentError, "invalid version: #{version}"
425
+ end
426
+ end
427
+
400
428
  # Convert the given options into a serialized options string.
401
429
  def dump_options(options)
402
430
  template = +""
@@ -429,11 +457,17 @@ module Prism
429
457
  values << dump_options_command_line(options)
430
458
 
431
459
  template << "C"
432
- values << { nil => 0, "3.3.0" => 1, "3.3.1" => 1, "3.4.0" => 0, "latest" => 0 }.fetch(options[:version])
460
+ values << dump_options_version(options[:version])
433
461
 
434
462
  template << "C"
435
463
  values << (options[:encoding] == false ? 1 : 0)
436
464
 
465
+ template << "C"
466
+ values << (options.fetch(:main_script, false) ? 1 : 0)
467
+
468
+ template << "C"
469
+ values << (options.fetch(:partial_script, false) ? 1 : 0)
470
+
437
471
  template << "L"
438
472
  if (scopes = options[:scopes])
439
473
  values << scopes.length
@@ -124,7 +124,7 @@ module Prism
124
124
  # Inspect a ArgumentsNode node.
125
125
  def visit_arguments_node(node)
126
126
  commands << [inspect_node("ArgumentsNode", node), indent]
127
- flags = [("newline" if node.newline?), ("static_literal" if node.static_literal?), ("contains_keywords" if node.contains_keywords?), ("contains_keyword_splat" if node.contains_keyword_splat?), ("contains_splat" if node.contains_splat?)].compact
127
+ flags = [("newline" if node.newline?), ("static_literal" if node.static_literal?), ("contains_forwarding" if node.contains_forwarding?), ("contains_keywords" if node.contains_keywords?), ("contains_keyword_splat" if node.contains_keyword_splat?), ("contains_splat" if node.contains_splat?), ("contains_multiple_splats" if node.contains_multiple_splats?)].compact
128
128
  commands << ["├── flags: #{flags.empty? ? "∅" : flags.join(", ")}\n", indent]
129
129
  commands << ["└── arguments: (length: #{(arguments = node.arguments).length})\n", indent]
130
130
  if arguments.any?
@@ -481,7 +481,7 @@ module Prism
481
481
  embexpr_balance -= 1
482
482
  when :on_tstring_content
483
483
  if embexpr_balance == 0
484
- while index < max_index && tokens[index].event == :on_tstring_content
484
+ while index < max_index && tokens[index].event == :on_tstring_content && !token.value.match?(/\\\r?\n\z/)
485
485
  token.value << tokens[index].value
486
486
  index += 1
487
487
  end