prism 0.17.1 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +60 -1
  3. data/Makefile +5 -5
  4. data/README.md +4 -3
  5. data/config.yml +214 -68
  6. data/docs/build_system.md +6 -6
  7. data/docs/building.md +10 -3
  8. data/docs/configuration.md +11 -9
  9. data/docs/encoding.md +92 -88
  10. data/docs/heredocs.md +1 -1
  11. data/docs/javascript.md +29 -1
  12. data/docs/local_variable_depth.md +229 -0
  13. data/docs/ruby_api.md +16 -0
  14. data/docs/serialization.md +18 -13
  15. data/ext/prism/api_node.c +411 -240
  16. data/ext/prism/extconf.rb +97 -127
  17. data/ext/prism/extension.c +97 -33
  18. data/ext/prism/extension.h +1 -1
  19. data/include/prism/ast.h +377 -159
  20. data/include/prism/defines.h +17 -0
  21. data/include/prism/diagnostic.h +38 -6
  22. data/include/prism/{enc/pm_encoding.h → encoding.h} +126 -64
  23. data/include/prism/options.h +2 -2
  24. data/include/prism/parser.h +62 -36
  25. data/include/prism/regexp.h +2 -2
  26. data/include/prism/util/pm_buffer.h +9 -1
  27. data/include/prism/util/pm_memchr.h +2 -2
  28. data/include/prism/util/pm_strpbrk.h +3 -3
  29. data/include/prism/version.h +3 -3
  30. data/include/prism.h +13 -15
  31. data/lib/prism/compiler.rb +15 -3
  32. data/lib/prism/debug.rb +13 -4
  33. data/lib/prism/desugar_compiler.rb +4 -3
  34. data/lib/prism/dispatcher.rb +70 -14
  35. data/lib/prism/dot_visitor.rb +4612 -0
  36. data/lib/prism/dsl.rb +77 -57
  37. data/lib/prism/ffi.rb +19 -6
  38. data/lib/prism/lex_compat.rb +19 -9
  39. data/lib/prism/mutation_compiler.rb +26 -6
  40. data/lib/prism/node.rb +1314 -522
  41. data/lib/prism/node_ext.rb +102 -19
  42. data/lib/prism/parse_result.rb +58 -27
  43. data/lib/prism/ripper_compat.rb +49 -34
  44. data/lib/prism/serialize.rb +251 -227
  45. data/lib/prism/visitor.rb +15 -3
  46. data/lib/prism.rb +21 -4
  47. data/prism.gemspec +7 -9
  48. data/rbi/prism.rbi +688 -284
  49. data/rbi/prism_static.rbi +3 -0
  50. data/sig/prism.rbs +426 -156
  51. data/sig/prism_static.rbs +1 -0
  52. data/src/diagnostic.c +280 -216
  53. data/src/encoding.c +5137 -0
  54. data/src/node.c +99 -21
  55. data/src/options.c +21 -2
  56. data/src/prettyprint.c +1743 -1241
  57. data/src/prism.c +1774 -831
  58. data/src/regexp.c +15 -15
  59. data/src/serialize.c +261 -164
  60. data/src/util/pm_buffer.c +10 -1
  61. data/src/util/pm_memchr.c +1 -1
  62. data/src/util/pm_strpbrk.c +4 -4
  63. metadata +8 -10
  64. data/src/enc/pm_big5.c +0 -53
  65. data/src/enc/pm_euc_jp.c +0 -59
  66. data/src/enc/pm_gbk.c +0 -62
  67. data/src/enc/pm_shift_jis.c +0 -57
  68. data/src/enc/pm_tables.c +0 -743
  69. data/src/enc/pm_unicode.c +0 -2369
  70. data/src/enc/pm_windows_31j.c +0 -57
@@ -14,8 +14,49 @@ module Prism
14
14
  end
15
15
  end
16
16
 
17
+ class InterpolatedMatchLastLineNode < Node
18
+ include RegularExpressionOptions
19
+ end
20
+
21
+ class InterpolatedRegularExpressionNode < Node
22
+ include RegularExpressionOptions
23
+ end
24
+
25
+ class MatchLastLineNode < Node
26
+ include RegularExpressionOptions
27
+ end
28
+
29
+ class RegularExpressionNode < Node
30
+ include RegularExpressionOptions
31
+ end
32
+
17
33
  private_constant :RegularExpressionOptions
18
34
 
35
+ module HeredocQuery # :nodoc:
36
+ # Returns true if this node was represented as a heredoc in the source code.
37
+ def heredoc?
38
+ opening&.start_with?("<<")
39
+ end
40
+ end
41
+
42
+ class InterpolatedStringNode < Node
43
+ include HeredocQuery
44
+ end
45
+
46
+ class InterpolatedXStringNode < Node
47
+ include HeredocQuery
48
+ end
49
+
50
+ class StringNode < Node
51
+ include HeredocQuery
52
+ end
53
+
54
+ class XStringNode < Node
55
+ include HeredocQuery
56
+ end
57
+
58
+ private_constant :HeredocQuery
59
+
19
60
  class FloatNode < Node
20
61
  # Returns the value of the node as a Ruby Float.
21
62
  def value
@@ -37,18 +78,6 @@ module Prism
37
78
  end
38
79
  end
39
80
 
40
- class InterpolatedMatchLastLineNode < Node
41
- include RegularExpressionOptions
42
- end
43
-
44
- class InterpolatedRegularExpressionNode < Node
45
- include RegularExpressionOptions
46
- end
47
-
48
- class MatchLastLineNode < Node
49
- include RegularExpressionOptions
50
- end
51
-
52
81
  class RationalNode < Node
53
82
  # Returns the value of the node as a Ruby Rational.
54
83
  def value
@@ -56,12 +85,9 @@ module Prism
56
85
  end
57
86
  end
58
87
 
59
- class RegularExpressionNode < Node
60
- include RegularExpressionOptions
61
- end
62
-
63
88
  class ConstantReadNode < Node
64
- # Returns the list of parts for the full name of this constant. For example: [:Foo]
89
+ # Returns the list of parts for the full name of this constant.
90
+ # For example: [:Foo]
65
91
  def full_name_parts
66
92
  [name]
67
93
  end
@@ -73,7 +99,16 @@ module Prism
73
99
  end
74
100
 
75
101
  class ConstantPathNode < Node
76
- # Returns the list of parts for the full name of this constant path. For example: [:Foo, :Bar]
102
+ # An error class raised when dynamic parts are found while computing a
103
+ # constant path's full name. For example:
104
+ # Foo::Bar::Baz -> does not raise because all parts of the constant path are
105
+ # simple constants
106
+ # var::Bar::Baz -> raises because the first part of the constant path is a
107
+ # local variable
108
+ class DynamicPartsInConstantPathError < StandardError; end
109
+
110
+ # Returns the list of parts for the full name of this constant path.
111
+ # For example: [:Foo, :Bar]
77
112
  def full_name_parts
78
113
  parts = [child.name]
79
114
  current = parent
@@ -83,6 +118,10 @@ module Prism
83
118
  current = current.parent
84
119
  end
85
120
 
121
+ unless current.is_a?(ConstantReadNode)
122
+ raise DynamicPartsInConstantPathError, "Constant path contains dynamic parts. Cannot compute full name"
123
+ end
124
+
86
125
  parts.unshift(current&.name || :"")
87
126
  end
88
127
 
@@ -93,7 +132,8 @@ module Prism
93
132
  end
94
133
 
95
134
  class ConstantPathTargetNode < Node
96
- # Returns the list of parts for the full name of this constant path. For example: [:Foo, :Bar]
135
+ # Returns the list of parts for the full name of this constant path.
136
+ # For example: [:Foo, :Bar]
97
137
  def full_name_parts
98
138
  (parent&.full_name_parts || [:""]).push(child.name)
99
139
  end
@@ -103,4 +143,47 @@ module Prism
103
143
  full_name_parts.join("::")
104
144
  end
105
145
  end
146
+
147
+ class ParametersNode < Node
148
+ # Mirrors the Method#parameters method.
149
+ def signature
150
+ names = []
151
+
152
+ requireds.each do |param|
153
+ names << (param.is_a?(MultiTargetNode) ? [:req] : [:req, param.name])
154
+ end
155
+
156
+ optionals.each { |param| names << [:opt, param.name] }
157
+ names << [:rest, rest.name || :*] if rest
158
+
159
+ posts.each do |param|
160
+ names << (param.is_a?(MultiTargetNode) ? [:req] : [:req, param.name])
161
+ end
162
+
163
+ # Regardless of the order in which the keywords were defined, the required
164
+ # keywords always come first followed by the optional keywords.
165
+ keyopt = []
166
+ keywords.each do |param|
167
+ if param.is_a?(OptionalKeywordParameterNode)
168
+ keyopt << param
169
+ else
170
+ names << [:keyreq, param.name]
171
+ end
172
+ end
173
+
174
+ keyopt.each { |param| names << [:key, param.name] }
175
+
176
+ case keyword_rest
177
+ when ForwardingParameterNode
178
+ names.concat([[:rest, :*], [:keyrest, :**], [:block, :&]])
179
+ when KeywordRestParameterNode
180
+ names << [:keyrest, keyword_rest.name || :**]
181
+ when NoKeywordsParameterNode
182
+ names << [:nokey]
183
+ end
184
+
185
+ names << [:block, block.name || :&] if block
186
+ names
187
+ end
188
+ end
106
189
  end
@@ -25,40 +25,50 @@ module Prism
25
25
 
26
26
  # Perform a byteslice on the source code using the given byte offset and
27
27
  # byte length.
28
- def slice(offset, length)
29
- source.byteslice(offset, length)
28
+ def slice(byte_offset, length)
29
+ source.byteslice(byte_offset, length)
30
30
  end
31
31
 
32
32
  # Binary search through the offsets to find the line number for the given
33
33
  # byte offset.
34
- def line(value)
35
- start_line + find_line(value)
34
+ def line(byte_offset)
35
+ start_line + find_line(byte_offset)
36
36
  end
37
37
 
38
38
  # Return the byte offset of the start of the line corresponding to the given
39
39
  # byte offset.
40
- def line_offset(value)
41
- offsets[find_line(value)]
40
+ def line_start(byte_offset)
41
+ offsets[find_line(byte_offset)]
42
42
  end
43
43
 
44
44
  # Return the column number for the given byte offset.
45
- def column(value)
46
- value - offsets[find_line(value)]
45
+ def column(byte_offset)
46
+ byte_offset - line_start(byte_offset)
47
+ end
48
+
49
+ # Return the character offset for the given byte offset.
50
+ def character_offset(byte_offset)
51
+ source.byteslice(0, byte_offset).length
52
+ end
53
+
54
+ # Return the column number in characters for the given byte offset.
55
+ def character_column(byte_offset)
56
+ character_offset(byte_offset) - character_offset(line_start(byte_offset))
47
57
  end
48
58
 
49
59
  private
50
60
 
51
61
  # Binary search through the offsets to find the line number for the given
52
62
  # byte offset.
53
- def find_line(value)
63
+ def find_line(byte_offset)
54
64
  left = 0
55
65
  right = offsets.length - 1
56
66
 
57
67
  while left <= right
58
68
  mid = left + (right - left) / 2
59
- return mid if offsets[mid] == value
69
+ return mid if offsets[mid] == byte_offset
60
70
 
61
- if offsets[mid] < value
71
+ if offsets[mid] < byte_offset
62
72
  left = mid + 1
63
73
  else
64
74
  right = mid - 1
@@ -121,11 +131,23 @@ module Prism
121
131
  source.slice(start_offset, length)
122
132
  end
123
133
 
134
+ # The character offset from the beginning of the source where this location
135
+ # starts.
136
+ def start_character_offset
137
+ source.character_offset(start_offset)
138
+ end
139
+
124
140
  # The byte offset from the beginning of the source where this location ends.
125
141
  def end_offset
126
142
  start_offset + length
127
143
  end
128
144
 
145
+ # The character offset from the beginning of the source where this location
146
+ # ends.
147
+ def end_character_offset
148
+ source.character_offset(end_offset)
149
+ end
150
+
129
151
  # The line number where this location starts.
130
152
  def start_line
131
153
  source.line(start_offset)
@@ -133,7 +155,7 @@ module Prism
133
155
 
134
156
  # The content of the line where this location starts before this location.
135
157
  def start_line_slice
136
- offset = source.line_offset(start_offset)
158
+ offset = source.line_start(start_offset)
137
159
  source.slice(offset, start_offset - offset)
138
160
  end
139
161
 
@@ -148,12 +170,24 @@ module Prism
148
170
  source.column(start_offset)
149
171
  end
150
172
 
173
+ # The column number in characters where this location starts from the start of
174
+ # the line.
175
+ def start_character_column
176
+ source.character_column(start_offset)
177
+ end
178
+
151
179
  # The column number in bytes where this location ends from the start of the
152
180
  # line.
153
181
  def end_column
154
182
  source.column(end_offset)
155
183
  end
156
184
 
185
+ # The column number in characters where this location ends from the start of
186
+ # the line.
187
+ def end_character_column
188
+ source.character_column(end_offset)
189
+ end
190
+
157
191
  # Implement the hash pattern matching interface for Location.
158
192
  def deconstruct_keys(keys)
159
193
  { start_offset: start_offset, end_offset: end_offset }
@@ -204,11 +238,6 @@ module Prism
204
238
  def deconstruct_keys(keys)
205
239
  { location: location }
206
240
  end
207
-
208
- # This can only be true for inline comments.
209
- def trailing?
210
- false
211
- end
212
241
  end
213
242
 
214
243
  # InlineComment objects are the most common. They correspond to comments in
@@ -229,18 +258,14 @@ module Prism
229
258
  # EmbDocComment objects correspond to comments that are surrounded by =begin
230
259
  # and =end.
231
260
  class EmbDocComment < Comment
232
- # Returns a string representation of this comment.
233
- def inspect
234
- "#<Prism::EmbDocComment @location=#{location.inspect}>"
261
+ # This can only be true for inline comments.
262
+ def trailing?
263
+ false
235
264
  end
236
- end
237
265
 
238
- # DATAComment objects correspond to comments that are after the __END__
239
- # keyword in a source file.
240
- class DATAComment < Comment
241
266
  # Returns a string representation of this comment.
242
267
  def inspect
243
- "#<Prism::DATAComment @location=#{location.inspect}>"
268
+ "#<Prism::EmbDocComment @location=#{location.inspect}>"
244
269
  end
245
270
  end
246
271
 
@@ -344,6 +369,11 @@ module Prism
344
369
  # The list of magic comments that were encountered during parsing.
345
370
  attr_reader :magic_comments
346
371
 
372
+ # An optional location that represents the location of the content after the
373
+ # __END__ marker. This content is loaded into the DATA constant when the
374
+ # file being parsed is the main file being executed.
375
+ attr_reader :data_loc
376
+
347
377
  # The list of errors that were generated during parsing.
348
378
  attr_reader :errors
349
379
 
@@ -354,10 +384,11 @@ module Prism
354
384
  attr_reader :source
355
385
 
356
386
  # Create a new parse result object with the given values.
357
- def initialize(value, comments, magic_comments, errors, warnings, source)
387
+ def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
358
388
  @value = value
359
389
  @comments = comments
360
390
  @magic_comments = magic_comments
391
+ @data_loc = data_loc
361
392
  @errors = errors
362
393
  @warnings = warnings
363
394
  @source = source
@@ -365,7 +396,7 @@ module Prism
365
396
 
366
397
  # Implement the hash pattern matching interface for ParseResult.
367
398
  def deconstruct_keys(keys)
368
- { value: value, comments: comments, magic_comments: magic_comments, errors: errors, warnings: warnings }
399
+ { value: value, comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings }
369
400
  end
370
401
 
371
402
  # Returns true if there were no errors during parsing and false if there
@@ -3,6 +3,10 @@
3
3
  require "ripper"
4
4
 
5
5
  module Prism
6
+ # Note: This integration is not finished, and therefore still has many
7
+ # inconsistencies with Ripper. If you'd like to help out, pull requests would
8
+ # be greatly appreciated!
9
+ #
6
10
  # This class is meant to provide a compatibility layer between prism and
7
11
  # Ripper. It functions by parsing the entire tree first and then walking it
8
12
  # and executing each of the Ripper callbacks as it goes.
@@ -10,7 +14,10 @@ module Prism
10
14
  # This class is going to necessarily be slower than the native Ripper API. It
11
15
  # is meant as a stopgap until developers migrate to using prism. It is also
12
16
  # meant as a test harness for the prism parser.
13
- class RipperCompat
17
+ #
18
+ # To use this class, you treat `Prism::RipperCompat` effectively as you would
19
+ # treat the `Ripper` class.
20
+ class RipperCompat < Visitor
14
21
  # This class mirrors the ::Ripper::SexpBuilder subclass of ::Ripper that
15
22
  # returns the arrays of [type, *children].
16
23
  class SexpBuilder < RipperCompat
@@ -77,43 +84,63 @@ module Prism
77
84
 
78
85
  # True if the parser encountered an error during parsing.
79
86
  def error?
80
- result.errors.any?
87
+ result.failure?
81
88
  end
82
89
 
83
90
  # Parse the source and return the result.
84
91
  def parse
85
- result.value.accept(self) unless error?
92
+ result.magic_comments.each do |magic_comment|
93
+ on_magic_comment(magic_comment.key, magic_comment.value)
94
+ end
95
+
96
+ if error?
97
+ result.errors.each do |error|
98
+ on_parse_error(error.message)
99
+ end
100
+ else
101
+ result.value.accept(self)
102
+ end
86
103
  end
87
104
 
88
105
  ############################################################################
89
106
  # Visitor methods
90
107
  ############################################################################
91
108
 
92
- # This method is responsible for dispatching to the correct visitor method
93
- # based on the type of the node.
94
- def visit(node)
95
- node&.accept(self)
96
- end
97
-
98
109
  # Visit a CallNode node.
99
110
  def visit_call_node(node)
100
- if !node.opening_loc && node.arguments.arguments.length == 1
101
- bounds(node.receiver.location)
111
+ if !node.message.match?(/^[[:alpha:]_]/) && node.opening_loc.nil? && node.arguments&.arguments&.length == 1
102
112
  left = visit(node.receiver)
103
-
104
- bounds(node.arguments.arguments.first.location)
105
113
  right = visit(node.arguments.arguments.first)
106
114
 
107
- on_binary(left, source[node.message_loc.start_offset...node.message_loc.end_offset].to_sym, right)
115
+ bounds(node.location)
116
+ on_binary(left, node.name, right)
108
117
  else
109
118
  raise NotImplementedError
110
119
  end
111
120
  end
112
121
 
122
+ # Visit a FloatNode node.
123
+ def visit_float_node(node)
124
+ bounds(node.location)
125
+ on_float(node.slice)
126
+ end
127
+
128
+ # Visit an ImaginaryNode node.
129
+ def visit_imaginary_node(node)
130
+ bounds(node.location)
131
+ on_imaginary(node.slice)
132
+ end
133
+
113
134
  # Visit an IntegerNode node.
114
135
  def visit_integer_node(node)
115
136
  bounds(node.location)
116
- on_int(source[node.location.start_offset...node.location.end_offset])
137
+ on_int(node.slice)
138
+ end
139
+
140
+ # Visit a RationalNode node.
141
+ def visit_rational_node(node)
142
+ bounds(node.location)
143
+ on_rational(node.slice)
117
144
  end
118
145
 
119
146
  # Visit a StatementsNode node.
@@ -124,24 +151,11 @@ module Prism
124
151
  end
125
152
  end
126
153
 
127
- # Visit a token found during parsing.
128
- def visit_token(node)
129
- bounds(node.location)
130
-
131
- case node.type
132
- when :MINUS
133
- on_op(node.value)
134
- when :PLUS
135
- on_op(node.value)
136
- else
137
- raise NotImplementedError, "Unknown token: #{node.type}"
138
- end
139
- end
140
-
141
154
  # Visit a ProgramNode node.
142
155
  def visit_program_node(node)
156
+ statements = visit(node.statements)
143
157
  bounds(node.location)
144
- on_program(visit(node.statements))
158
+ on_program(statements)
145
159
  end
146
160
 
147
161
  ############################################################################
@@ -166,10 +180,8 @@ module Prism
166
180
  # This method could be drastically improved with some caching on the start
167
181
  # of every line, but for now it's good enough.
168
182
  def bounds(location)
169
- start_offset = location.start_offset
170
-
171
- @lineno = source[0..start_offset].count("\n") + 1
172
- @column = start_offset - (source.rindex("\n", start_offset) || 0)
183
+ @lineno = location.start_line
184
+ @column = location.start_column
173
185
  end
174
186
 
175
187
  # Lazily initialize the parse result.
@@ -185,6 +197,9 @@ module Prism
185
197
  def _dispatch5(_, _, _, _, _); end # :nodoc:
186
198
  def _dispatch7(_, _, _, _, _, _, _); end # :nodoc:
187
199
 
200
+ alias_method :on_parse_error, :_dispatch1
201
+ alias_method :on_magic_comment, :_dispatch2
202
+
188
203
  (Ripper::SCANNER_EVENT_TABLE.merge(Ripper::PARSER_EVENT_TABLE)).each do |event, arity|
189
204
  alias_method :"on_#{event}", :"_dispatch#{arity}"
190
205
  end