prism 0.17.1 → 0.19.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +60 -1
  3. data/Makefile +5 -5
  4. data/README.md +4 -3
  5. data/config.yml +214 -68
  6. data/docs/build_system.md +6 -6
  7. data/docs/building.md +10 -3
  8. data/docs/configuration.md +11 -9
  9. data/docs/encoding.md +92 -88
  10. data/docs/heredocs.md +1 -1
  11. data/docs/javascript.md +29 -1
  12. data/docs/local_variable_depth.md +229 -0
  13. data/docs/ruby_api.md +16 -0
  14. data/docs/serialization.md +18 -13
  15. data/ext/prism/api_node.c +411 -240
  16. data/ext/prism/extconf.rb +97 -127
  17. data/ext/prism/extension.c +97 -33
  18. data/ext/prism/extension.h +1 -1
  19. data/include/prism/ast.h +377 -159
  20. data/include/prism/defines.h +17 -0
  21. data/include/prism/diagnostic.h +38 -6
  22. data/include/prism/{enc/pm_encoding.h → encoding.h} +126 -64
  23. data/include/prism/options.h +2 -2
  24. data/include/prism/parser.h +62 -36
  25. data/include/prism/regexp.h +2 -2
  26. data/include/prism/util/pm_buffer.h +9 -1
  27. data/include/prism/util/pm_memchr.h +2 -2
  28. data/include/prism/util/pm_strpbrk.h +3 -3
  29. data/include/prism/version.h +3 -3
  30. data/include/prism.h +13 -15
  31. data/lib/prism/compiler.rb +15 -3
  32. data/lib/prism/debug.rb +13 -4
  33. data/lib/prism/desugar_compiler.rb +4 -3
  34. data/lib/prism/dispatcher.rb +70 -14
  35. data/lib/prism/dot_visitor.rb +4612 -0
  36. data/lib/prism/dsl.rb +77 -57
  37. data/lib/prism/ffi.rb +19 -6
  38. data/lib/prism/lex_compat.rb +19 -9
  39. data/lib/prism/mutation_compiler.rb +26 -6
  40. data/lib/prism/node.rb +1314 -522
  41. data/lib/prism/node_ext.rb +102 -19
  42. data/lib/prism/parse_result.rb +58 -27
  43. data/lib/prism/ripper_compat.rb +49 -34
  44. data/lib/prism/serialize.rb +251 -227
  45. data/lib/prism/visitor.rb +15 -3
  46. data/lib/prism.rb +21 -4
  47. data/prism.gemspec +7 -9
  48. data/rbi/prism.rbi +688 -284
  49. data/rbi/prism_static.rbi +3 -0
  50. data/sig/prism.rbs +426 -156
  51. data/sig/prism_static.rbs +1 -0
  52. data/src/diagnostic.c +280 -216
  53. data/src/encoding.c +5137 -0
  54. data/src/node.c +99 -21
  55. data/src/options.c +21 -2
  56. data/src/prettyprint.c +1743 -1241
  57. data/src/prism.c +1774 -831
  58. data/src/regexp.c +15 -15
  59. data/src/serialize.c +261 -164
  60. data/src/util/pm_buffer.c +10 -1
  61. data/src/util/pm_memchr.c +1 -1
  62. data/src/util/pm_strpbrk.c +4 -4
  63. metadata +8 -10
  64. data/src/enc/pm_big5.c +0 -53
  65. data/src/enc/pm_euc_jp.c +0 -59
  66. data/src/enc/pm_gbk.c +0 -62
  67. data/src/enc/pm_shift_jis.c +0 -57
  68. data/src/enc/pm_tables.c +0 -743
  69. data/src/enc/pm_unicode.c +0 -2369
  70. data/src/enc/pm_windows_31j.c +0 -57
@@ -14,8 +14,49 @@ module Prism
14
14
  end
15
15
  end
16
16
 
17
+ class InterpolatedMatchLastLineNode < Node
18
+ include RegularExpressionOptions
19
+ end
20
+
21
+ class InterpolatedRegularExpressionNode < Node
22
+ include RegularExpressionOptions
23
+ end
24
+
25
+ class MatchLastLineNode < Node
26
+ include RegularExpressionOptions
27
+ end
28
+
29
+ class RegularExpressionNode < Node
30
+ include RegularExpressionOptions
31
+ end
32
+
17
33
  private_constant :RegularExpressionOptions
18
34
 
35
+ module HeredocQuery # :nodoc:
36
+ # Returns true if this node was represented as a heredoc in the source code.
37
+ def heredoc?
38
+ opening&.start_with?("<<")
39
+ end
40
+ end
41
+
42
+ class InterpolatedStringNode < Node
43
+ include HeredocQuery
44
+ end
45
+
46
+ class InterpolatedXStringNode < Node
47
+ include HeredocQuery
48
+ end
49
+
50
+ class StringNode < Node
51
+ include HeredocQuery
52
+ end
53
+
54
+ class XStringNode < Node
55
+ include HeredocQuery
56
+ end
57
+
58
+ private_constant :HeredocQuery
59
+
19
60
  class FloatNode < Node
20
61
  # Returns the value of the node as a Ruby Float.
21
62
  def value
@@ -37,18 +78,6 @@ module Prism
37
78
  end
38
79
  end
39
80
 
40
- class InterpolatedMatchLastLineNode < Node
41
- include RegularExpressionOptions
42
- end
43
-
44
- class InterpolatedRegularExpressionNode < Node
45
- include RegularExpressionOptions
46
- end
47
-
48
- class MatchLastLineNode < Node
49
- include RegularExpressionOptions
50
- end
51
-
52
81
  class RationalNode < Node
53
82
  # Returns the value of the node as a Ruby Rational.
54
83
  def value
@@ -56,12 +85,9 @@ module Prism
56
85
  end
57
86
  end
58
87
 
59
- class RegularExpressionNode < Node
60
- include RegularExpressionOptions
61
- end
62
-
63
88
  class ConstantReadNode < Node
64
- # Returns the list of parts for the full name of this constant. For example: [:Foo]
89
+ # Returns the list of parts for the full name of this constant.
90
+ # For example: [:Foo]
65
91
  def full_name_parts
66
92
  [name]
67
93
  end
@@ -73,7 +99,16 @@ module Prism
73
99
  end
74
100
 
75
101
  class ConstantPathNode < Node
76
- # Returns the list of parts for the full name of this constant path. For example: [:Foo, :Bar]
102
+ # An error class raised when dynamic parts are found while computing a
103
+ # constant path's full name. For example:
104
+ # Foo::Bar::Baz -> does not raise because all parts of the constant path are
105
+ # simple constants
106
+ # var::Bar::Baz -> raises because the first part of the constant path is a
107
+ # local variable
108
+ class DynamicPartsInConstantPathError < StandardError; end
109
+
110
+ # Returns the list of parts for the full name of this constant path.
111
+ # For example: [:Foo, :Bar]
77
112
  def full_name_parts
78
113
  parts = [child.name]
79
114
  current = parent
@@ -83,6 +118,10 @@ module Prism
83
118
  current = current.parent
84
119
  end
85
120
 
121
+ unless current.is_a?(ConstantReadNode)
122
+ raise DynamicPartsInConstantPathError, "Constant path contains dynamic parts. Cannot compute full name"
123
+ end
124
+
86
125
  parts.unshift(current&.name || :"")
87
126
  end
88
127
 
@@ -93,7 +132,8 @@ module Prism
93
132
  end
94
133
 
95
134
  class ConstantPathTargetNode < Node
96
- # Returns the list of parts for the full name of this constant path. For example: [:Foo, :Bar]
135
+ # Returns the list of parts for the full name of this constant path.
136
+ # For example: [:Foo, :Bar]
97
137
  def full_name_parts
98
138
  (parent&.full_name_parts || [:""]).push(child.name)
99
139
  end
@@ -103,4 +143,47 @@ module Prism
103
143
  full_name_parts.join("::")
104
144
  end
105
145
  end
146
+
147
+ class ParametersNode < Node
148
+ # Mirrors the Method#parameters method.
149
+ def signature
150
+ names = []
151
+
152
+ requireds.each do |param|
153
+ names << (param.is_a?(MultiTargetNode) ? [:req] : [:req, param.name])
154
+ end
155
+
156
+ optionals.each { |param| names << [:opt, param.name] }
157
+ names << [:rest, rest.name || :*] if rest
158
+
159
+ posts.each do |param|
160
+ names << (param.is_a?(MultiTargetNode) ? [:req] : [:req, param.name])
161
+ end
162
+
163
+ # Regardless of the order in which the keywords were defined, the required
164
+ # keywords always come first followed by the optional keywords.
165
+ keyopt = []
166
+ keywords.each do |param|
167
+ if param.is_a?(OptionalKeywordParameterNode)
168
+ keyopt << param
169
+ else
170
+ names << [:keyreq, param.name]
171
+ end
172
+ end
173
+
174
+ keyopt.each { |param| names << [:key, param.name] }
175
+
176
+ case keyword_rest
177
+ when ForwardingParameterNode
178
+ names.concat([[:rest, :*], [:keyrest, :**], [:block, :&]])
179
+ when KeywordRestParameterNode
180
+ names << [:keyrest, keyword_rest.name || :**]
181
+ when NoKeywordsParameterNode
182
+ names << [:nokey]
183
+ end
184
+
185
+ names << [:block, block.name || :&] if block
186
+ names
187
+ end
188
+ end
106
189
  end
@@ -25,40 +25,50 @@ module Prism
25
25
 
26
26
  # Perform a byteslice on the source code using the given byte offset and
27
27
  # byte length.
28
- def slice(offset, length)
29
- source.byteslice(offset, length)
28
+ def slice(byte_offset, length)
29
+ source.byteslice(byte_offset, length)
30
30
  end
31
31
 
32
32
  # Binary search through the offsets to find the line number for the given
33
33
  # byte offset.
34
- def line(value)
35
- start_line + find_line(value)
34
+ def line(byte_offset)
35
+ start_line + find_line(byte_offset)
36
36
  end
37
37
 
38
38
  # Return the byte offset of the start of the line corresponding to the given
39
39
  # byte offset.
40
- def line_offset(value)
41
- offsets[find_line(value)]
40
+ def line_start(byte_offset)
41
+ offsets[find_line(byte_offset)]
42
42
  end
43
43
 
44
44
  # Return the column number for the given byte offset.
45
- def column(value)
46
- value - offsets[find_line(value)]
45
+ def column(byte_offset)
46
+ byte_offset - line_start(byte_offset)
47
+ end
48
+
49
+ # Return the character offset for the given byte offset.
50
+ def character_offset(byte_offset)
51
+ source.byteslice(0, byte_offset).length
52
+ end
53
+
54
+ # Return the column number in characters for the given byte offset.
55
+ def character_column(byte_offset)
56
+ character_offset(byte_offset) - character_offset(line_start(byte_offset))
47
57
  end
48
58
 
49
59
  private
50
60
 
51
61
  # Binary search through the offsets to find the line number for the given
52
62
  # byte offset.
53
- def find_line(value)
63
+ def find_line(byte_offset)
54
64
  left = 0
55
65
  right = offsets.length - 1
56
66
 
57
67
  while left <= right
58
68
  mid = left + (right - left) / 2
59
- return mid if offsets[mid] == value
69
+ return mid if offsets[mid] == byte_offset
60
70
 
61
- if offsets[mid] < value
71
+ if offsets[mid] < byte_offset
62
72
  left = mid + 1
63
73
  else
64
74
  right = mid - 1
@@ -121,11 +131,23 @@ module Prism
121
131
  source.slice(start_offset, length)
122
132
  end
123
133
 
134
+ # The character offset from the beginning of the source where this location
135
+ # starts.
136
+ def start_character_offset
137
+ source.character_offset(start_offset)
138
+ end
139
+
124
140
  # The byte offset from the beginning of the source where this location ends.
125
141
  def end_offset
126
142
  start_offset + length
127
143
  end
128
144
 
145
+ # The character offset from the beginning of the source where this location
146
+ # ends.
147
+ def end_character_offset
148
+ source.character_offset(end_offset)
149
+ end
150
+
129
151
  # The line number where this location starts.
130
152
  def start_line
131
153
  source.line(start_offset)
@@ -133,7 +155,7 @@ module Prism
133
155
 
134
156
  # The content of the line where this location starts before this location.
135
157
  def start_line_slice
136
- offset = source.line_offset(start_offset)
158
+ offset = source.line_start(start_offset)
137
159
  source.slice(offset, start_offset - offset)
138
160
  end
139
161
 
@@ -148,12 +170,24 @@ module Prism
148
170
  source.column(start_offset)
149
171
  end
150
172
 
173
+ # The column number in characters where this location starts from the start of
174
+ # the line.
175
+ def start_character_column
176
+ source.character_column(start_offset)
177
+ end
178
+
151
179
  # The column number in bytes where this location ends from the start of the
152
180
  # line.
153
181
  def end_column
154
182
  source.column(end_offset)
155
183
  end
156
184
 
185
+ # The column number in characters where this location ends from the start of
186
+ # the line.
187
+ def end_character_column
188
+ source.character_column(end_offset)
189
+ end
190
+
157
191
  # Implement the hash pattern matching interface for Location.
158
192
  def deconstruct_keys(keys)
159
193
  { start_offset: start_offset, end_offset: end_offset }
@@ -204,11 +238,6 @@ module Prism
204
238
  def deconstruct_keys(keys)
205
239
  { location: location }
206
240
  end
207
-
208
- # This can only be true for inline comments.
209
- def trailing?
210
- false
211
- end
212
241
  end
213
242
 
214
243
  # InlineComment objects are the most common. They correspond to comments in
@@ -229,18 +258,14 @@ module Prism
229
258
  # EmbDocComment objects correspond to comments that are surrounded by =begin
230
259
  # and =end.
231
260
  class EmbDocComment < Comment
232
- # Returns a string representation of this comment.
233
- def inspect
234
- "#<Prism::EmbDocComment @location=#{location.inspect}>"
261
+ # This can only be true for inline comments.
262
+ def trailing?
263
+ false
235
264
  end
236
- end
237
265
 
238
- # DATAComment objects correspond to comments that are after the __END__
239
- # keyword in a source file.
240
- class DATAComment < Comment
241
266
  # Returns a string representation of this comment.
242
267
  def inspect
243
- "#<Prism::DATAComment @location=#{location.inspect}>"
268
+ "#<Prism::EmbDocComment @location=#{location.inspect}>"
244
269
  end
245
270
  end
246
271
 
@@ -344,6 +369,11 @@ module Prism
344
369
  # The list of magic comments that were encountered during parsing.
345
370
  attr_reader :magic_comments
346
371
 
372
+ # An optional location that represents the location of the content after the
373
+ # __END__ marker. This content is loaded into the DATA constant when the
374
+ # file being parsed is the main file being executed.
375
+ attr_reader :data_loc
376
+
347
377
  # The list of errors that were generated during parsing.
348
378
  attr_reader :errors
349
379
 
@@ -354,10 +384,11 @@ module Prism
354
384
  attr_reader :source
355
385
 
356
386
  # Create a new parse result object with the given values.
357
- def initialize(value, comments, magic_comments, errors, warnings, source)
387
+ def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
358
388
  @value = value
359
389
  @comments = comments
360
390
  @magic_comments = magic_comments
391
+ @data_loc = data_loc
361
392
  @errors = errors
362
393
  @warnings = warnings
363
394
  @source = source
@@ -365,7 +396,7 @@ module Prism
365
396
 
366
397
  # Implement the hash pattern matching interface for ParseResult.
367
398
  def deconstruct_keys(keys)
368
- { value: value, comments: comments, magic_comments: magic_comments, errors: errors, warnings: warnings }
399
+ { value: value, comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings }
369
400
  end
370
401
 
371
402
  # Returns true if there were no errors during parsing and false if there
@@ -3,6 +3,10 @@
3
3
  require "ripper"
4
4
 
5
5
  module Prism
6
+ # Note: This integration is not finished, and therefore still has many
7
+ # inconsistencies with Ripper. If you'd like to help out, pull requests would
8
+ # be greatly appreciated!
9
+ #
6
10
  # This class is meant to provide a compatibility layer between prism and
7
11
  # Ripper. It functions by parsing the entire tree first and then walking it
8
12
  # and executing each of the Ripper callbacks as it goes.
@@ -10,7 +14,10 @@ module Prism
10
14
  # This class is going to necessarily be slower than the native Ripper API. It
11
15
  # is meant as a stopgap until developers migrate to using prism. It is also
12
16
  # meant as a test harness for the prism parser.
13
- class RipperCompat
17
+ #
18
+ # To use this class, you treat `Prism::RipperCompat` effectively as you would
19
+ # treat the `Ripper` class.
20
+ class RipperCompat < Visitor
14
21
  # This class mirrors the ::Ripper::SexpBuilder subclass of ::Ripper that
15
22
  # returns the arrays of [type, *children].
16
23
  class SexpBuilder < RipperCompat
@@ -77,43 +84,63 @@ module Prism
77
84
 
78
85
  # True if the parser encountered an error during parsing.
79
86
  def error?
80
- result.errors.any?
87
+ result.failure?
81
88
  end
82
89
 
83
90
  # Parse the source and return the result.
84
91
  def parse
85
- result.value.accept(self) unless error?
92
+ result.magic_comments.each do |magic_comment|
93
+ on_magic_comment(magic_comment.key, magic_comment.value)
94
+ end
95
+
96
+ if error?
97
+ result.errors.each do |error|
98
+ on_parse_error(error.message)
99
+ end
100
+ else
101
+ result.value.accept(self)
102
+ end
86
103
  end
87
104
 
88
105
  ############################################################################
89
106
  # Visitor methods
90
107
  ############################################################################
91
108
 
92
- # This method is responsible for dispatching to the correct visitor method
93
- # based on the type of the node.
94
- def visit(node)
95
- node&.accept(self)
96
- end
97
-
98
109
  # Visit a CallNode node.
99
110
  def visit_call_node(node)
100
- if !node.opening_loc && node.arguments.arguments.length == 1
101
- bounds(node.receiver.location)
111
+ if !node.message.match?(/^[[:alpha:]_]/) && node.opening_loc.nil? && node.arguments&.arguments&.length == 1
102
112
  left = visit(node.receiver)
103
-
104
- bounds(node.arguments.arguments.first.location)
105
113
  right = visit(node.arguments.arguments.first)
106
114
 
107
- on_binary(left, source[node.message_loc.start_offset...node.message_loc.end_offset].to_sym, right)
115
+ bounds(node.location)
116
+ on_binary(left, node.name, right)
108
117
  else
109
118
  raise NotImplementedError
110
119
  end
111
120
  end
112
121
 
122
+ # Visit a FloatNode node.
123
+ def visit_float_node(node)
124
+ bounds(node.location)
125
+ on_float(node.slice)
126
+ end
127
+
128
+ # Visit an ImaginaryNode node.
129
+ def visit_imaginary_node(node)
130
+ bounds(node.location)
131
+ on_imaginary(node.slice)
132
+ end
133
+
113
134
  # Visit an IntegerNode node.
114
135
  def visit_integer_node(node)
115
136
  bounds(node.location)
116
- on_int(source[node.location.start_offset...node.location.end_offset])
137
+ on_int(node.slice)
138
+ end
139
+
140
+ # Visit a RationalNode node.
141
+ def visit_rational_node(node)
142
+ bounds(node.location)
143
+ on_rational(node.slice)
117
144
  end
118
145
 
119
146
  # Visit a StatementsNode node.
@@ -124,24 +151,11 @@ module Prism
124
151
  end
125
152
  end
126
153
 
127
- # Visit a token found during parsing.
128
- def visit_token(node)
129
- bounds(node.location)
130
-
131
- case node.type
132
- when :MINUS
133
- on_op(node.value)
134
- when :PLUS
135
- on_op(node.value)
136
- else
137
- raise NotImplementedError, "Unknown token: #{node.type}"
138
- end
139
- end
140
-
141
154
  # Visit a ProgramNode node.
142
155
  def visit_program_node(node)
156
+ statements = visit(node.statements)
143
157
  bounds(node.location)
144
- on_program(visit(node.statements))
158
+ on_program(statements)
145
159
  end
146
160
 
147
161
  ############################################################################
@@ -166,10 +180,8 @@ module Prism
166
180
  # This method could be drastically improved with some caching on the start
167
181
  # of every line, but for now it's good enough.
168
182
  def bounds(location)
169
- start_offset = location.start_offset
170
-
171
- @lineno = source[0..start_offset].count("\n") + 1
172
- @column = start_offset - (source.rindex("\n", start_offset) || 0)
183
+ @lineno = location.start_line
184
+ @column = location.start_column
173
185
  end
174
186
 
175
187
  # Lazily initialize the parse result.
@@ -185,6 +197,9 @@ module Prism
185
197
  def _dispatch5(_, _, _, _, _); end # :nodoc:
186
198
  def _dispatch7(_, _, _, _, _, _, _); end # :nodoc:
187
199
 
200
+ alias_method :on_parse_error, :_dispatch1
201
+ alias_method :on_magic_comment, :_dispatch2
202
+
188
203
  (Ripper::SCANNER_EVENT_TABLE.merge(Ripper::PARSER_EVENT_TABLE)).each do |event, arity|
189
204
  alias_method :"on_#{event}", :"_dispatch#{arity}"
190
205
  end