prism 0.15.1 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -1
  3. data/Makefile +12 -0
  4. data/README.md +3 -1
  5. data/config.yml +66 -50
  6. data/docs/configuration.md +2 -0
  7. data/docs/fuzzing.md +1 -1
  8. data/docs/javascript.md +90 -0
  9. data/docs/releasing.md +27 -0
  10. data/docs/ruby_api.md +2 -0
  11. data/docs/serialization.md +28 -29
  12. data/ext/prism/api_node.c +856 -826
  13. data/ext/prism/api_pack.c +20 -9
  14. data/ext/prism/extension.c +494 -119
  15. data/ext/prism/extension.h +1 -1
  16. data/include/prism/ast.h +3157 -747
  17. data/include/prism/defines.h +40 -8
  18. data/include/prism/diagnostic.h +36 -3
  19. data/include/prism/enc/pm_encoding.h +119 -28
  20. data/include/prism/node.h +38 -30
  21. data/include/prism/options.h +204 -0
  22. data/include/prism/pack.h +44 -33
  23. data/include/prism/parser.h +445 -199
  24. data/include/prism/prettyprint.h +26 -0
  25. data/include/prism/regexp.h +16 -2
  26. data/include/prism/util/pm_buffer.h +102 -18
  27. data/include/prism/util/pm_char.h +162 -48
  28. data/include/prism/util/pm_constant_pool.h +128 -34
  29. data/include/prism/util/pm_list.h +68 -38
  30. data/include/prism/util/pm_memchr.h +18 -3
  31. data/include/prism/util/pm_newline_list.h +71 -28
  32. data/include/prism/util/pm_state_stack.h +25 -7
  33. data/include/prism/util/pm_string.h +115 -27
  34. data/include/prism/util/pm_string_list.h +25 -6
  35. data/include/prism/util/pm_strncasecmp.h +32 -0
  36. data/include/prism/util/pm_strpbrk.h +31 -17
  37. data/include/prism/version.h +28 -3
  38. data/include/prism.h +229 -36
  39. data/lib/prism/compiler.rb +5 -5
  40. data/lib/prism/debug.rb +43 -13
  41. data/lib/prism/desugar_compiler.rb +1 -1
  42. data/lib/prism/dispatcher.rb +27 -26
  43. data/lib/prism/dsl.rb +16 -16
  44. data/lib/prism/ffi.rb +138 -61
  45. data/lib/prism/lex_compat.rb +26 -16
  46. data/lib/prism/mutation_compiler.rb +11 -11
  47. data/lib/prism/node.rb +426 -227
  48. data/lib/prism/node_ext.rb +23 -16
  49. data/lib/prism/node_inspector.rb +1 -1
  50. data/lib/prism/pack.rb +79 -40
  51. data/lib/prism/parse_result/comments.rb +7 -2
  52. data/lib/prism/parse_result/newlines.rb +4 -0
  53. data/lib/prism/parse_result.rb +157 -21
  54. data/lib/prism/pattern.rb +14 -3
  55. data/lib/prism/ripper_compat.rb +28 -10
  56. data/lib/prism/serialize.rb +935 -307
  57. data/lib/prism/visitor.rb +9 -5
  58. data/lib/prism.rb +20 -2
  59. data/prism.gemspec +11 -2
  60. data/rbi/prism.rbi +7305 -0
  61. data/rbi/prism_static.rbi +196 -0
  62. data/sig/prism.rbs +4468 -0
  63. data/sig/prism_static.rbs +123 -0
  64. data/src/diagnostic.c +56 -53
  65. data/src/enc/pm_big5.c +1 -0
  66. data/src/enc/pm_euc_jp.c +1 -0
  67. data/src/enc/pm_gbk.c +1 -0
  68. data/src/enc/pm_shift_jis.c +1 -0
  69. data/src/enc/pm_tables.c +316 -80
  70. data/src/enc/pm_unicode.c +54 -9
  71. data/src/enc/pm_windows_31j.c +1 -0
  72. data/src/node.c +357 -345
  73. data/src/options.c +170 -0
  74. data/src/prettyprint.c +7697 -1643
  75. data/src/prism.c +1964 -1125
  76. data/src/regexp.c +153 -95
  77. data/src/serialize.c +432 -397
  78. data/src/token_type.c +3 -1
  79. data/src/util/pm_buffer.c +88 -23
  80. data/src/util/pm_char.c +103 -57
  81. data/src/util/pm_constant_pool.c +52 -22
  82. data/src/util/pm_list.c +12 -4
  83. data/src/util/pm_memchr.c +5 -3
  84. data/src/util/pm_newline_list.c +25 -63
  85. data/src/util/pm_state_stack.c +9 -3
  86. data/src/util/pm_string.c +95 -85
  87. data/src/util/pm_string_list.c +14 -15
  88. data/src/util/pm_strncasecmp.c +10 -3
  89. data/src/util/pm_strpbrk.c +25 -19
  90. metadata +12 -3
  91. data/docs/prism.png +0 -0
@@ -5,31 +5,71 @@ module Prism
5
5
  # conjunction with locations to allow them to resolve line numbers and source
6
6
  # ranges.
7
7
  class Source
8
- attr_reader :source, :offsets
8
+ # The source code that this source object represents.
9
+ attr_reader :source
9
10
 
10
- def initialize(source, offsets = compute_offsets(source))
11
+ # The line number where this source starts.
12
+ attr_accessor :start_line
13
+
14
+ # The list of newline byte offsets in the source code.
15
+ attr_reader :offsets
16
+
17
+ # Create a new source object with the given source code and newline byte
18
+ # offsets. If no newline byte offsets are given, they will be computed from
19
+ # the source code.
20
+ def initialize(source, start_line = 1, offsets = compute_offsets(source))
11
21
  @source = source
22
+ @start_line = start_line
12
23
  @offsets = offsets
13
24
  end
14
25
 
26
+ # Perform a byteslice on the source code using the given byte offset and
27
+ # byte length.
15
28
  def slice(offset, length)
16
29
  source.byteslice(offset, length)
17
30
  end
18
31
 
32
+ # Binary search through the offsets to find the line number for the given
33
+ # byte offset.
19
34
  def line(value)
20
- offsets.bsearch_index { |offset| offset > value } || offsets.length
35
+ start_line + find_line(value)
21
36
  end
22
37
 
38
+ # Return the byte offset of the start of the line corresponding to the given
39
+ # byte offset.
23
40
  def line_offset(value)
24
- offsets[line(value) - 1]
41
+ offsets[find_line(value)]
25
42
  end
26
43
 
44
+ # Return the column number for the given byte offset.
27
45
  def column(value)
28
- value - offsets[line(value) - 1]
46
+ value - offsets[find_line(value)]
29
47
  end
30
48
 
31
49
  private
32
50
 
51
+ # Binary search through the offsets to find the line number for the given
52
+ # byte offset.
53
+ def find_line(value)
54
+ left = 0
55
+ right = offsets.length - 1
56
+
57
+ while left <= right
58
+ mid = left + (right - left) / 2
59
+ return mid if offsets[mid] == value
60
+
61
+ if offsets[mid] < value
62
+ left = mid + 1
63
+ else
64
+ right = mid - 1
65
+ end
66
+ end
67
+
68
+ left - 1
69
+ end
70
+
71
+ # Find all of the newlines in the source code and return their byte offsets
72
+ # from the start of the string as an array.
33
73
  def compute_offsets(code)
34
74
  offsets = [0]
35
75
  code.b.scan("\n") { offsets << $~.end(0) }
@@ -53,6 +93,8 @@ module Prism
53
93
  # The list of comments attached to this location
54
94
  attr_reader :comments
55
95
 
96
+ # Create a new location object with the given source, start byte offset, and
97
+ # byte length.
56
98
  def initialize(source, start_offset, length)
57
99
  @source = source
58
100
  @start_offset = start_offset
@@ -97,7 +139,7 @@ module Prism
97
139
 
98
140
  # The line number where this location ends.
99
141
  def end_line
100
- source.line(end_offset - 1)
142
+ source.line(end_offset)
101
143
  end
102
144
 
103
145
  # The column number in bytes where this location starts from the start of
@@ -112,14 +154,17 @@ module Prism
112
154
  source.column(end_offset)
113
155
  end
114
156
 
157
+ # Implement the hash pattern matching interface for Location.
115
158
  def deconstruct_keys(keys)
116
159
  { start_offset: start_offset, end_offset: end_offset }
117
160
  end
118
161
 
162
+ # Implement the pretty print interface for Location.
119
163
  def pretty_print(q)
120
164
  q.text("(#{start_line},#{start_column})-(#{end_line},#{end_column})")
121
165
  end
122
166
 
167
+ # Returns true if the given other location is equal to this location.
123
168
  def ==(other)
124
169
  other.is_a?(Location) &&
125
170
  other.start_offset == start_offset &&
@@ -136,57 +181,99 @@ module Prism
136
181
  Location.new(source, start_offset, other.end_offset - start_offset)
137
182
  end
138
183
 
184
+ # Returns a null location that does not correspond to a source and points to
185
+ # the beginning of the file. Useful for when you want a location object but
186
+ # do not care where it points.
139
187
  def self.null
140
188
  new(nil, 0, 0)
141
189
  end
142
190
  end
143
191
 
144
- # This represents a comment that was encountered during parsing.
192
+ # This represents a comment that was encountered during parsing. It is the
193
+ # base class for all comment types.
145
194
  class Comment
146
- TYPES = [:inline, :embdoc, :__END__]
195
+ # The location of this comment in the source.
196
+ attr_reader :location
147
197
 
148
- attr_reader :type, :location
149
-
150
- def initialize(type, location)
151
- @type = type
198
+ # Create a new comment object with the given location.
199
+ def initialize(location)
152
200
  @location = location
153
201
  end
154
202
 
203
+ # Implement the hash pattern matching interface for Comment.
155
204
  def deconstruct_keys(keys)
156
- { type: type, location: location }
205
+ { location: location }
206
+ end
207
+
208
+ # This can only be true for inline comments.
209
+ def trailing?
210
+ false
157
211
  end
212
+ end
158
213
 
159
- # Returns true if the comment happens on the same line as other code and false if the comment is by itself
214
+ # InlineComment objects are the most common. They correspond to comments in
215
+ # the source file like this one that start with #.
216
+ class InlineComment < Comment
217
+ # Returns true if this comment happens on the same line as other code and
218
+ # false if the comment is by itself.
160
219
  def trailing?
161
- type == :inline && !location.start_line_slice.strip.empty?
220
+ !location.start_line_slice.strip.empty?
221
+ end
222
+
223
+ # Returns a string representation of this comment.
224
+ def inspect
225
+ "#<Prism::InlineComment @location=#{location.inspect}>"
162
226
  end
227
+ end
163
228
 
229
+ # EmbDocComment objects correspond to comments that are surrounded by =begin
230
+ # and =end.
231
+ class EmbDocComment < Comment
232
+ # Returns a string representation of this comment.
164
233
  def inspect
165
- "#<Prism::Comment @type=#{@type.inspect} @location=#{@location.inspect}>"
234
+ "#<Prism::EmbDocComment @location=#{location.inspect}>"
235
+ end
236
+ end
237
+
238
+ # DATAComment objects correspond to comments that are after the __END__
239
+ # keyword in a source file.
240
+ class DATAComment < Comment
241
+ # Returns a string representation of this comment.
242
+ def inspect
243
+ "#<Prism::DATAComment @location=#{location.inspect}>"
166
244
  end
167
245
  end
168
246
 
169
247
  # This represents a magic comment that was encountered during parsing.
170
248
  class MagicComment
171
- attr_reader :key_loc, :value_loc
249
+ # A Location object representing the location of the key in the source.
250
+ attr_reader :key_loc
251
+
252
+ # A Location object representing the location of the value in the source.
253
+ attr_reader :value_loc
172
254
 
255
+ # Create a new magic comment object with the given key and value locations.
173
256
  def initialize(key_loc, value_loc)
174
257
  @key_loc = key_loc
175
258
  @value_loc = value_loc
176
259
  end
177
260
 
261
+ # Returns the key of the magic comment by slicing it from the source code.
178
262
  def key
179
263
  key_loc.slice
180
264
  end
181
265
 
266
+ # Returns the value of the magic comment by slicing it from the source code.
182
267
  def value
183
268
  value_loc.slice
184
269
  end
185
270
 
271
+ # Implement the hash pattern matching interface for MagicComment.
186
272
  def deconstruct_keys(keys)
187
273
  { key_loc: key_loc, value_loc: value_loc }
188
274
  end
189
275
 
276
+ # Returns a string representation of this magic comment.
190
277
  def inspect
191
278
  "#<Prism::MagicComment @key=#{key.inspect} @value=#{value.inspect}>"
192
279
  end
@@ -194,17 +281,24 @@ module Prism
194
281
 
195
282
  # This represents an error that was encountered during parsing.
196
283
  class ParseError
197
- attr_reader :message, :location
284
+ # The message associated with this error.
285
+ attr_reader :message
198
286
 
287
+ # A Location object representing the location of this error in the source.
288
+ attr_reader :location
289
+
290
+ # Create a new error object with the given message and location.
199
291
  def initialize(message, location)
200
292
  @message = message
201
293
  @location = location
202
294
  end
203
295
 
296
+ # Implement the hash pattern matching interface for ParseError.
204
297
  def deconstruct_keys(keys)
205
298
  { message: message, location: location }
206
299
  end
207
300
 
301
+ # Returns a string representation of this error.
208
302
  def inspect
209
303
  "#<Prism::ParseError @message=#{@message.inspect} @location=#{@location.inspect}>"
210
304
  end
@@ -212,17 +306,24 @@ module Prism
212
306
 
213
307
  # This represents a warning that was encountered during parsing.
214
308
  class ParseWarning
215
- attr_reader :message, :location
309
+ # The message associated with this warning.
310
+ attr_reader :message
311
+
312
+ # A Location object representing the location of this warning in the source.
313
+ attr_reader :location
216
314
 
315
+ # Create a new warning object with the given message and location.
217
316
  def initialize(message, location)
218
317
  @message = message
219
318
  @location = location
220
319
  end
221
320
 
321
+ # Implement the hash pattern matching interface for ParseWarning.
222
322
  def deconstruct_keys(keys)
223
323
  { message: message, location: location }
224
324
  end
225
325
 
326
+ # Returns a string representation of this warning.
226
327
  def inspect
227
328
  "#<Prism::ParseWarning @message=#{@message.inspect} @location=#{@location.inspect}>"
228
329
  end
@@ -232,8 +333,27 @@ module Prism
232
333
  # the AST, any comments that were encountered, and any errors that were
233
334
  # encountered.
234
335
  class ParseResult
235
- attr_reader :value, :comments, :magic_comments, :errors, :warnings, :source
336
+ # The value that was generated by parsing. Normally this holds the AST, but
337
+ # it can sometimes hold a list of tokens or other results passed back from
338
+ # the parser.
339
+ attr_reader :value
236
340
 
341
+ # The list of comments that were encountered during parsing.
342
+ attr_reader :comments
343
+
344
+ # The list of magic comments that were encountered during parsing.
345
+ attr_reader :magic_comments
346
+
347
+ # The list of errors that were generated during parsing.
348
+ attr_reader :errors
349
+
350
+ # The list of warnings that were generated during parsing.
351
+ attr_reader :warnings
352
+
353
+ # A Source instance that represents the source code that was parsed.
354
+ attr_reader :source
355
+
356
+ # Create a new parse result object with the given values.
237
357
  def initialize(value, comments, magic_comments, errors, warnings, source)
238
358
  @value = value
239
359
  @comments = comments
@@ -243,14 +363,19 @@ module Prism
243
363
  @source = source
244
364
  end
245
365
 
366
+ # Implement the hash pattern matching interface for ParseResult.
246
367
  def deconstruct_keys(keys)
247
368
  { value: value, comments: comments, magic_comments: magic_comments, errors: errors, warnings: warnings }
248
369
  end
249
370
 
371
+ # Returns true if there were no errors during parsing and false if there
372
+ # were.
250
373
  def success?
251
374
  errors.empty?
252
375
  end
253
376
 
377
+ # Returns true if there were errors during parsing and false if there were
378
+ # not.
254
379
  def failure?
255
380
  !success?
256
381
  end
@@ -258,18 +383,28 @@ module Prism
258
383
 
259
384
  # This represents a token from the Ruby source.
260
385
  class Token
261
- attr_reader :type, :value, :location
386
+ # The type of token that this token is.
387
+ attr_reader :type
388
+
389
+ # A byteslice of the source that this token represents.
390
+ attr_reader :value
391
+
392
+ # A Location object representing the location of this token in the source.
393
+ attr_reader :location
262
394
 
395
+ # Create a new token object with the given type, value, and location.
263
396
  def initialize(type, value, location)
264
397
  @type = type
265
398
  @value = value
266
399
  @location = location
267
400
  end
268
401
 
402
+ # Implement the hash pattern matching interface for Token.
269
403
  def deconstruct_keys(keys)
270
404
  { type: type, value: value, location: location }
271
405
  end
272
406
 
407
+ # Implement the pretty print interface for Token.
273
408
  def pretty_print(q)
274
409
  q.group do
275
410
  q.text(type.to_s)
@@ -284,6 +419,7 @@ module Prism
284
419
  end
285
420
  end
286
421
 
422
+ # Returns true if the given other token is equal to this token.
287
423
  def ==(other)
288
424
  other.is_a?(Token) &&
289
425
  other.type == type &&
data/lib/prism/pattern.rb CHANGED
@@ -38,6 +38,8 @@ module Prism
38
38
  # Raised when the query given to a pattern is either invalid Ruby syntax or
39
39
  # is using syntax that we don't yet support.
40
40
  class CompilationError < StandardError
41
+ # Create a new CompilationError with the given representation of the node
42
+ # that caused the error.
41
43
  def initialize(repr)
42
44
  super(<<~ERROR)
43
45
  prism was unable to compile the pattern you provided into a usable
@@ -53,18 +55,27 @@ module Prism
53
55
  end
54
56
  end
55
57
 
58
+ # The query that this pattern was initialized with.
56
59
  attr_reader :query
57
60
 
61
+ # Create a new pattern with the given query. The query should be a string
62
+ # containing a Ruby pattern matching expression.
58
63
  def initialize(query)
59
64
  @query = query
60
65
  @compiled = nil
61
66
  end
62
67
 
68
+ # Compile the query into a callable object that can be used to match against
69
+ # nodes.
63
70
  def compile
64
71
  result = Prism.parse("case nil\nin #{query}\nend")
65
72
  compile_node(result.value.statements.body.last.conditions.last.pattern)
66
73
  end
67
74
 
75
+ # Scan the given node and all of its children for nodes that match the
76
+ # pattern. If a block is given, it will be called with each node that
77
+ # matches the pattern. If no block is given, an enumerator will be returned
78
+ # that will yield each node that matches the pattern.
68
79
  def scan(root)
69
80
  return to_enum(__method__, root) unless block_given?
70
81
 
@@ -158,12 +169,12 @@ module Prism
158
169
  # in InstanceVariableReadNode[name: Symbol]
159
170
  # in { name: Symbol }
160
171
  def compile_hash_pattern_node(node)
161
- compile_error(node) unless node.kwrest.nil?
172
+ compile_error(node) if node.rest
162
173
  compiled_constant = compile_node(node.constant) if node.constant
163
174
 
164
175
  preprocessed =
165
- node.assocs.to_h do |assoc|
166
- [assoc.key.unescaped.to_sym, compile_node(assoc.value)]
176
+ node.elements.to_h do |element|
177
+ [element.key.unescaped.to_sym, compile_node(element.value)]
167
178
  end
168
179
 
169
180
  compiled_keywords = ->(other) do
@@ -35,11 +35,11 @@ module Prism
35
35
  class SexpBuilderPP < SexpBuilder
36
36
  private
37
37
 
38
- def _dispatch_event_new
38
+ def _dispatch_event_new # :nodoc:
39
39
  []
40
40
  end
41
41
 
42
- def _dispatch_event_push(list, item)
42
+ def _dispatch_event_push(list, item) # :nodoc:
43
43
  list << item
44
44
  list
45
45
  end
@@ -54,8 +54,16 @@ module Prism
54
54
  end
55
55
  end
56
56
 
57
- attr_reader :source, :lineno, :column
57
+ # The source that is being parsed.
58
+ attr_reader :source
58
59
 
60
+ # The current line number of the parser.
61
+ attr_reader :lineno
62
+
63
+ # The current column number of the parser.
64
+ attr_reader :column
65
+
66
+ # Create a new RipperCompat object with the given source.
59
67
  def initialize(source)
60
68
  @source = source
61
69
  @result = nil
@@ -67,10 +75,12 @@ module Prism
67
75
  # Public interface
68
76
  ############################################################################
69
77
 
78
+ # True if the parser encountered an error during parsing.
70
79
  def error?
71
80
  result.errors.any?
72
81
  end
73
82
 
83
+ # Parse the source and return the result.
74
84
  def parse
75
85
  result.value.accept(self) unless error?
76
86
  end
@@ -79,10 +89,13 @@ module Prism
79
89
  # Visitor methods
80
90
  ############################################################################
81
91
 
92
+ # This method is responsible for dispatching to the correct visitor method
93
+ # based on the type of the node.
82
94
  def visit(node)
83
95
  node&.accept(self)
84
96
  end
85
97
 
98
+ # Visit a CallNode node.
86
99
  def visit_call_node(node)
87
100
  if !node.opening_loc && node.arguments.arguments.length == 1
88
101
  bounds(node.receiver.location)
@@ -97,11 +110,13 @@ module Prism
97
110
  end
98
111
  end
99
112
 
113
+ # Visit an IntegerNode node.
100
114
  def visit_integer_node(node)
101
115
  bounds(node.location)
102
116
  on_int(source[node.location.start_offset...node.location.end_offset])
103
117
  end
104
118
 
119
+ # Visit a StatementsNode node.
105
120
  def visit_statements_node(node)
106
121
  bounds(node.location)
107
122
  node.body.inject(on_stmts_new) do |stmts, stmt|
@@ -109,6 +124,7 @@ module Prism
109
124
  end
110
125
  end
111
126
 
127
+ # Visit a token found during parsing.
112
128
  def visit_token(node)
113
129
  bounds(node.location)
114
130
 
@@ -122,6 +138,7 @@ module Prism
122
138
  end
123
139
  end
124
140
 
141
+ # Visit a ProgramNode node.
125
142
  def visit_program_node(node)
126
143
  bounds(node.location)
127
144
  on_program(visit(node.statements))
@@ -155,17 +172,18 @@ module Prism
155
172
  @column = start_offset - (source.rindex("\n", start_offset) || 0)
156
173
  end
157
174
 
175
+ # Lazily initialize the parse result.
158
176
  def result
159
177
  @result ||= Prism.parse(source)
160
178
  end
161
179
 
162
- def _dispatch0; end
163
- def _dispatch1(_); end
164
- def _dispatch2(_, _); end
165
- def _dispatch3(_, _, _); end
166
- def _dispatch4(_, _, _, _); end
167
- def _dispatch5(_, _, _, _, _); end
168
- def _dispatch7(_, _, _, _, _, _, _); end
180
+ def _dispatch0; end # :nodoc:
181
+ def _dispatch1(_); end # :nodoc:
182
+ def _dispatch2(_, _); end # :nodoc:
183
+ def _dispatch3(_, _, _); end # :nodoc:
184
+ def _dispatch4(_, _, _, _); end # :nodoc:
185
+ def _dispatch5(_, _, _, _, _); end # :nodoc:
186
+ def _dispatch7(_, _, _, _, _, _, _); end # :nodoc:
169
187
 
170
188
  (Ripper::SCANNER_EVENT_TABLE.merge(Ripper::PARSER_EVENT_TABLE)).each do |event, arity|
171
189
  alias_method :"on_#{event}", :"_dispatch#{arity}"