jruby-prism-parser 0.23.0.pre.SNAPSHOT-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (110) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +401 -0
  3. data/CODE_OF_CONDUCT.md +76 -0
  4. data/CONTRIBUTING.md +62 -0
  5. data/LICENSE.md +7 -0
  6. data/Makefile +101 -0
  7. data/README.md +98 -0
  8. data/config.yml +2902 -0
  9. data/docs/build_system.md +91 -0
  10. data/docs/configuration.md +64 -0
  11. data/docs/cruby_compilation.md +27 -0
  12. data/docs/design.md +53 -0
  13. data/docs/encoding.md +121 -0
  14. data/docs/fuzzing.md +88 -0
  15. data/docs/heredocs.md +36 -0
  16. data/docs/javascript.md +118 -0
  17. data/docs/local_variable_depth.md +229 -0
  18. data/docs/mapping.md +117 -0
  19. data/docs/parser_translation.md +34 -0
  20. data/docs/parsing_rules.md +19 -0
  21. data/docs/releasing.md +98 -0
  22. data/docs/ripper.md +36 -0
  23. data/docs/ruby_api.md +43 -0
  24. data/docs/ruby_parser_translation.md +19 -0
  25. data/docs/serialization.md +209 -0
  26. data/docs/testing.md +55 -0
  27. data/ext/prism/api_node.c +5098 -0
  28. data/ext/prism/api_pack.c +267 -0
  29. data/ext/prism/extconf.rb +110 -0
  30. data/ext/prism/extension.c +1155 -0
  31. data/ext/prism/extension.h +18 -0
  32. data/include/prism/ast.h +5807 -0
  33. data/include/prism/defines.h +102 -0
  34. data/include/prism/diagnostic.h +339 -0
  35. data/include/prism/encoding.h +265 -0
  36. data/include/prism/node.h +57 -0
  37. data/include/prism/options.h +230 -0
  38. data/include/prism/pack.h +152 -0
  39. data/include/prism/parser.h +732 -0
  40. data/include/prism/prettyprint.h +26 -0
  41. data/include/prism/regexp.h +33 -0
  42. data/include/prism/util/pm_buffer.h +155 -0
  43. data/include/prism/util/pm_char.h +205 -0
  44. data/include/prism/util/pm_constant_pool.h +209 -0
  45. data/include/prism/util/pm_list.h +97 -0
  46. data/include/prism/util/pm_memchr.h +29 -0
  47. data/include/prism/util/pm_newline_list.h +93 -0
  48. data/include/prism/util/pm_state_stack.h +42 -0
  49. data/include/prism/util/pm_string.h +150 -0
  50. data/include/prism/util/pm_string_list.h +44 -0
  51. data/include/prism/util/pm_strncasecmp.h +32 -0
  52. data/include/prism/util/pm_strpbrk.h +46 -0
  53. data/include/prism/version.h +29 -0
  54. data/include/prism.h +289 -0
  55. data/jruby-prism.jar +0 -0
  56. data/lib/prism/compiler.rb +486 -0
  57. data/lib/prism/debug.rb +206 -0
  58. data/lib/prism/desugar_compiler.rb +207 -0
  59. data/lib/prism/dispatcher.rb +2150 -0
  60. data/lib/prism/dot_visitor.rb +4634 -0
  61. data/lib/prism/dsl.rb +785 -0
  62. data/lib/prism/ffi.rb +346 -0
  63. data/lib/prism/lex_compat.rb +908 -0
  64. data/lib/prism/mutation_compiler.rb +753 -0
  65. data/lib/prism/node.rb +17864 -0
  66. data/lib/prism/node_ext.rb +212 -0
  67. data/lib/prism/node_inspector.rb +68 -0
  68. data/lib/prism/pack.rb +224 -0
  69. data/lib/prism/parse_result/comments.rb +177 -0
  70. data/lib/prism/parse_result/newlines.rb +64 -0
  71. data/lib/prism/parse_result.rb +498 -0
  72. data/lib/prism/pattern.rb +250 -0
  73. data/lib/prism/serialize.rb +1354 -0
  74. data/lib/prism/translation/parser/compiler.rb +1838 -0
  75. data/lib/prism/translation/parser/lexer.rb +335 -0
  76. data/lib/prism/translation/parser/rubocop.rb +37 -0
  77. data/lib/prism/translation/parser.rb +178 -0
  78. data/lib/prism/translation/ripper.rb +577 -0
  79. data/lib/prism/translation/ruby_parser.rb +1521 -0
  80. data/lib/prism/translation.rb +11 -0
  81. data/lib/prism/version.rb +3 -0
  82. data/lib/prism/visitor.rb +495 -0
  83. data/lib/prism.rb +99 -0
  84. data/prism.gemspec +135 -0
  85. data/rbi/prism.rbi +7767 -0
  86. data/rbi/prism_static.rbi +207 -0
  87. data/sig/prism.rbs +4773 -0
  88. data/sig/prism_static.rbs +201 -0
  89. data/src/diagnostic.c +400 -0
  90. data/src/encoding.c +5132 -0
  91. data/src/node.c +2786 -0
  92. data/src/options.c +213 -0
  93. data/src/pack.c +493 -0
  94. data/src/prettyprint.c +8881 -0
  95. data/src/prism.c +18406 -0
  96. data/src/regexp.c +638 -0
  97. data/src/serialize.c +1554 -0
  98. data/src/token_type.c +700 -0
  99. data/src/util/pm_buffer.c +190 -0
  100. data/src/util/pm_char.c +318 -0
  101. data/src/util/pm_constant_pool.c +322 -0
  102. data/src/util/pm_list.c +49 -0
  103. data/src/util/pm_memchr.c +35 -0
  104. data/src/util/pm_newline_list.c +84 -0
  105. data/src/util/pm_state_stack.c +25 -0
  106. data/src/util/pm_string.c +203 -0
  107. data/src/util/pm_string_list.c +28 -0
  108. data/src/util/pm_strncasecmp.c +24 -0
  109. data/src/util/pm_strpbrk.c +180 -0
  110. metadata +156 -0
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Prism
4
+ class ParseResult
5
+ # The :line tracepoint event gets fired whenever the Ruby VM encounters an
6
+ # expression on a new line. The types of expressions that can trigger this
7
+ # event are:
8
+ #
9
+ # * if statements
10
+ # * unless statements
11
+ # * nodes that are children of statements lists
12
+ #
13
+ # In order to keep track of the newlines, we have a list of offsets that
14
+ # come back from the parser. We assign these offsets to the first nodes that
15
+ # we find in the tree that are on those lines.
16
+ #
17
+ # Note that the logic in this file should be kept in sync with the Java
18
+ # MarkNewlinesVisitor, since that visitor is responsible for marking the
19
+ # newlines for JRuby/TruffleRuby.
20
+ class Newlines < Visitor
21
+ # Create a new Newlines visitor with the given array of newline-marked flags.
22
+ def initialize(newline_marked)
23
+ @newline_marked = newline_marked
24
+ end
25
+
26
+ # Permit block/lambda nodes to mark newlines within themselves.
27
+ def visit_block_node(node)
28
+ old_newline_marked = @newline_marked
29
+ @newline_marked = Array.new(old_newline_marked.size, false)
30
+
31
+ begin
32
+ super(node)
33
+ ensure
34
+ @newline_marked = old_newline_marked
35
+ end
36
+ end
37
+
38
+ alias_method :visit_lambda_node, :visit_block_node
39
+
40
+ # Mark if/unless nodes as newlines.
41
+ def visit_if_node(node)
42
+ node.set_newline_flag(@newline_marked)
43
+ super(node)
44
+ end
45
+
46
+ alias_method :visit_unless_node, :visit_if_node
47
+
48
+ # Permit statements lists to mark newlines within themselves.
49
+ def visit_statements_node(node)
50
+ node.body.each do |child|
51
+ child.set_newline_flag(@newline_marked)
52
+ end
53
+ super(node)
54
+ end
55
+ end
56
+
57
+ private_constant :Newlines
58
+
59
+ # Walk the tree and mark nodes that are on a new line.
60
+ def mark_newlines!
61
+ value.accept(Newlines.new(Array.new(1 + source.offsets.size, false)))
62
+ end
63
+ end
64
+ end
@@ -0,0 +1,498 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Prism
4
+ # This represents a source of Ruby code that has been parsed. It is used in
5
+ # conjunction with locations to allow them to resolve line numbers and source
6
+ # ranges.
7
+ class Source
8
+ # The source code that this source object represents.
9
+ attr_reader :source
10
+
11
+ # The line number where this source starts.
12
+ attr_reader :start_line
13
+
14
+ # The list of newline byte offsets in the source code.
15
+ attr_reader :offsets
16
+
17
+ # Create a new source object with the given source code.
18
+ def initialize(source, start_line = 1, offsets = [])
19
+ @source = source
20
+ @start_line = start_line # set after parsing is done
21
+ @offsets = offsets # set after parsing is done
22
+ end
23
+
24
+ # Perform a byteslice on the source code using the given byte offset and
25
+ # byte length.
26
+ def slice(byte_offset, length)
27
+ source.byteslice(byte_offset, length)
28
+ end
29
+
30
+ # Binary search through the offsets to find the line number for the given
31
+ # byte offset.
32
+ def line(byte_offset)
33
+ start_line + find_line(byte_offset)
34
+ end
35
+
36
+ # Return the byte offset of the start of the line corresponding to the given
37
+ # byte offset.
38
+ def line_start(byte_offset)
39
+ offsets[find_line(byte_offset)]
40
+ end
41
+
42
+ # Return the column number for the given byte offset.
43
+ def column(byte_offset)
44
+ byte_offset - line_start(byte_offset)
45
+ end
46
+
47
+ # Return the character offset for the given byte offset.
48
+ def character_offset(byte_offset)
49
+ source.byteslice(0, byte_offset).length
50
+ end
51
+
52
+ # Return the column number in characters for the given byte offset.
53
+ def character_column(byte_offset)
54
+ character_offset(byte_offset) - character_offset(line_start(byte_offset))
55
+ end
56
+
57
+ # Returns the offset from the start of the file for the given byte offset
58
+ # counting in code units for the given encoding.
59
+ #
60
+ # This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
61
+ # concept of code units that differs from the number of characters in other
62
+ # encodings, it is not captured here.
63
+ def code_units_offset(byte_offset, encoding)
64
+ byteslice = source.byteslice(0, byte_offset).encode(encoding)
65
+ (encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE) ? (byteslice.bytesize / 2) : byteslice.length
66
+ end
67
+
68
+ # Returns the column number in code units for the given encoding for the
69
+ # given byte offset.
70
+ def code_units_column(byte_offset, encoding)
71
+ code_units_offset(byte_offset, encoding) - code_units_offset(line_start(byte_offset), encoding)
72
+ end
73
+
74
+ private
75
+
76
+ # Binary search through the offsets to find the line number for the given
77
+ # byte offset.
78
+ def find_line(byte_offset)
79
+ left = 0
80
+ right = offsets.length - 1
81
+
82
+ while left <= right
83
+ mid = left + (right - left) / 2
84
+ return mid if offsets[mid] == byte_offset
85
+
86
+ if offsets[mid] < byte_offset
87
+ left = mid + 1
88
+ else
89
+ right = mid - 1
90
+ end
91
+ end
92
+
93
+ left - 1
94
+ end
95
+ end
96
+
97
+ # This represents a location in the source.
98
+ class Location
99
+ # A Source object that is used to determine more information from the given
100
+ # offset and length.
101
+ attr_reader :source
102
+ protected :source
103
+
104
+ # The byte offset from the beginning of the source where this location
105
+ # starts.
106
+ attr_reader :start_offset
107
+
108
+ # The length of this location in bytes.
109
+ attr_reader :length
110
+
111
+ # The list of comments attached to this location.
112
+ attr_reader :comments
113
+
114
+ # Create a new location object with the given source, start byte offset, and
115
+ # byte length.
116
+ def initialize(source, start_offset, length)
117
+ @source = source
118
+ @start_offset = start_offset
119
+ @length = length
120
+ @comments = []
121
+ end
122
+
123
+ # Create a new location object with the given options.
124
+ def copy(**options)
125
+ Location.new(
126
+ options.fetch(:source) { source },
127
+ options.fetch(:start_offset) { start_offset },
128
+ options.fetch(:length) { length }
129
+ )
130
+ end
131
+
132
+ # Returns a string representation of this location.
133
+ def inspect
134
+ "#<Prism::Location @start_offset=#{@start_offset} @length=#{@length} start_line=#{start_line}>"
135
+ end
136
+
137
+ # The source code that this location represents.
138
+ def slice
139
+ source.slice(start_offset, length)
140
+ end
141
+
142
+ # The character offset from the beginning of the source where this location
143
+ # starts.
144
+ def start_character_offset
145
+ source.character_offset(start_offset)
146
+ end
147
+
148
+ # The offset from the start of the file to the start of this location, in code units of the given encoding.
149
+ def start_code_units_offset(encoding = Encoding::UTF_16LE)
150
+ source.code_units_offset(start_offset, encoding)
151
+ end
152
+
153
+ # The byte offset from the beginning of the source where this location ends.
154
+ def end_offset
155
+ start_offset + length
156
+ end
157
+
158
+ # The character offset from the beginning of the source where this location
159
+ # ends.
160
+ def end_character_offset
161
+ source.character_offset(end_offset)
162
+ end
163
+
164
+ # The offset from the start of the file to the end of this location, in code units of the given encoding.
165
+ def end_code_units_offset(encoding = Encoding::UTF_16LE)
166
+ source.code_units_offset(end_offset, encoding)
167
+ end
168
+
169
+ # The line number where this location starts.
170
+ def start_line
171
+ source.line(start_offset)
172
+ end
173
+
174
+ # The content of the line where this location starts before this location.
175
+ def start_line_slice
176
+ offset = source.line_start(start_offset)
177
+ source.slice(offset, start_offset - offset)
178
+ end
179
+
180
+ # The line number where this location ends.
181
+ def end_line
182
+ source.line(end_offset)
183
+ end
184
+
185
+ # The column number in bytes where this location starts from the start of
186
+ # the line.
187
+ def start_column
188
+ source.column(start_offset)
189
+ end
190
+
191
+ # The column number in characters where this location starts from the start of
192
+ # the line.
193
+ def start_character_column
194
+ source.character_column(start_offset)
195
+ end
196
+
197
+ # The column number in code units of the given encoding where this location
198
+ # starts from the start of the line.
199
+ def start_code_units_column(encoding = Encoding::UTF_16LE)
200
+ source.code_units_column(start_offset, encoding)
201
+ end
202
+
203
+ # The column number in bytes where this location ends from the start of the
204
+ # line.
205
+ def end_column
206
+ source.column(end_offset)
207
+ end
208
+
209
+ # The column number in characters where this location ends from the start of
210
+ # the line.
211
+ def end_character_column
212
+ source.character_column(end_offset)
213
+ end
214
+
215
+ # The column number in code units of the given encoding where this location
216
+ # ends from the start of the line.
217
+ def end_code_units_column(encoding = Encoding::UTF_16LE)
218
+ source.code_units_column(end_offset, encoding)
219
+ end
220
+
221
+ # Implement the hash pattern matching interface for Location.
222
+ def deconstruct_keys(keys)
223
+ { start_offset: start_offset, end_offset: end_offset }
224
+ end
225
+
226
+ # Implement the pretty print interface for Location.
227
+ def pretty_print(q)
228
+ q.text("(#{start_line},#{start_column})-(#{end_line},#{end_column})")
229
+ end
230
+
231
+ # Returns true if the given other location is equal to this location.
232
+ def ==(other)
233
+ other.is_a?(Location) &&
234
+ other.start_offset == start_offset &&
235
+ other.end_offset == end_offset
236
+ end
237
+
238
+ # Returns a new location that stretches from this location to the given
239
+ # other location. Raises an error if this location is not before the other
240
+ # location or if they don't share the same source.
241
+ def join(other)
242
+ raise "Incompatible sources" if source != other.source
243
+ raise "Incompatible locations" if start_offset > other.start_offset
244
+
245
+ Location.new(source, start_offset, other.end_offset - start_offset)
246
+ end
247
+
248
+ # Returns a null location that does not correspond to a source and points to
249
+ # the beginning of the file. Useful for when you want a location object but
250
+ # do not care where it points.
251
+ def self.null
252
+ new(nil, 0, 0)
253
+ end
254
+ end
255
+
256
+ # This represents a comment that was encountered during parsing. It is the
257
+ # base class for all comment types.
258
+ class Comment
259
+ # The location of this comment in the source.
260
+ attr_reader :location
261
+
262
+ # Create a new comment object with the given location.
263
+ def initialize(location)
264
+ @location = location
265
+ end
266
+
267
+ # Implement the hash pattern matching interface for Comment.
268
+ def deconstruct_keys(keys)
269
+ { location: location }
270
+ end
271
+ end
272
+
273
+ # InlineComment objects are the most common. They correspond to comments in
274
+ # the source file like this one that start with #.
275
+ class InlineComment < Comment
276
+ # Returns true if this comment happens on the same line as other code and
277
+ # false if the comment is by itself.
278
+ def trailing?
279
+ !location.start_line_slice.strip.empty?
280
+ end
281
+
282
+ # Returns a string representation of this comment.
283
+ def inspect
284
+ "#<Prism::InlineComment @location=#{location.inspect}>"
285
+ end
286
+ end
287
+
288
+ # EmbDocComment objects correspond to comments that are surrounded by =begin
289
+ # and =end.
290
+ class EmbDocComment < Comment
291
+ # This can only be true for inline comments.
292
+ def trailing?
293
+ false
294
+ end
295
+
296
+ # Returns a string representation of this comment.
297
+ def inspect
298
+ "#<Prism::EmbDocComment @location=#{location.inspect}>"
299
+ end
300
+ end
301
+
302
+ # This represents a magic comment that was encountered during parsing.
303
+ class MagicComment
304
+ # A Location object representing the location of the key in the source.
305
+ attr_reader :key_loc
306
+
307
+ # A Location object representing the location of the value in the source.
308
+ attr_reader :value_loc
309
+
310
+ # Create a new magic comment object with the given key and value locations.
311
+ def initialize(key_loc, value_loc)
312
+ @key_loc = key_loc
313
+ @value_loc = value_loc
314
+ end
315
+
316
+ # Returns the key of the magic comment by slicing it from the source code.
317
+ def key
318
+ key_loc.slice
319
+ end
320
+
321
+ # Returns the value of the magic comment by slicing it from the source code.
322
+ def value
323
+ value_loc.slice
324
+ end
325
+
326
+ # Implement the hash pattern matching interface for MagicComment.
327
+ def deconstruct_keys(keys)
328
+ { key_loc: key_loc, value_loc: value_loc }
329
+ end
330
+
331
+ # Returns a string representation of this magic comment.
332
+ def inspect
333
+ "#<Prism::MagicComment @key=#{key.inspect} @value=#{value.inspect}>"
334
+ end
335
+ end
336
+
337
+ # This represents an error that was encountered during parsing.
338
+ class ParseError
339
+ # The message associated with this error.
340
+ attr_reader :message
341
+
342
+ # A Location object representing the location of this error in the source.
343
+ attr_reader :location
344
+
345
+ # The level of this error.
346
+ attr_reader :level
347
+
348
+ # Create a new error object with the given message and location.
349
+ def initialize(message, location, level)
350
+ @message = message
351
+ @location = location
352
+ @level = level
353
+ end
354
+
355
+ # Implement the hash pattern matching interface for ParseError.
356
+ def deconstruct_keys(keys)
357
+ { message: message, location: location, level: level }
358
+ end
359
+
360
+ # Returns a string representation of this error.
361
+ def inspect
362
+ "#<Prism::ParseError @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>"
363
+ end
364
+ end
365
+
366
+ # This represents a warning that was encountered during parsing.
367
+ class ParseWarning
368
+ # The message associated with this warning.
369
+ attr_reader :message
370
+
371
+ # A Location object representing the location of this warning in the source.
372
+ attr_reader :location
373
+
374
+ # The level of this warning.
375
+ attr_reader :level
376
+
377
+ # Create a new warning object with the given message and location.
378
+ def initialize(message, location, level)
379
+ @message = message
380
+ @location = location
381
+ @level = level
382
+ end
383
+
384
+ # Implement the hash pattern matching interface for ParseWarning.
385
+ def deconstruct_keys(keys)
386
+ { message: message, location: location, level: level }
387
+ end
388
+
389
+ # Returns a string representation of this warning.
390
+ def inspect
391
+ "#<Prism::ParseWarning @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>"
392
+ end
393
+ end
394
+
395
+ # This represents the result of a call to ::parse or ::parse_file. It contains
396
+ # the AST, any comments that were encountered, and any errors that were
397
+ # encountered.
398
+ class ParseResult
399
+ # The value that was generated by parsing. Normally this holds the AST, but
400
+ # it can sometimes hold a list of tokens or other results passed back from
401
+ # the parser.
402
+ attr_reader :value
403
+
404
+ # The list of comments that were encountered during parsing.
405
+ attr_reader :comments
406
+
407
+ # The list of magic comments that were encountered during parsing.
408
+ attr_reader :magic_comments
409
+
410
+ # An optional location that represents the location of the __END__ marker
411
+ # and the rest of the content of the file. This content is loaded into the
412
+ # DATA constant when the file being parsed is the main file being executed.
413
+ attr_reader :data_loc
414
+
415
+ # The list of errors that were generated during parsing.
416
+ attr_reader :errors
417
+
418
+ # The list of warnings that were generated during parsing.
419
+ attr_reader :warnings
420
+
421
+ # A Source instance that represents the source code that was parsed.
422
+ attr_reader :source
423
+
424
+ # Create a new parse result object with the given values.
425
+ def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
426
+ @value = value
427
+ @comments = comments
428
+ @magic_comments = magic_comments
429
+ @data_loc = data_loc
430
+ @errors = errors
431
+ @warnings = warnings
432
+ @source = source
433
+ end
434
+
435
+ # Implement the hash pattern matching interface for ParseResult.
436
+ def deconstruct_keys(keys)
437
+ { value: value, comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings }
438
+ end
439
+
440
+ # Returns true if there were no errors during parsing and false if there
441
+ # were.
442
+ def success?
443
+ errors.empty?
444
+ end
445
+
446
+ # Returns true if there were errors during parsing and false if there were
447
+ # not.
448
+ def failure?
449
+ !success?
450
+ end
451
+ end
452
+
453
+ # This represents a token from the Ruby source.
454
+ class Token
455
+ # The type of token that this token is.
456
+ attr_reader :type
457
+
458
+ # A byteslice of the source that this token represents.
459
+ attr_reader :value
460
+
461
+ # A Location object representing the location of this token in the source.
462
+ attr_reader :location
463
+
464
+ # Create a new token object with the given type, value, and location.
465
+ def initialize(type, value, location)
466
+ @type = type
467
+ @value = value
468
+ @location = location
469
+ end
470
+
471
+ # Implement the hash pattern matching interface for Token.
472
+ def deconstruct_keys(keys)
473
+ { type: type, value: value, location: location }
474
+ end
475
+
476
+ # Implement the pretty print interface for Token.
477
+ def pretty_print(q)
478
+ q.group do
479
+ q.text(type.to_s)
480
+ self.location.pretty_print(q)
481
+ q.text("(")
482
+ q.nest(2) do
483
+ q.breakable("")
484
+ q.pp(value)
485
+ end
486
+ q.breakable("")
487
+ q.text(")")
488
+ end
489
+ end
490
+
491
+ # Returns true if the given other token is equal to this token.
492
+ def ==(other)
493
+ other.is_a?(Token) &&
494
+ other.type == type &&
495
+ other.value == value
496
+ end
497
+ end
498
+ end