jruby-prism-parser 0.23.0.pre.SNAPSHOT-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +401 -0
  3. data/CODE_OF_CONDUCT.md +76 -0
  4. data/CONTRIBUTING.md +62 -0
  5. data/LICENSE.md +7 -0
  6. data/Makefile +101 -0
  7. data/README.md +98 -0
  8. data/config.yml +2902 -0
  9. data/docs/build_system.md +91 -0
  10. data/docs/configuration.md +64 -0
  11. data/docs/cruby_compilation.md +27 -0
  12. data/docs/design.md +53 -0
  13. data/docs/encoding.md +121 -0
  14. data/docs/fuzzing.md +88 -0
  15. data/docs/heredocs.md +36 -0
  16. data/docs/javascript.md +118 -0
  17. data/docs/local_variable_depth.md +229 -0
  18. data/docs/mapping.md +117 -0
  19. data/docs/parser_translation.md +34 -0
  20. data/docs/parsing_rules.md +19 -0
  21. data/docs/releasing.md +98 -0
  22. data/docs/ripper.md +36 -0
  23. data/docs/ruby_api.md +43 -0
  24. data/docs/ruby_parser_translation.md +19 -0
  25. data/docs/serialization.md +209 -0
  26. data/docs/testing.md +55 -0
  27. data/ext/prism/api_node.c +5098 -0
  28. data/ext/prism/api_pack.c +267 -0
  29. data/ext/prism/extconf.rb +110 -0
  30. data/ext/prism/extension.c +1155 -0
  31. data/ext/prism/extension.h +18 -0
  32. data/include/prism/ast.h +5807 -0
  33. data/include/prism/defines.h +102 -0
  34. data/include/prism/diagnostic.h +339 -0
  35. data/include/prism/encoding.h +265 -0
  36. data/include/prism/node.h +57 -0
  37. data/include/prism/options.h +230 -0
  38. data/include/prism/pack.h +152 -0
  39. data/include/prism/parser.h +732 -0
  40. data/include/prism/prettyprint.h +26 -0
  41. data/include/prism/regexp.h +33 -0
  42. data/include/prism/util/pm_buffer.h +155 -0
  43. data/include/prism/util/pm_char.h +205 -0
  44. data/include/prism/util/pm_constant_pool.h +209 -0
  45. data/include/prism/util/pm_list.h +97 -0
  46. data/include/prism/util/pm_memchr.h +29 -0
  47. data/include/prism/util/pm_newline_list.h +93 -0
  48. data/include/prism/util/pm_state_stack.h +42 -0
  49. data/include/prism/util/pm_string.h +150 -0
  50. data/include/prism/util/pm_string_list.h +44 -0
  51. data/include/prism/util/pm_strncasecmp.h +32 -0
  52. data/include/prism/util/pm_strpbrk.h +46 -0
  53. data/include/prism/version.h +29 -0
  54. data/include/prism.h +289 -0
  55. data/jruby-prism.jar +0 -0
  56. data/lib/prism/compiler.rb +486 -0
  57. data/lib/prism/debug.rb +206 -0
  58. data/lib/prism/desugar_compiler.rb +207 -0
  59. data/lib/prism/dispatcher.rb +2150 -0
  60. data/lib/prism/dot_visitor.rb +4634 -0
  61. data/lib/prism/dsl.rb +785 -0
  62. data/lib/prism/ffi.rb +346 -0
  63. data/lib/prism/lex_compat.rb +908 -0
  64. data/lib/prism/mutation_compiler.rb +753 -0
  65. data/lib/prism/node.rb +17864 -0
  66. data/lib/prism/node_ext.rb +212 -0
  67. data/lib/prism/node_inspector.rb +68 -0
  68. data/lib/prism/pack.rb +224 -0
  69. data/lib/prism/parse_result/comments.rb +177 -0
  70. data/lib/prism/parse_result/newlines.rb +64 -0
  71. data/lib/prism/parse_result.rb +498 -0
  72. data/lib/prism/pattern.rb +250 -0
  73. data/lib/prism/serialize.rb +1354 -0
  74. data/lib/prism/translation/parser/compiler.rb +1838 -0
  75. data/lib/prism/translation/parser/lexer.rb +335 -0
  76. data/lib/prism/translation/parser/rubocop.rb +37 -0
  77. data/lib/prism/translation/parser.rb +178 -0
  78. data/lib/prism/translation/ripper.rb +577 -0
  79. data/lib/prism/translation/ruby_parser.rb +1521 -0
  80. data/lib/prism/translation.rb +11 -0
  81. data/lib/prism/version.rb +3 -0
  82. data/lib/prism/visitor.rb +495 -0
  83. data/lib/prism.rb +99 -0
  84. data/prism.gemspec +135 -0
  85. data/rbi/prism.rbi +7767 -0
  86. data/rbi/prism_static.rbi +207 -0
  87. data/sig/prism.rbs +4773 -0
  88. data/sig/prism_static.rbs +201 -0
  89. data/src/diagnostic.c +400 -0
  90. data/src/encoding.c +5132 -0
  91. data/src/node.c +2786 -0
  92. data/src/options.c +213 -0
  93. data/src/pack.c +493 -0
  94. data/src/prettyprint.c +8881 -0
  95. data/src/prism.c +18406 -0
  96. data/src/regexp.c +638 -0
  97. data/src/serialize.c +1554 -0
  98. data/src/token_type.c +700 -0
  99. data/src/util/pm_buffer.c +190 -0
  100. data/src/util/pm_char.c +318 -0
  101. data/src/util/pm_constant_pool.c +322 -0
  102. data/src/util/pm_list.c +49 -0
  103. data/src/util/pm_memchr.c +35 -0
  104. data/src/util/pm_newline_list.c +84 -0
  105. data/src/util/pm_state_stack.c +25 -0
  106. data/src/util/pm_string.c +203 -0
  107. data/src/util/pm_string_list.c +28 -0
  108. data/src/util/pm_strncasecmp.c +24 -0
  109. data/src/util/pm_strpbrk.c +180 -0
  110. metadata +156 -0
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Prism
4
class ParseResult
  # Ruby fires the :line tracepoint event whenever the VM reaches an
  # expression that sits on a new line. Expressions able to trigger it are:
  #
  # * if statements
  # * unless statements
  # * nodes that are children of statements lists
  #
  # To track newlines, a list of offsets comes back from the parser; those
  # are assigned to the first nodes found in the tree on each of those lines.
  # This visitor performs that walk, passing its bookkeeping array to every
  # candidate node's set_newline_flag.
  #
  # Keep the logic here in sync with the Java MarkNewlinesVisitor, which is
  # responsible for marking the newlines for JRuby/TruffleRuby.
  class Newlines < Visitor
    # Build a visitor around +newline_marked+, the array handed to each
    # node's set_newline_flag while walking the tree.
    def initialize(newline_marked)
      @newline_marked = newline_marked
    end

    # Blocks and lambdas get a fresh, all-false array of the same size for
    # the duration of their subtree, so they may mark newlines within
    # themselves. The enclosing array is restored afterwards, even if the
    # traversal raises.
    def visit_block_node(node)
      enclosing_marks = @newline_marked
      @newline_marked = Array.new(enclosing_marks.size, false)
      super(node)
    ensure
      @newline_marked = enclosing_marks
    end

    alias_method :visit_lambda_node, :visit_block_node

    # if/unless nodes are themselves candidates for the newline flag; flag
    # the node first and then continue into its children.
    def visit_if_node(node)
      node.set_newline_flag(@newline_marked)
      super(node)
    end

    alias_method :visit_unless_node, :visit_if_node

    # Every direct child of a statements list is a candidate for the newline
    # flag; flag them all before descending.
    def visit_statements_node(node)
      node.body.each { |statement| statement.set_newline_flag(@newline_marked) }
      super(node)
    end
  end

  private_constant :Newlines

  # Walk the tree and mark nodes that are on a new line. The bookkeeping
  # array starts out all false and has one more slot than the source has
  # newline offsets.
  def mark_newlines!
    line_marks = Array.new(source.offsets.size + 1, false)
    value.accept(Newlines.new(line_marks))
  end
end
64
+ end
@@ -0,0 +1,498 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Prism
4
+ # This represents a source of Ruby code that has been parsed. It is used in
5
+ # conjunction with locations to allow them to resolve line numbers and source
6
+ # ranges.
7
class Source
  # The source code that this source object represents.
  attr_reader :source

  # The line number where this source starts.
  attr_reader :start_line

  # The list of newline byte offsets in the source code. The line lookups
  # below binary-search this list, so it must be sorted in ascending order.
  attr_reader :offsets

  # Create a new source object with the given source code.
  #
  # source     - the String of Ruby code
  # start_line - the line number reported for the first line (defaults to 1)
  # offsets    - the byte offsets at which each line starts
  def initialize(source, start_line = 1, offsets = [])
    @source = source
    @start_line = start_line # set after parsing is done
    @offsets = offsets # set after parsing is done
  end

  # Perform a byteslice on the source code using the given byte offset and
  # byte length.
  def slice(byte_offset, length)
    source.byteslice(byte_offset, length)
  end

  # Return the line number (counted from start_line) that contains the given
  # byte offset.
  def line(byte_offset)
    start_line + find_line(byte_offset)
  end

  # Return the byte offset of the start of the line corresponding to the given
  # byte offset.
  def line_start(byte_offset)
    offsets[find_line(byte_offset)]
  end

  # Return the column number, in bytes, for the given byte offset.
  def column(byte_offset)
    byte_offset - line_start(byte_offset)
  end

  # Return the character offset for the given byte offset.
  def character_offset(byte_offset)
    source.byteslice(0, byte_offset).length
  end

  # Return the column number in characters for the given byte offset.
  def character_column(byte_offset)
    character_offset(byte_offset) - character_offset(line_start(byte_offset))
  end

  # Returns the offset from the start of the file for the given byte offset
  # counting in code units for the given encoding.
  #
  # This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
  # concept of code units that differs from the number of characters in other
  # encodings, it is not captured here.
  #
  # The prefix of the source is transcoded with invalid: :replace and
  # undef: :replace so that a byte offset that lands inside a multibyte
  # character, or a source containing bytes that cannot be converted, yields
  # a count (with substitution characters standing in for the bad bytes)
  # instead of raising an encoding error.
  def code_units_offset(byte_offset, encoding)
    prefix = source.byteslice(0, byte_offset).encode(encoding, invalid: :replace, undef: :replace)

    if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
      # UTF-16 code units are two bytes wide; surrogate pairs count twice.
      prefix.bytesize / 2
    else
      prefix.length
    end
  end

  # Returns the column number in code units for the given encoding for the
  # given byte offset.
  def code_units_column(byte_offset, encoding)
    code_units_offset(byte_offset, encoding) - code_units_offset(line_start(byte_offset), encoding)
  end

  private

  # Binary search through the offsets to find the zero-based line index for
  # the given byte offset: the index of the last entry in offsets that is
  # less than or equal to byte_offset. Returns -1 when no entry qualifies
  # (for example when offsets is empty).
  def find_line(byte_offset)
    # bsearch_index finds the first line start strictly after byte_offset;
    # the line containing byte_offset is the one just before it. When every
    # line start is <= byte_offset the offset belongs to the last line.
    following = offsets.bsearch_index { |offset| offset > byte_offset } || offsets.length
    following - 1
  end
end
96
+
97
+ # This represents a location in the source.
98
class Location
  # The Source object consulted to turn this location's offset and length
  # into lines, columns, and text. Only other locations may read it.
  attr_reader :source
  protected :source

  # Byte offset from the beginning of the source at which this location
  # begins.
  attr_reader :start_offset

  # Size of this location in bytes.
  attr_reader :length

  # Comments that have been attached to this location.
  attr_reader :comments

  # Build a location from a source, a starting byte offset, and a byte
  # length. The list of attached comments starts out empty.
  def initialize(source, start_offset, length)
    @source = source
    @start_offset = start_offset
    @length = length
    @comments = []
  end

  # Build a new location, taking :source, :start_offset and :length from the
  # given options and falling back to this location's own values for any
  # that are absent.
  def copy(**options)
    new_source = options.fetch(:source) { source }
    new_start = options.fetch(:start_offset) { start_offset }
    new_length = options.fetch(:length) { length }

    Location.new(new_source, new_start, new_length)
  end

  # A human-readable summary of this location.
  def inspect
    "#<Prism::Location @start_offset=#{@start_offset} @length=#{@length} start_line=#{start_line}>"
  end

  # The text of the source covered by this location.
  def slice
    source.slice(start_offset, length)
  end

  # Character offset from the beginning of the source at which this location
  # begins.
  def start_character_offset
    source.character_offset(start_offset)
  end

  # Offset of the start of this location from the start of the file, in code
  # units of the given encoding.
  def start_code_units_offset(encoding = Encoding::UTF_16LE)
    source.code_units_offset(start_offset, encoding)
  end

  # Byte offset from the beginning of the source at which this location
  # finishes.
  def end_offset
    start_offset + length
  end

  # Character offset from the beginning of the source at which this location
  # finishes.
  def end_character_offset
    source.character_offset(end_offset)
  end

  # Offset of the end of this location from the start of the file, in code
  # units of the given encoding.
  def end_code_units_offset(encoding = Encoding::UTF_16LE)
    source.code_units_offset(end_offset, encoding)
  end

  # Line number on which this location begins.
  def start_line
    source.line(start_offset)
  end

  # Whatever precedes this location on the line where it begins.
  def start_line_slice
    line_begin = source.line_start(start_offset)
    source.slice(line_begin, start_offset - line_begin)
  end

  # Line number on which this location finishes.
  def end_line
    source.line(end_offset)
  end

  # Column, in bytes from the start of the line, at which this location
  # begins.
  def start_column
    source.column(start_offset)
  end

  # Column, in characters from the start of the line, at which this location
  # begins.
  def start_character_column
    source.character_column(start_offset)
  end

  # Column, in code units of the given encoding from the start of the line,
  # at which this location begins.
  def start_code_units_column(encoding = Encoding::UTF_16LE)
    source.code_units_column(start_offset, encoding)
  end

  # Column, in bytes from the start of the line, at which this location
  # finishes.
  def end_column
    source.column(end_offset)
  end

  # Column, in characters from the start of the line, at which this location
  # finishes.
  def end_character_column
    source.character_column(end_offset)
  end

  # Column, in code units of the given encoding from the start of the line,
  # at which this location finishes.
  def end_code_units_column(encoding = Encoding::UTF_16LE)
    source.code_units_column(end_offset, encoding)
  end

  # Hash pattern matching support; exposes the start and end byte offsets.
  def deconstruct_keys(keys)
    { start_offset: start_offset, end_offset: end_offset }
  end

  # Pretty print support; emits "(line,column)-(line,column)".
  def pretty_print(q)
    q.text("(#{start_line},#{start_column})-(#{end_line},#{end_column})")
  end

  # Two locations are equal when they begin and finish at the same byte
  # offsets. Anything that is not a Location is never equal.
  def ==(other)
    return false unless other.is_a?(Location)

    other.start_offset == start_offset && other.end_offset == end_offset
  end

  # Build a location stretching from the start of this one to the end of the
  # given other one. Raises when the two do not share a source or when this
  # location begins after the other.
  def join(other)
    raise "Incompatible sources" if source != other.source
    raise "Incompatible locations" if start_offset > other.start_offset

    joined_length = other.end_offset - start_offset
    Location.new(source, start_offset, joined_length)
  end

  # A placeholder location with no source, pointing at the beginning of the
  # file. Handy when a location object is required but its position is
  # irrelevant.
  def self.null
    new(nil, 0, 0)
  end
end
255
+
256
+ # This represents a comment that was encountered during parsing. It is the
257
+ # base class for all comment types.
258
class Comment
  # Where in the source this comment was found.
  attr_reader :location

  # Build a comment that remembers the given location.
  def initialize(location)
    @location = location
  end

  # Hash pattern matching support; exposes the comment's location.
  def deconstruct_keys(keys)
    {
      location: location
    }
  end
end
272
+
273
+ # InlineComment objects are the most common. They correspond to comments in
274
+ # the source file like this one that start with #.
275
class InlineComment < Comment
  # True when something other than whitespace precedes this comment on its
  # line (the comment trails other code); false when the comment stands
  # alone.
  def trailing?
    preceding_text = location.start_line_slice.strip
    !preceding_text.empty?
  end

  # A human-readable summary of this comment.
  def inspect
    "#<Prism::InlineComment @location=#{location.inspect}>"
  end
end
287
+
288
+ # EmbDocComment objects correspond to comments that are surrounded by =begin
289
+ # and =end.
290
class EmbDocComment < Comment
  # Always false: only inline comments can trail other code.
  def trailing?
    false
  end

  # A human-readable summary of this comment.
  def inspect
    "#<Prism::EmbDocComment @location=#{location.inspect}>"
  end
end
301
+
302
+ # This represents a magic comment that was encountered during parsing.
303
class MagicComment
  # Location objects for the key and for the value of the magic comment in
  # the source.
  attr_reader :key_loc, :value_loc

  # Build a magic comment from the locations of its key and its value.
  def initialize(key_loc, value_loc)
    @key_loc = key_loc
    @value_loc = value_loc
  end

  # The key text, sliced out of the source through the key location.
  def key
    key_loc.slice
  end

  # The value text, sliced out of the source through the value location.
  def value
    value_loc.slice
  end

  # Hash pattern matching support; exposes both locations.
  def deconstruct_keys(keys)
    {
      key_loc: key_loc,
      value_loc: value_loc
    }
  end

  # A human-readable summary showing the key and value text.
  def inspect
    "#<Prism::MagicComment @key=#{key.inspect} @value=#{value.inspect}>"
  end
end
336
+
337
+ # This represents an error that was encountered during parsing.
338
class ParseError
  # The human-readable message, the Location in the source, and the level of
  # this error.
  attr_reader :message, :location, :level

  # Build an error from its message, location, and level.
  def initialize(message, location, level)
    @message = message
    @location = location
    @level = level
  end

  # Hash pattern matching support; exposes message, location, and level.
  def deconstruct_keys(keys)
    {
      message: message,
      location: location,
      level: level
    }
  end

  # A human-readable summary of this error.
  def inspect
    "#<Prism::ParseError @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>"
  end
end
365
+
366
+ # This represents a warning that was encountered during parsing.
367
class ParseWarning
  # The human-readable message, the Location in the source, and the level of
  # this warning.
  attr_reader :message, :location, :level

  # Build a warning from its message, location, and level.
  def initialize(message, location, level)
    @message = message
    @location = location
    @level = level
  end

  # Hash pattern matching support; exposes message, location, and level.
  def deconstruct_keys(keys)
    {
      message: message,
      location: location,
      level: level
    }
  end

  # A human-readable summary of this warning.
  def inspect
    "#<Prism::ParseWarning @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>"
  end
end
394
+
395
+ # This represents the result of a call to ::parse or ::parse_file. It contains
396
+ # the AST, any comments that were encountered, and any errors that were
397
+ # encountered.
398
class ParseResult
  # What parsing produced. Normally this holds the AST, but it can sometimes
  # hold a list of tokens or other results passed back from the parser.
  attr_reader :value

  # Comments encountered during parsing.
  attr_reader :comments

  # Magic comments encountered during parsing.
  attr_reader :magic_comments

  # Optional location of the __END__ marker and the rest of the content of
  # the file. That content is loaded into the DATA constant when the file
  # being parsed is the main file being executed.
  attr_reader :data_loc

  # Errors generated during parsing.
  attr_reader :errors

  # Warnings generated during parsing.
  attr_reader :warnings

  # The Source instance representing the code that was parsed.
  attr_reader :source

  # Build a parse result out of the given pieces.
  def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
    @value, @comments, @magic_comments = value, comments, magic_comments
    @data_loc = data_loc
    @errors, @warnings = errors, warnings
    @source = source
  end

  # Hash pattern matching support; exposes everything except the source.
  def deconstruct_keys(keys)
    {
      value: value,
      comments: comments,
      magic_comments: magic_comments,
      data_loc: data_loc,
      errors: errors,
      warnings: warnings
    }
  end

  # True when parsing produced no errors, false otherwise.
  def success?
    errors.empty?
  end

  # True when parsing produced at least one error, false otherwise.
  def failure?
    !success?
  end
end
452
+
453
+ # This represents a token from the Ruby source.
454
class Token
  # Which kind of token this is.
  attr_reader :type

  # The byteslice of the source that this token covers.
  attr_reader :value

  # The Location of this token in the source.
  attr_reader :location

  # Build a token from its type, value, and location.
  def initialize(type, value, location)
    @type = type
    @value = value
    @location = location
  end

  # Hash pattern matching support; exposes type, value, and location.
  def deconstruct_keys(keys)
    {
      type: type,
      value: value,
      location: location
    }
  end

  # Pretty print support: the type, then the location, then the value
  # wrapped in parentheses (nested by two when the group has to break).
  def pretty_print(q)
    q.group do
      q.text(type.to_s)
      location.pretty_print(q)
      q.text("(")
      q.nest(2) do
        q.breakable("")
        q.pp(value)
      end
      q.breakable("")
      q.text(")")
    end
  end

  # Two tokens are equal when their types and values match; the location is
  # not compared. Anything that is not a Token is never equal.
  def ==(other)
    return false unless other.is_a?(Token)

    other.type == type && other.value == value
  end
end
498
+ end