jruby-prism-parser 0.24.0-java → 1.4.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. checksums.yaml +4 -4
  2. data/BSDmakefile +58 -0
  3. data/CHANGELOG.md +269 -1
  4. data/CONTRIBUTING.md +0 -4
  5. data/Makefile +25 -18
  6. data/README.md +57 -6
  7. data/config.yml +1724 -140
  8. data/docs/build_system.md +39 -11
  9. data/docs/configuration.md +4 -0
  10. data/docs/cruby_compilation.md +1 -1
  11. data/docs/fuzzing.md +1 -1
  12. data/docs/parser_translation.md +14 -9
  13. data/docs/parsing_rules.md +4 -1
  14. data/docs/releasing.md +8 -10
  15. data/docs/relocation.md +34 -0
  16. data/docs/ripper_translation.md +72 -0
  17. data/docs/ruby_api.md +2 -1
  18. data/docs/serialization.md +29 -5
  19. data/ext/prism/api_node.c +3395 -1999
  20. data/ext/prism/api_pack.c +9 -0
  21. data/ext/prism/extconf.rb +55 -34
  22. data/ext/prism/extension.c +597 -346
  23. data/ext/prism/extension.h +6 -5
  24. data/include/prism/ast.h +2612 -455
  25. data/include/prism/defines.h +160 -2
  26. data/include/prism/diagnostic.h +188 -76
  27. data/include/prism/encoding.h +22 -4
  28. data/include/prism/node.h +89 -17
  29. data/include/prism/options.h +224 -12
  30. data/include/prism/pack.h +11 -0
  31. data/include/prism/parser.h +267 -66
  32. data/include/prism/prettyprint.h +8 -0
  33. data/include/prism/regexp.h +18 -8
  34. data/include/prism/static_literals.h +121 -0
  35. data/include/prism/util/pm_buffer.h +75 -2
  36. data/include/prism/util/pm_char.h +1 -2
  37. data/include/prism/util/pm_constant_pool.h +18 -9
  38. data/include/prism/util/pm_integer.h +126 -0
  39. data/include/prism/util/pm_list.h +1 -1
  40. data/include/prism/util/pm_newline_list.h +19 -0
  41. data/include/prism/util/pm_string.h +48 -8
  42. data/include/prism/version.h +3 -3
  43. data/include/prism.h +99 -5
  44. data/jruby-prism.jar +0 -0
  45. data/lib/prism/compiler.rb +11 -1
  46. data/lib/prism/desugar_compiler.rb +113 -74
  47. data/lib/prism/dispatcher.rb +45 -1
  48. data/lib/prism/dot_visitor.rb +201 -77
  49. data/lib/prism/dsl.rb +673 -461
  50. data/lib/prism/ffi.rb +233 -45
  51. data/lib/prism/inspect_visitor.rb +2389 -0
  52. data/lib/prism/lex_compat.rb +35 -16
  53. data/lib/prism/mutation_compiler.rb +24 -8
  54. data/lib/prism/node.rb +7731 -8460
  55. data/lib/prism/node_ext.rb +328 -32
  56. data/lib/prism/pack.rb +4 -0
  57. data/lib/prism/parse_result/comments.rb +34 -24
  58. data/lib/prism/parse_result/errors.rb +65 -0
  59. data/lib/prism/parse_result/newlines.rb +102 -12
  60. data/lib/prism/parse_result.rb +448 -44
  61. data/lib/prism/pattern.rb +28 -10
  62. data/lib/prism/polyfill/append_as_bytes.rb +15 -0
  63. data/lib/prism/polyfill/byteindex.rb +13 -0
  64. data/lib/prism/polyfill/unpack1.rb +14 -0
  65. data/lib/prism/reflection.rb +413 -0
  66. data/lib/prism/relocation.rb +504 -0
  67. data/lib/prism/serialize.rb +1940 -1198
  68. data/lib/prism/string_query.rb +30 -0
  69. data/lib/prism/translation/parser/builder.rb +61 -0
  70. data/lib/prism/translation/parser/compiler.rb +569 -195
  71. data/lib/prism/translation/parser/lexer.rb +516 -39
  72. data/lib/prism/translation/parser.rb +177 -12
  73. data/lib/prism/translation/parser33.rb +1 -1
  74. data/lib/prism/translation/parser34.rb +1 -1
  75. data/lib/prism/translation/parser35.rb +12 -0
  76. data/lib/prism/translation/ripper/sexp.rb +125 -0
  77. data/lib/prism/translation/ripper/shim.rb +5 -0
  78. data/lib/prism/translation/ripper.rb +3224 -462
  79. data/lib/prism/translation/ruby_parser.rb +194 -69
  80. data/lib/prism/translation.rb +4 -1
  81. data/lib/prism/version.rb +1 -1
  82. data/lib/prism/visitor.rb +13 -0
  83. data/lib/prism.rb +17 -27
  84. data/prism.gemspec +57 -17
  85. data/rbi/prism/compiler.rbi +12 -0
  86. data/rbi/prism/dsl.rbi +524 -0
  87. data/rbi/prism/inspect_visitor.rbi +12 -0
  88. data/rbi/prism/node.rbi +8722 -0
  89. data/rbi/prism/node_ext.rbi +107 -0
  90. data/rbi/prism/parse_result.rbi +404 -0
  91. data/rbi/prism/reflection.rbi +58 -0
  92. data/rbi/prism/string_query.rbi +12 -0
  93. data/rbi/prism/translation/parser.rbi +11 -0
  94. data/rbi/prism/translation/parser33.rbi +6 -0
  95. data/rbi/prism/translation/parser34.rbi +6 -0
  96. data/rbi/prism/translation/parser35.rbi +6 -0
  97. data/rbi/prism/translation/ripper.rbi +15 -0
  98. data/rbi/prism/visitor.rbi +473 -0
  99. data/rbi/prism.rbi +44 -7745
  100. data/sig/prism/compiler.rbs +9 -0
  101. data/sig/prism/dispatcher.rbs +16 -0
  102. data/sig/prism/dot_visitor.rbs +6 -0
  103. data/sig/prism/dsl.rbs +351 -0
  104. data/sig/prism/inspect_visitor.rbs +22 -0
  105. data/sig/prism/lex_compat.rbs +10 -0
  106. data/sig/prism/mutation_compiler.rbs +159 -0
  107. data/sig/prism/node.rbs +3614 -0
  108. data/sig/prism/node_ext.rbs +82 -0
  109. data/sig/prism/pack.rbs +43 -0
  110. data/sig/prism/parse_result.rbs +192 -0
  111. data/sig/prism/pattern.rbs +13 -0
  112. data/sig/prism/reflection.rbs +50 -0
  113. data/sig/prism/relocation.rbs +185 -0
  114. data/sig/prism/serialize.rbs +8 -0
  115. data/sig/prism/string_query.rbs +11 -0
  116. data/sig/prism/visitor.rbs +169 -0
  117. data/sig/prism.rbs +248 -4767
  118. data/src/diagnostic.c +672 -230
  119. data/src/encoding.c +211 -108
  120. data/src/node.c +7541 -1653
  121. data/src/options.c +135 -20
  122. data/src/pack.c +33 -17
  123. data/src/prettyprint.c +1543 -1485
  124. data/src/prism.c +7813 -3050
  125. data/src/regexp.c +225 -73
  126. data/src/serialize.c +101 -77
  127. data/src/static_literals.c +617 -0
  128. data/src/token_type.c +14 -13
  129. data/src/util/pm_buffer.c +187 -20
  130. data/src/util/pm_char.c +5 -5
  131. data/src/util/pm_constant_pool.c +39 -19
  132. data/src/util/pm_integer.c +670 -0
  133. data/src/util/pm_list.c +1 -1
  134. data/src/util/pm_newline_list.c +43 -5
  135. data/src/util/pm_string.c +213 -33
  136. data/src/util/pm_strncasecmp.c +13 -1
  137. data/src/util/pm_strpbrk.c +32 -6
  138. metadata +55 -19
  139. data/docs/ripper.md +0 -36
  140. data/include/prism/util/pm_state_stack.h +0 -42
  141. data/include/prism/util/pm_string_list.h +0 -44
  142. data/lib/prism/debug.rb +0 -206
  143. data/lib/prism/node_inspector.rb +0 -68
  144. data/lib/prism/translation/parser/rubocop.rb +0 -45
  145. data/rbi/prism_static.rbi +0 -207
  146. data/sig/prism_static.rbs +0 -201
  147. data/src/util/pm_state_stack.c +0 -25
  148. data/src/util/pm_string_list.c +0 -28
@@ -0,0 +1,504 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Prism
4
+ # Prism parses deterministically for the same input. This provides a nice
5
+ # property that is exposed through the #node_id API on nodes. Effectively this
6
+ # means that for the same input, these values will remain consistent every
7
+ # time the source is parsed. This means we can reparse the same source with a
8
+ # #node_id value and find the exact same node again.
9
+ #
10
+ # The Relocation module provides an API around this property. It allows you to
11
+ # "save" nodes and locations using a minimal amount of memory (just the
12
+ # node_id and a field identifier) and then reify them later.
13
+ module Relocation
14
# An entry in a repository that will lazily reify its values when they are
# first accessed. Until then it only holds a reference to the repository it
# was created from.
class Entry
  # Raised if a value that could potentially be on an entry is missing
  # because it was either not configured on the repository or it has not yet
  # been fetched.
  class MissingValueError < StandardError
  end

  # Initialize a new entry with the given repository.
  def initialize(repository)
    @repository = repository
    @values = nil
  end

  # Fetch the filepath of the value.
  def filepath
    fetch_value(:filepath)
  end

  # Fetch the start line of the value.
  def start_line
    fetch_value(:start_line)
  end

  # Fetch the end line of the value.
  def end_line
    fetch_value(:end_line)
  end

  # Fetch the start byte offset of the value.
  def start_offset
    fetch_value(:start_offset)
  end

  # Fetch the end byte offset of the value.
  def end_offset
    fetch_value(:end_offset)
  end

  # Fetch the start character offset of the value.
  def start_character_offset
    fetch_value(:start_character_offset)
  end

  # Fetch the end character offset of the value.
  def end_character_offset
    fetch_value(:end_character_offset)
  end

  # Fetch the start code units offset of the value, for the encoding that
  # was configured on the repository.
  def start_code_units_offset
    fetch_value(:start_code_units_offset)
  end

  # Fetch the end code units offset of the value, for the encoding that was
  # configured on the repository.
  def end_code_units_offset
    fetch_value(:end_code_units_offset)
  end

  # Fetch the start byte column of the value.
  def start_column
    fetch_value(:start_column)
  end

  # Fetch the end byte column of the value.
  def end_column
    fetch_value(:end_column)
  end

  # Fetch the start character column of the value.
  def start_character_column
    fetch_value(:start_character_column)
  end

  # Fetch the end character column of the value.
  def end_character_column
    fetch_value(:end_character_column)
  end

  # Fetch the start code units column of the value, for the encoding that
  # was configured on the repository.
  def start_code_units_column
    fetch_value(:start_code_units_column)
  end

  # Fetch the end code units column of the value, for the encoding that was
  # configured on the repository.
  def end_code_units_column
    fetch_value(:end_code_units_column)
  end

  # Fetch the leading comments of the value.
  def leading_comments
    fetch_value(:leading_comments)
  end

  # Fetch the trailing comments of the value.
  def trailing_comments
    fetch_value(:trailing_comments)
  end

  # Fetch the leading and trailing comments of the value, leading first.
  #
  # A new array is built on every call. Appending in place (Array#concat)
  # would modify the cached leading comments, so each additional call would
  # append the trailing comments again.
  def comments
    leading_comments + trailing_comments
  end

  # Reify the values on this entry with the given values. This is an
  # internal-only API that is called from the repository when it is time to
  # reify the values. The reference to the repository is dropped because it
  # is no longer needed once the values are present.
  def reify!(values) # :nodoc:
    @repository = nil
    @values = values
  end

  private

  # Fetch a value from the entry, raising MissingValueError if there is no
  # value stored under the given name.
  def fetch_value(name)
    values.fetch(name) do
      raise MissingValueError, "No value for #{name}, make sure the " \
                               "repository has been properly configured"
    end
  end

  # Return the values from the repository, reifying them if necessary.
  def values
    @values || (@repository.reify!; @values)
  end
end
146
+
147
# Base class for the thing a repository reparses. Subclasses supply #result.
class Source
  # The value that will need to be reparsed.
  attr_reader :value

  # Store the value that subclasses know how to reparse.
  def initialize(value)
    @value = value
  end

  # Reparse the value and return the parse result. Must be overridden.
  def result
    raise NotImplementedError, "Subclasses must implement #result"
  end

  # Reparse the value and ask the parse result for a code units cache for
  # the given encoding.
  def code_units_cache(encoding)
    parse_result = result
    parse_result.code_units_cache(encoding)
  end
end
167
+
168
# A source whose value is the path of the file to reparse.
class SourceFilepath < Source
  # Reparse the file at the stored path and return the parse result.
  def result
    path = value
    Prism.parse_file(path)
  end
end
175
+
176
# A source whose value is the string of code to reparse.
class SourceString < Source
  # Reparse the stored string and return the parse result.
  def result
    code = value
    Prism.parse(code)
  end
end
183
+
184
# A field that reports the file path it was constructed with.
class FilepathField
  # The file path that this field represents.
  attr_reader :value

  # Remember the file path to report.
  def initialize(value)
    @value = value
  end

  # Return the file path under the :filepath key. The argument is ignored
  # because the path does not depend on the node or location.
  def fields(_value)
    filepath = value
    { filepath: filepath }
  end
end
199
+
200
# A field that reports the start and end lines of a value.
class LinesField
  # Read the start line, then the end line, from the given value.
  def fields(value)
    first_line = value.start_line
    last_line = value.end_line
    { start_line: first_line, end_line: last_line }
  end
end
207
+
208
# A field that reports the start and end byte offsets of a value.
class OffsetsField
  # Read the start byte offset, then the end byte offset, from the value.
  def fields(value)
    first_offset = value.start_offset
    last_offset = value.end_offset
    { start_offset: first_offset, end_offset: last_offset }
  end
end
215
+
216
# A field that reports the start and end character offsets of a value.
class CharacterOffsetsField
  # Read the start character offset, then the end character offset, from
  # the value.
  def fields(value)
    first_offset = value.start_character_offset
    last_offset = value.end_character_offset
    { start_character_offset: first_offset, end_character_offset: last_offset }
  end
end
226
+
227
# A field that reports the start and end code unit offsets of a value for
# one encoding.
class CodeUnitOffsetsField
  # The repository that is asked for a code units cache the first time one
  # is needed.
  attr_reader :repository

  # The associated encoding for the code units.
  attr_reader :encoding

  # Remember the repository and encoding; the cache starts out unset.
  def initialize(repository, encoding)
    @repository, @encoding, @cache = repository, encoding, nil
  end

  # Compute the start and end code units offsets of the value, passing the
  # code units cache to the value's cached_* methods.
  def fields(value)
    offsets = {}
    offsets[:start_code_units_offset] = value.cached_start_code_units_offset(cache)
    offsets[:end_code_units_offset] = value.cached_end_code_units_offset(cache)
    offsets
  end

  private

  # Return the memoized code units cache, asking the repository for it if
  # it has not been created yet.
  def cache
    return @cache if @cache

    @cache = repository.code_units_cache(encoding)
  end
end
259
+
260
# A field that reports the start and end byte columns of a value.
class ColumnsField
  # Read the start byte column, then the end byte column, from the value.
  def fields(value)
    first_column = value.start_column
    last_column = value.end_column
    { start_column: first_column, end_column: last_column }
  end
end
267
+
268
# A field that reports the start and end character columns of a value.
class CharacterColumnsField
  # Read the start character column, then the end character column, from
  # the value.
  def fields(value)
    first_column = value.start_character_column
    last_column = value.end_character_column
    { start_character_column: first_column, end_character_column: last_column }
  end
end
278
+
279
# A field that reports the start and end code unit columns of a value for
# one encoding.
class CodeUnitColumnsField
  # The repository that is asked for a code units cache the first time one
  # is needed.
  attr_reader :repository

  # The associated encoding for the code units.
  attr_reader :encoding

  # Remember the repository and encoding; the cache starts out unset.
  def initialize(repository, encoding)
    @repository, @encoding, @cache = repository, encoding, nil
  end

  # Compute the start and end code units columns of the value, passing the
  # code units cache to the value's cached_* methods.
  def fields(value)
    columns = {}
    columns[:start_code_units_column] = value.cached_start_code_units_column(cache)
    columns[:end_code_units_column] = value.cached_end_code_units_column(cache)
    columns
  end

  private

  # Return the memoized code units cache, asking the repository for it if
  # it has not been created yet.
  def cache
    return @cache if @cache

    @cache = repository.code_units_cache(encoding)
  end
end
312
+
313
# Shared parent of the leading and trailing comments fields. It provides
# the helper that wraps comment slices.
class CommentsField
  # A small object that holds only the slice of a comment.
  class Comment
    # The slice of the comment.
    attr_reader :slice

    # Store the given slice.
    def initialize(slice)
      @slice = slice
    end
  end

  private

  # Build one Comment per given value, each holding that value's slice.
  def comments(values)
    values.map do |comment|
      text = comment.slice
      Comment.new(text)
    end
  end
end
333
+
334
# A field that reports the leading comments of a value.
class LeadingCommentsField < CommentsField
  # Wrap the value's leading comments and return them under the
  # :leading_comments key.
  def fields(value)
    leading = comments(value.leading_comments)
    { leading_comments: leading }
  end
end
341
+
342
# A field that reports the trailing comments of a value.
class TrailingCommentsField < CommentsField
  # Wrap the value's trailing comments and return them under the
  # :trailing_comments key.
  def fields(value)
    trailing = comments(value.trailing_comments)
    { trailing_comments: trailing }
  end
end
349
+
350
# A repository is a configured collection of fields and a set of entries
# that knows how to reparse a source and reify the values.
class Repository
  # Raised when multiple fields of the same type are configured on the same
  # repository, or when the filepath field is requested for a source that is
  # not a SourceFilepath.
  class ConfigurationError < StandardError
  end

  # The source associated with this repository. This will be either a
  # SourceFilepath (the most common use case) or a SourceString.
  attr_reader :source

  # The fields that have been configured on this repository, keyed by field
  # name.
  attr_reader :fields

  # The entries that have been saved on this repository, keyed by node_id
  # and then by field name.
  attr_reader :entries

  # Initialize a new repository with the given source.
  def initialize(source)
    @source = source
    @fields = {}
    @entries = Hash.new { |hash, node_id| hash[node_id] = {} }
  end

  # Create a code units cache for the given encoding from the source.
  def code_units_cache(encoding)
    source.code_units_cache(encoding)
  end

  # Configure the filepath field for this repository and return self.
  # Raises ConfigurationError unless the source is a SourceFilepath.
  def filepath
    raise ConfigurationError, "Can only specify filepath for a filepath source" unless source.is_a?(SourceFilepath)
    field(:filepath, FilepathField.new(source.value))
  end

  # Configure the lines field for this repository and return self.
  def lines
    field(:lines, LinesField.new)
  end

  # Configure the offsets field for this repository and return self.
  def offsets
    field(:offsets, OffsetsField.new)
  end

  # Configure the character offsets field for this repository and return
  # self.
  def character_offsets
    field(:character_offsets, CharacterOffsetsField.new)
  end

  # Configure the code unit offsets field for this repository for a specific
  # encoding and return self.
  def code_unit_offsets(encoding)
    field(:code_unit_offsets, CodeUnitOffsetsField.new(self, encoding))
  end

  # Configure the columns field for this repository and return self.
  def columns
    field(:columns, ColumnsField.new)
  end

  # Configure the character columns field for this repository and return
  # self.
  def character_columns
    field(:character_columns, CharacterColumnsField.new)
  end

  # Configure the code unit columns field for this repository for a specific
  # encoding and return self.
  def code_unit_columns(encoding)
    field(:code_unit_columns, CodeUnitColumnsField.new(self, encoding))
  end

  # Configure the leading comments field for this repository and return
  # self.
  def leading_comments
    field(:leading_comments, LeadingCommentsField.new)
  end

  # Configure the trailing comments field for this repository and return
  # self.
  def trailing_comments
    field(:trailing_comments, TrailingCommentsField.new)
  end

  # Configure both the leading and trailing comment fields for this
  # repository and return self.
  def comments
    leading_comments.trailing_comments
  end

  # This method is called from nodes and locations when they want to enter
  # themselves into the repository. It is internal-only and meant to be
  # called from the #save* APIs.
  def enter(node_id, field_name) # :nodoc:
    entry = Entry.new(self)
    @entries[node_id][field_name] = entry
    entry
  end

  # This method is called from the entries in the repository when they need
  # to reify their values. It is internal-only and meant to be called from
  # the various value APIs.
  #
  # The source is reparsed and the tree is walked breadth-first. For each
  # node that has saved entries, every configured field is evaluated against
  # the saved value and the merged result is handed to the entry. All saved
  # entries are dropped from the repository afterwards.
  def reify! # :nodoc:
    result = source.result

    # Attach the comments if they have been requested as part of the
    # configuration of this repository.
    if fields.key?(:leading_comments) || fields.key?(:trailing_comments)
      result.attach_comments!
    end

    queue = [result.value] #: Array[Prism::node]
    while (node = queue.shift)
      # Look the node up with an explicit default instead of #[]. The entries
      # hash has a default block that stores a new hash for any missing key,
      # so #[] would insert an empty hash for every node in the tree.
      saved = @entries.fetch(node.node_id, nil)

      if saved
        saved.each do |field_name, entry|
          value = node.public_send(field_name)
          values = {} #: Hash[Symbol, untyped]

          fields.each_value do |field|
            values.merge!(field.fields(value))
          end

          entry.reify!(values)
        end
      end

      queue.concat(node.compact_child_nodes)
    end

    @entries.clear
  end

  private

  # Append the given field to the repository and return the repository so
  # that these calls can be chained. Raises ConfigurationError if a field
  # with the same name has already been configured.
  def field(name, value)
    raise ConfigurationError, "Cannot specify multiple #{name} fields" if @fields.key?(name)
    @fields[name] = value
    self
  end
end
493
+
494
# Build a repository whose source is the file at the given path.
def self.filepath(value)
  source = SourceFilepath.new(value)
  Repository.new(source)
end
498
+
499
# Build a repository whose source is the given string of code.
def self.string(value)
  source = SourceString.new(value)
  Repository.new(source)
end
503
+ end
504
+ end