prism 1.5.2-arm64-darwin (precompiled)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159)
  1. checksums.yaml +7 -0
  2. data/BSDmakefile +58 -0
  3. data/CHANGELOG.md +723 -0
  4. data/CODE_OF_CONDUCT.md +76 -0
  5. data/CONTRIBUTING.md +58 -0
  6. data/LICENSE.md +7 -0
  7. data/Makefile +110 -0
  8. data/README.md +143 -0
  9. data/config.yml +4714 -0
  10. data/docs/build_system.md +119 -0
  11. data/docs/configuration.md +68 -0
  12. data/docs/cruby_compilation.md +27 -0
  13. data/docs/design.md +53 -0
  14. data/docs/encoding.md +121 -0
  15. data/docs/fuzzing.md +88 -0
  16. data/docs/heredocs.md +36 -0
  17. data/docs/javascript.md +118 -0
  18. data/docs/local_variable_depth.md +229 -0
  19. data/docs/mapping.md +117 -0
  20. data/docs/parser_translation.md +24 -0
  21. data/docs/parsing_rules.md +22 -0
  22. data/docs/releasing.md +98 -0
  23. data/docs/relocation.md +34 -0
  24. data/docs/ripper_translation.md +72 -0
  25. data/docs/ruby_api.md +44 -0
  26. data/docs/ruby_parser_translation.md +19 -0
  27. data/docs/serialization.md +233 -0
  28. data/docs/testing.md +55 -0
  29. data/ext/prism/api_node.c +6941 -0
  30. data/ext/prism/api_pack.c +276 -0
  31. data/ext/prism/extconf.rb +127 -0
  32. data/ext/prism/extension.c +1419 -0
  33. data/ext/prism/extension.h +19 -0
  34. data/include/prism/ast.h +8220 -0
  35. data/include/prism/defines.h +260 -0
  36. data/include/prism/diagnostic.h +456 -0
  37. data/include/prism/encoding.h +283 -0
  38. data/include/prism/node.h +129 -0
  39. data/include/prism/options.h +482 -0
  40. data/include/prism/pack.h +163 -0
  41. data/include/prism/parser.h +933 -0
  42. data/include/prism/prettyprint.h +34 -0
  43. data/include/prism/regexp.h +43 -0
  44. data/include/prism/static_literals.h +121 -0
  45. data/include/prism/util/pm_buffer.h +236 -0
  46. data/include/prism/util/pm_char.h +204 -0
  47. data/include/prism/util/pm_constant_pool.h +218 -0
  48. data/include/prism/util/pm_integer.h +130 -0
  49. data/include/prism/util/pm_list.h +103 -0
  50. data/include/prism/util/pm_memchr.h +29 -0
  51. data/include/prism/util/pm_newline_list.h +113 -0
  52. data/include/prism/util/pm_string.h +200 -0
  53. data/include/prism/util/pm_strncasecmp.h +32 -0
  54. data/include/prism/util/pm_strpbrk.h +46 -0
  55. data/include/prism/version.h +29 -0
  56. data/include/prism.h +408 -0
  57. data/lib/prism/3.0/prism.bundle +0 -0
  58. data/lib/prism/3.1/prism.bundle +0 -0
  59. data/lib/prism/3.2/prism.bundle +0 -0
  60. data/lib/prism/3.3/prism.bundle +0 -0
  61. data/lib/prism/3.4/prism.bundle +0 -0
  62. data/lib/prism/compiler.rb +801 -0
  63. data/lib/prism/desugar_compiler.rb +392 -0
  64. data/lib/prism/dispatcher.rb +2210 -0
  65. data/lib/prism/dot_visitor.rb +4762 -0
  66. data/lib/prism/dsl.rb +1003 -0
  67. data/lib/prism/ffi.rb +570 -0
  68. data/lib/prism/inspect_visitor.rb +2392 -0
  69. data/lib/prism/lex_compat.rb +928 -0
  70. data/lib/prism/mutation_compiler.rb +772 -0
  71. data/lib/prism/node.rb +18816 -0
  72. data/lib/prism/node_ext.rb +511 -0
  73. data/lib/prism/pack.rb +230 -0
  74. data/lib/prism/parse_result/comments.rb +188 -0
  75. data/lib/prism/parse_result/errors.rb +66 -0
  76. data/lib/prism/parse_result/newlines.rb +155 -0
  77. data/lib/prism/parse_result.rb +911 -0
  78. data/lib/prism/pattern.rb +269 -0
  79. data/lib/prism/polyfill/append_as_bytes.rb +15 -0
  80. data/lib/prism/polyfill/byteindex.rb +13 -0
  81. data/lib/prism/polyfill/scan_byte.rb +14 -0
  82. data/lib/prism/polyfill/unpack1.rb +14 -0
  83. data/lib/prism/polyfill/warn.rb +36 -0
  84. data/lib/prism/reflection.rb +416 -0
  85. data/lib/prism/relocation.rb +505 -0
  86. data/lib/prism/serialize.rb +2398 -0
  87. data/lib/prism/string_query.rb +31 -0
  88. data/lib/prism/translation/parser/builder.rb +62 -0
  89. data/lib/prism/translation/parser/compiler.rb +2234 -0
  90. data/lib/prism/translation/parser/lexer.rb +820 -0
  91. data/lib/prism/translation/parser.rb +374 -0
  92. data/lib/prism/translation/parser33.rb +13 -0
  93. data/lib/prism/translation/parser34.rb +13 -0
  94. data/lib/prism/translation/parser35.rb +13 -0
  95. data/lib/prism/translation/parser_current.rb +24 -0
  96. data/lib/prism/translation/ripper/sexp.rb +126 -0
  97. data/lib/prism/translation/ripper/shim.rb +5 -0
  98. data/lib/prism/translation/ripper.rb +3474 -0
  99. data/lib/prism/translation/ruby_parser.rb +1929 -0
  100. data/lib/prism/translation.rb +16 -0
  101. data/lib/prism/visitor.rb +813 -0
  102. data/lib/prism.rb +97 -0
  103. data/prism.gemspec +174 -0
  104. data/rbi/prism/compiler.rbi +12 -0
  105. data/rbi/prism/dsl.rbi +524 -0
  106. data/rbi/prism/inspect_visitor.rbi +12 -0
  107. data/rbi/prism/node.rbi +8734 -0
  108. data/rbi/prism/node_ext.rbi +107 -0
  109. data/rbi/prism/parse_result.rbi +404 -0
  110. data/rbi/prism/reflection.rbi +58 -0
  111. data/rbi/prism/string_query.rbi +12 -0
  112. data/rbi/prism/translation/parser.rbi +11 -0
  113. data/rbi/prism/translation/parser33.rbi +6 -0
  114. data/rbi/prism/translation/parser34.rbi +6 -0
  115. data/rbi/prism/translation/parser35.rbi +6 -0
  116. data/rbi/prism/translation/ripper.rbi +15 -0
  117. data/rbi/prism/visitor.rbi +473 -0
  118. data/rbi/prism.rbi +66 -0
  119. data/sig/prism/compiler.rbs +9 -0
  120. data/sig/prism/dispatcher.rbs +19 -0
  121. data/sig/prism/dot_visitor.rbs +6 -0
  122. data/sig/prism/dsl.rbs +351 -0
  123. data/sig/prism/inspect_visitor.rbs +22 -0
  124. data/sig/prism/lex_compat.rbs +10 -0
  125. data/sig/prism/mutation_compiler.rbs +159 -0
  126. data/sig/prism/node.rbs +4028 -0
  127. data/sig/prism/node_ext.rbs +149 -0
  128. data/sig/prism/pack.rbs +43 -0
  129. data/sig/prism/parse_result/comments.rbs +38 -0
  130. data/sig/prism/parse_result.rbs +196 -0
  131. data/sig/prism/pattern.rbs +13 -0
  132. data/sig/prism/reflection.rbs +50 -0
  133. data/sig/prism/relocation.rbs +185 -0
  134. data/sig/prism/serialize.rbs +8 -0
  135. data/sig/prism/string_query.rbs +11 -0
  136. data/sig/prism/visitor.rbs +169 -0
  137. data/sig/prism.rbs +254 -0
  138. data/src/diagnostic.c +850 -0
  139. data/src/encoding.c +5235 -0
  140. data/src/node.c +8676 -0
  141. data/src/options.c +328 -0
  142. data/src/pack.c +509 -0
  143. data/src/prettyprint.c +8941 -0
  144. data/src/prism.c +23361 -0
  145. data/src/regexp.c +790 -0
  146. data/src/serialize.c +2268 -0
  147. data/src/static_literals.c +617 -0
  148. data/src/token_type.c +703 -0
  149. data/src/util/pm_buffer.c +357 -0
  150. data/src/util/pm_char.c +318 -0
  151. data/src/util/pm_constant_pool.c +342 -0
  152. data/src/util/pm_integer.c +670 -0
  153. data/src/util/pm_list.c +49 -0
  154. data/src/util/pm_memchr.c +35 -0
  155. data/src/util/pm_newline_list.c +125 -0
  156. data/src/util/pm_string.c +381 -0
  157. data/src/util/pm_strncasecmp.c +36 -0
  158. data/src/util/pm_strpbrk.c +206 -0
  159. metadata +202 -0
@@ -0,0 +1,928 @@
1
+ # frozen_string_literal: true
2
+ # :markup: markdown
3
+
4
+ require "delegate"
5
+ require "ripper"
6
+
7
+ module Prism
8
+ # This class is responsible for lexing the source using prism and then
9
+ # converting those tokens to be compatible with Ripper. In the vast majority
10
+ # of cases, this is a one-to-one mapping of the token type. Everything else
11
+ # generally lines up. However, there are a few cases that require special
12
+ # handling.
13
+ class LexCompat # :nodoc:
14
# A result class specialized for holding tokens produced by the lexer.
class Result < Prism::Result
  # The list of tokens that were produced by the lexer.
  attr_reader :value

  # Create a new lex compat result object with the given values. All
  # arguments other than +value+ are forwarded to Prism::Result.
  def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
    @value = value
    super(comments, magic_comments, data_loc, errors, warnings, source)
  end

  # Implement the hash pattern matching interface for Result.
  # (+super+ returns a fresh hash, so mutating it with merge! is safe.)
  def deconstruct_keys(keys)
    super.merge!(value: value)
  end
end
30
+
31
# This is a mapping of prism token types to Ripper token types. This is a
# many-to-one mapping because we split up our token types, whereas Ripper
# tends to group them.
RIPPER = {
  AMPERSAND: :on_op,
  AMPERSAND_AMPERSAND: :on_op,
  AMPERSAND_AMPERSAND_EQUAL: :on_op,
  AMPERSAND_DOT: :on_op,
  AMPERSAND_EQUAL: :on_op,
  BACK_REFERENCE: :on_backref,
  BACKTICK: :on_backtick,
  BANG: :on_op,
  BANG_EQUAL: :on_op,
  BANG_TILDE: :on_op,
  BRACE_LEFT: :on_lbrace,
  BRACE_RIGHT: :on_rbrace,
  BRACKET_LEFT: :on_lbracket,
  BRACKET_LEFT_ARRAY: :on_lbracket,
  BRACKET_LEFT_RIGHT: :on_op,
  BRACKET_LEFT_RIGHT_EQUAL: :on_op,
  BRACKET_RIGHT: :on_rbracket,
  CARET: :on_op,
  CARET_EQUAL: :on_op,
  CHARACTER_LITERAL: :on_CHAR,
  CLASS_VARIABLE: :on_cvar,
  COLON: :on_op,
  COLON_COLON: :on_op,
  COMMA: :on_comma,
  COMMENT: :on_comment,
  CONSTANT: :on_const,
  DOT: :on_period,
  DOT_DOT: :on_op,
  DOT_DOT_DOT: :on_op,
  EMBDOC_BEGIN: :on_embdoc_beg,
  EMBDOC_END: :on_embdoc_end,
  EMBDOC_LINE: :on_embdoc,
  EMBEXPR_BEGIN: :on_embexpr_beg,
  EMBEXPR_END: :on_embexpr_end,
  EMBVAR: :on_embvar,
  EOF: :on_eof,
  EQUAL: :on_op,
  EQUAL_EQUAL: :on_op,
  EQUAL_EQUAL_EQUAL: :on_op,
  EQUAL_GREATER: :on_op,
  EQUAL_TILDE: :on_op,
  FLOAT: :on_float,
  FLOAT_IMAGINARY: :on_imaginary,
  FLOAT_RATIONAL: :on_rational,
  FLOAT_RATIONAL_IMAGINARY: :on_imaginary,
  GREATER: :on_op,
  GREATER_EQUAL: :on_op,
  GREATER_GREATER: :on_op,
  GREATER_GREATER_EQUAL: :on_op,
  GLOBAL_VARIABLE: :on_gvar,
  HEREDOC_END: :on_heredoc_end,
  HEREDOC_START: :on_heredoc_beg,
  IDENTIFIER: :on_ident,
  IGNORED_NEWLINE: :on_ignored_nl,
  INTEGER: :on_int,
  INTEGER_IMAGINARY: :on_imaginary,
  INTEGER_RATIONAL: :on_rational,
  INTEGER_RATIONAL_IMAGINARY: :on_imaginary,
  INSTANCE_VARIABLE: :on_ivar,
  # Invalid tokens have no Ripper equivalent; the sentinel will raise on
  # fetch if one ever leaks through.
  INVALID: :INVALID,
  # Every keyword variant collapses to Ripper's single :on_kw event.
  KEYWORD___ENCODING__: :on_kw,
  KEYWORD___LINE__: :on_kw,
  KEYWORD___FILE__: :on_kw,
  KEYWORD_ALIAS: :on_kw,
  KEYWORD_AND: :on_kw,
  KEYWORD_BEGIN: :on_kw,
  KEYWORD_BEGIN_UPCASE: :on_kw,
  KEYWORD_BREAK: :on_kw,
  KEYWORD_CASE: :on_kw,
  KEYWORD_CLASS: :on_kw,
  KEYWORD_DEF: :on_kw,
  KEYWORD_DEFINED: :on_kw,
  KEYWORD_DO: :on_kw,
  KEYWORD_DO_LOOP: :on_kw,
  KEYWORD_ELSE: :on_kw,
  KEYWORD_ELSIF: :on_kw,
  KEYWORD_END: :on_kw,
  KEYWORD_END_UPCASE: :on_kw,
  KEYWORD_ENSURE: :on_kw,
  KEYWORD_FALSE: :on_kw,
  KEYWORD_FOR: :on_kw,
  KEYWORD_IF: :on_kw,
  KEYWORD_IF_MODIFIER: :on_kw,
  KEYWORD_IN: :on_kw,
  KEYWORD_MODULE: :on_kw,
  KEYWORD_NEXT: :on_kw,
  KEYWORD_NIL: :on_kw,
  KEYWORD_NOT: :on_kw,
  KEYWORD_OR: :on_kw,
  KEYWORD_REDO: :on_kw,
  KEYWORD_RESCUE: :on_kw,
  KEYWORD_RESCUE_MODIFIER: :on_kw,
  KEYWORD_RETRY: :on_kw,
  KEYWORD_RETURN: :on_kw,
  KEYWORD_SELF: :on_kw,
  KEYWORD_SUPER: :on_kw,
  KEYWORD_THEN: :on_kw,
  KEYWORD_TRUE: :on_kw,
  KEYWORD_UNDEF: :on_kw,
  KEYWORD_UNLESS: :on_kw,
  KEYWORD_UNLESS_MODIFIER: :on_kw,
  KEYWORD_UNTIL: :on_kw,
  KEYWORD_UNTIL_MODIFIER: :on_kw,
  KEYWORD_WHEN: :on_kw,
  KEYWORD_WHILE: :on_kw,
  KEYWORD_WHILE_MODIFIER: :on_kw,
  KEYWORD_YIELD: :on_kw,
  LABEL: :on_label,
  LABEL_END: :on_label_end,
  LAMBDA_BEGIN: :on_tlambeg,
  LESS: :on_op,
  LESS_EQUAL: :on_op,
  LESS_EQUAL_GREATER: :on_op,
  LESS_LESS: :on_op,
  LESS_LESS_EQUAL: :on_op,
  METHOD_NAME: :on_ident,
  MINUS: :on_op,
  MINUS_EQUAL: :on_op,
  MINUS_GREATER: :on_tlambda,
  NEWLINE: :on_nl,
  NUMBERED_REFERENCE: :on_backref,
  PARENTHESIS_LEFT: :on_lparen,
  PARENTHESIS_LEFT_PARENTHESES: :on_lparen,
  PARENTHESIS_RIGHT: :on_rparen,
  PERCENT: :on_op,
  PERCENT_EQUAL: :on_op,
  PERCENT_LOWER_I: :on_qsymbols_beg,
  PERCENT_LOWER_W: :on_qwords_beg,
  PERCENT_LOWER_X: :on_backtick,
  PERCENT_UPPER_I: :on_symbols_beg,
  PERCENT_UPPER_W: :on_words_beg,
  PIPE: :on_op,
  PIPE_EQUAL: :on_op,
  PIPE_PIPE: :on_op,
  PIPE_PIPE_EQUAL: :on_op,
  PLUS: :on_op,
  PLUS_EQUAL: :on_op,
  QUESTION_MARK: :on_op,
  RATIONAL_FLOAT: :on_rational,
  RATIONAL_INTEGER: :on_rational,
  REGEXP_BEGIN: :on_regexp_beg,
  REGEXP_END: :on_regexp_end,
  SEMICOLON: :on_semicolon,
  SLASH: :on_op,
  SLASH_EQUAL: :on_op,
  STAR: :on_op,
  STAR_EQUAL: :on_op,
  STAR_STAR: :on_op,
  STAR_STAR_EQUAL: :on_op,
  STRING_BEGIN: :on_tstring_beg,
  STRING_CONTENT: :on_tstring_content,
  STRING_END: :on_tstring_end,
  SYMBOL_BEGIN: :on_symbeg,
  TILDE: :on_op,
  UAMPERSAND: :on_op,
  UCOLON_COLON: :on_op,
  UDOT_DOT: :on_op,
  UDOT_DOT_DOT: :on_op,
  UMINUS: :on_op,
  UMINUS_NUM: :on_op,
  UPLUS: :on_op,
  USTAR: :on_op,
  USTAR_STAR: :on_op,
  WORDS_SEP: :on_words_sep,
  # The data-section marker needs a quoted key because of the leading
  # underscores.
  "__END__": :on___end__
}.freeze
201
+
202
# When we produce tokens, we produce the same arrays that Ripper does.
# However, we add a couple of convenience methods onto them to make them a
# little easier to work with. We delegate all other methods to the array.
class Token < SimpleDelegator
  # @dynamic initialize, each, []

  # The underlying array layout is the one Ripper emits:
  # [location, event, value, state]. Define one named reader per slot.
  %i[location event value state].each_with_index do |accessor, slot|
    define_method(accessor) { self[slot] }
  end
end
228
+
229
# Ripper doesn't include the rest of the token in the event, so we need to
# trim it down to just the content on the first line when comparing.
class EndContentToken < Token
  # Compare against +other+ with our value truncated at the first newline.
  # When the value has no newline, String#index returns nil and the
  # endless range keeps the whole value.
  def ==(other) # :nodoc:
    [self[0], self[1], self[2][0..self[2].index("\n")], self[3]] == other
  end
end
236
+
237
# Tokens where state should be ignored
# used for :on_comment, :on_heredoc_end, :on_embexpr_end
class IgnoreStateToken < Token
  # Compare everything except the trailing lexer-state slot, since the
  # state on these token types does not reliably match Ripper's.
  def ==(other) # :nodoc:
    self[0...-1] == other[0...-1]
  end
end
244
+
245
# Ident tokens for the most part are exactly the same, except sometimes we
# know an ident is a local when ripper doesn't (when they are introduced
# through named captures in regular expressions). In that case we don't
# compare the state.
class IdentToken < Token
  def ==(other) # :nodoc:
    # `|` binds tighter than `==` in Ruby, so the first clause compares
    # other's state against the combined LABEL|END bits; the second
    # accepts any of Ripper's ARG-family states.
    (self[0...-1] == other[0...-1]) && (
      (other[3] == Ripper::EXPR_LABEL | Ripper::EXPR_END) ||
      (other[3] & Ripper::EXPR_ARG_ANY != 0)
    )
  end
end
257
+
258
# Ignored newlines can occasionally have a LABEL state attached to them, so
# we compare the state differently here.
class IgnoredNewlineToken < Token
  def ==(other) # :nodoc:
    return false unless self[0...-1] == other[0...-1]

    if self[3] == Ripper::EXPR_ARG | Ripper::EXPR_LABELED
      # NOTE(review): `|` binds tighter than `!=`, so this parses as
      # ((other[3] & EXPR_ARG) | EXPR_LABELED) != 0, which appears to be
      # always truthy because EXPR_LABELED is nonzero. Perhaps
      # `other[3] & (EXPR_ARG | EXPR_LABELED) != 0` was intended —
      # confirm against upstream before changing, since lex-compat
      # comparisons depend on the current behavior.
      other[3] & Ripper::EXPR_ARG | Ripper::EXPR_LABELED != 0
    else
      self[3] == other[3]
    end
  end
end
271
+
272
# If we have an identifier that follows a method name like:
#
#     def foo bar
#
# then Ripper will mark bar as END|LABEL if there is a local in a parent
# scope named bar because it hasn't pushed the local table yet. We do this
# more accurately, so we need to allow comparing against both END and
# END|LABEL.
class ParamToken < Token
  def ==(other) # :nodoc:
    # `|` binds tighter than `==`, so the second clause compares against
    # the combined END|LABEL state.
    (self[0...-1] == other[0...-1]) && (
      (other[3] == Ripper::EXPR_END) ||
      (other[3] == Ripper::EXPR_END | Ripper::EXPR_LABEL)
    )
  end
end
288
+
289
+ # A heredoc in this case is a list of tokens that belong to the body of the
290
+ # heredoc that should be appended onto the list of tokens when the heredoc
291
+ # closes.
292
+ module Heredoc # :nodoc:
293
# Heredocs that are no dash or tilde heredocs are just a list of tokens.
# We need to keep them around so that we can insert them in the correct
# order back into the token stream and set the state of the last token to
# the state that the heredoc was opened in.
class PlainHeredoc # :nodoc:
  # The tokens collected for the body of this heredoc.
  attr_reader :tokens

  # Start with an empty token buffer.
  def initialize
    @tokens = []
  end

  # Append a token to the buffer.
  def <<(token)
    @tokens.push(token)
  end

  # Return the buffered tokens in the order they were received.
  def to_a
    @tokens
  end
end
312
+
313
# Dash heredocs are a little more complicated. They are a list of tokens
# that need to be split on "\\\n" to mimic Ripper's behavior. We also need
# to keep track of the state that the heredoc was opened in.
class DashHeredoc # :nodoc:
  # split: whether string content should be split on escaped newlines
  # (true for interpolating heredocs). tokens: the collected body tokens.
  attr_reader :split, :tokens

  def initialize(split)
    @split = split
    @tokens = []
  end

  # Append a token to the heredoc body.
  def <<(token)
    tokens << token
  end

  # Flatten the collected tokens into the list that should be spliced back
  # into the token stream, splitting string content on escaped newlines
  # when +split+ is set so the token boundaries match Ripper's.
  def to_a
    embexpr_balance = 0

    tokens.each_with_object([]) do |token, results| #$ Array[Token]
      case token.event
      when :on_embexpr_beg
        embexpr_balance += 1
        results << token
      when :on_embexpr_end
        embexpr_balance -= 1
        results << token
      when :on_tstring_content
        # Only split content that is outside of embedded expressions.
        if embexpr_balance == 0
          lineno = token[0][0]
          column = token[0][1]

          if split
            # Split on "\\\n" to mimic Ripper's behavior. Use a lookbehind
            # to keep the delimiter in the result. The [^\\] guard avoids
            # splitting on a double backslash before the newline.
            token.value.split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index|
              # Every piece after the first starts at column 0 of its line.
              column = 0 if index > 0
              results << Token.new([[lineno, column], :on_tstring_content, value, token.state])
              lineno += value.count("\n")
            end
          else
            results << token
          end
        else
          results << token
        end
      else
        results << token
      end
    end
  end
end
364
+
365
# Heredocs that are dedenting heredocs are a little more complicated.
# Ripper outputs on_ignored_sp tokens for the whitespace that is being
# removed from the output. prism only modifies the node itself and keeps
# the token the same. This simplifies prism, but makes comparing against
# Ripper much harder because there is a length mismatch.
#
# Fortunately, we already have to pull out the heredoc tokens in order to
# insert them into the stream in the correct order. As such, we can do
# some extra manipulation on the tokens to make them match Ripper's
# output by mirroring the dedent logic that Ripper uses.
class DedentingHeredoc # :nodoc:
  # Tabs advance to the next multiple of this width when measuring
  # leading whitespace, matching CRuby's heredoc dedent rules.
  TAB_WIDTH = 8

  attr_reader :tokens, :dedent_next, :dedent, :embexpr_balance

  def initialize
    @tokens = []            # collected body tokens, in source order
    @dedent_next = true     # whether the next content token starts a line
    @dedent = nil           # minimum common leading whitespace seen so far
    @embexpr_balance = 0    # nesting depth of #{...} / inner heredocs
    @ended_on_newline = false
  end

  # As tokens are coming in, we track the minimum amount of common leading
  # whitespace on plain string content tokens. This allows us to later
  # remove that amount of whitespace from the beginning of each line.
  def <<(token)
    case token.event
    when :on_embexpr_beg, :on_heredoc_beg
      @embexpr_balance += 1
      # An interpolation opening at the start of a line pins the common
      # whitespace to zero — that line has no leading whitespace at all.
      @dedent = 0 if @dedent_next && @ended_on_newline
    when :on_embexpr_end, :on_heredoc_end
      @embexpr_balance -= 1
    when :on_tstring_content
      if embexpr_balance == 0
        line = token.value

        # Blank lines (whitespace-only and newline-terminated) do not
        # participate in the common-whitespace calculation.
        if dedent_next && !(line.strip.empty? && line.end_with?("\n"))
          leading = line[/\A(\s*)\n?/, 1]
          next_dedent = 0

          leading.each_char do |char|
            if char == "\t"
              # Round up to the next tab stop.
              next_dedent = next_dedent - (next_dedent % TAB_WIDTH) + TAB_WIDTH
            else
              next_dedent += 1
            end
          end

          # compact drops the initial nil so the first line seeds @dedent.
          @dedent = [dedent, next_dedent].compact.min
          @dedent_next = true
          @ended_on_newline = line.end_with?("\n")
          tokens << token
          return
        end
      end
    end

    # Any other token: the next content token starts a fresh line only if
    # this one was top-level string content.
    @dedent_next = token.event == :on_tstring_content && embexpr_balance == 0
    @ended_on_newline = false
    tokens << token
  end

  # Produce the token list to splice back into the stream, mirroring
  # Ripper's dedent behavior (including its on_ignored_sp tokens).
  def to_a
    # If every line in the heredoc is blank, we still need to split up the
    # string content token into multiple tokens.
    if dedent.nil?
      results = [] #: Array[Token]
      embexpr_balance = 0

      tokens.each do |token|
        case token.event
        when :on_embexpr_beg, :on_heredoc_beg
          embexpr_balance += 1
          results << token
        when :on_embexpr_end, :on_heredoc_end
          embexpr_balance -= 1
          results << token
        when :on_tstring_content
          if embexpr_balance == 0
            lineno = token[0][0]
            column = token[0][1]

            # Split after each newline, one token per line.
            token.value.split(/(?<=\n)/).each_with_index do |value, index|
              column = 0 if index > 0
              results << Token.new([[lineno, column], :on_tstring_content, value, token.state])
              lineno += 1
            end
          else
            results << token
          end
        else
          results << token
        end
      end

      return results
    end

    # If the minimum common whitespace is 0, then we need to concatenate
    # string nodes together that are immediately adjacent.
    if dedent == 0
      results = [] #: Array[Token]
      embexpr_balance = 0

      index = 0
      max_index = tokens.length

      while index < max_index
        token = tokens[index]
        results << token
        index += 1

        case token.event
        when :on_embexpr_beg, :on_heredoc_beg
          embexpr_balance += 1
        when :on_embexpr_end, :on_heredoc_end
          embexpr_balance -= 1
        when :on_tstring_content
          if embexpr_balance == 0
            # Merge runs of adjacent content tokens, stopping at a token
            # that ends with an escaped newline (Ripper keeps the break
            # there).
            while index < max_index && tokens[index].event == :on_tstring_content && !token.value.match?(/\\\r?\n\z/)
              token.value << tokens[index].value
              index += 1
            end
          end
        end
      end

      return results
    end

    # Otherwise, we're going to run through each token in the list and
    # insert on_ignored_sp tokens for the amount of dedent that we need to
    # perform. We also need to remove the dedent from the beginning of
    # each line of plain string content tokens.
    results = [] #: Array[Token]
    dedent_next = true
    embexpr_balance = 0

    tokens.each do |token|
      # Notice that the structure of this conditional largely matches the
      # whitespace calculation we performed above. This is because
      # checking if the subsequent token needs to be dedented is common to
      # both the dedent calculation and the ignored_sp insertion.
      case token.event
      when :on_embexpr_beg
        embexpr_balance += 1
        results << token
      when :on_embexpr_end
        embexpr_balance -= 1
        results << token
      when :on_tstring_content
        if embexpr_balance == 0
          # Here we're going to split the string on newlines, but maintain
          # the newlines in the resulting array. We'll do that with a look
          # behind assertion.
          splits = token.value.split(/(?<=\n)/)
          index = 0

          while index < splits.length
            line = splits[index]
            lineno = token[0][0] + index
            column = token[0][1]

            # Blank lines do not count toward common leading whitespace
            # calculation and do not need to be dedented.
            if dedent_next || index > 0
              column = 0
            end

            # If the dedent is 0 and we're not supposed to dedent the next
            # line or this line doesn't start with whitespace, then we
            # should concatenate the rest of the string to match ripper.
            if dedent == 0 && (!dedent_next || !line.start_with?(/\s/))
              line = splits[index..].join
              index = splits.length
            end

            # If we are supposed to dedent this line or if this is not the
            # first line of the string and this line isn't entirely blank,
            # then we need to insert an on_ignored_sp token and remove the
            # dedent from the beginning of the line.
            if (dedent > 0) && (dedent_next || index > 0)
              deleting = 0
              deleted_chars = [] #: Array[String]

              # Gather up all of the characters that we're going to
              # delete, stopping when you hit a character that would put
              # you over the dedent amount.
              line.each_char.with_index do |char, i|
                case char
                when "\r"
                  if line[i + 1] == "\n"
                    break
                  end
                when "\n"
                  break
                when "\t"
                  deleting = deleting - (deleting % TAB_WIDTH) + TAB_WIDTH
                else
                  deleting += 1
                end

                break if deleting > dedent
                deleted_chars << char
              end

              # If we have something to delete, then delete it from the
              # string and insert an on_ignored_sp token.
              if deleted_chars.any?
                ignored = deleted_chars.join
                line.delete_prefix!(ignored)

                results << Token.new([[lineno, 0], :on_ignored_sp, ignored, token[3]])
                column = ignored.length
              end
            end

            results << Token.new([[lineno, column], token[1], line, token[3]]) unless line.empty?
            index += 1
          end
        else
          results << token
        end
      else
        results << token
      end

      dedent_next =
        ((token.event == :on_tstring_content) || (token.event == :on_heredoc_end)) &&
        embexpr_balance == 0
    end

    results
  end
end
601
+
602
# Here we will split between the two types of heredocs and return the
# object that will store their tokens. The third character of the opening
# token's value distinguishes `<<~` (squiggly) and `<<-` (dash) heredocs;
# anything else is a plain heredoc.
def self.build(opening)
  marker = opening.value[2]

  if marker == "~"
    DedentingHeredoc.new
  elsif marker == "-"
    # Dash heredocs split on escaped newlines unless the delimiter is
    # single-quoted (no interpolation).
    DashHeredoc.new(opening.value[3] != "'")
  else
    PlainHeredoc.new
  end
end
614
+ end
615
+
616
private_constant :Heredoc

# The source string being lexed and the keyword options that will be
# forwarded to Prism.lex.
attr_reader :source, :options

# Create a new LexCompat for the given source. +options+ is captured and
# forwarded verbatim when the lex is performed.
def initialize(source, **options)
  @source = source
  @options = options
end
624
+
625
+ def result
626
+ tokens = [] #: Array[LexCompat::Token]
627
+
628
+ state = :default
629
+ heredoc_stack = [[]] #: Array[Array[Heredoc::PlainHeredoc | Heredoc::DashHeredoc | Heredoc::DedentingHeredoc]]
630
+
631
+ result = Prism.lex(source, **options)
632
+ result_value = result.value
633
+ previous_state = nil #: Ripper::Lexer::State?
634
+ last_heredoc_end = nil #: Integer?
635
+
636
+ # In previous versions of Ruby, Ripper wouldn't flush the bom before the
637
+ # first token, so we had to have a hack in place to account for that. This
638
+ # checks for that behavior.
639
+ bom_flushed = Ripper.lex("\xEF\xBB\xBF# test")[0][0][1] == 0
640
+ bom = source.byteslice(0..2) == "\xEF\xBB\xBF"
641
+
642
+ result_value.each_with_index do |(token, lex_state), index|
643
+ lineno = token.location.start_line
644
+ column = token.location.start_column
645
+
646
+ # If there's a UTF-8 byte-order mark as the start of the file, then for
647
+ # certain tokens ripper sets the first token back by 3 bytes. It also
648
+ # keeps the byte order mark in the first token's value. This is weird,
649
+ # and I don't want to mirror that in our parser. So instead, we'll match
650
+ # up the columns and values here.
651
+ if bom && lineno == 1
652
+ column -= 3
653
+
654
+ if index == 0 && column == 0 && !bom_flushed
655
+ flushed =
656
+ case token.type
657
+ when :BACK_REFERENCE, :INSTANCE_VARIABLE, :CLASS_VARIABLE,
658
+ :GLOBAL_VARIABLE, :NUMBERED_REFERENCE, :PERCENT_LOWER_I,
659
+ :PERCENT_LOWER_X, :PERCENT_LOWER_W, :PERCENT_UPPER_I,
660
+ :PERCENT_UPPER_W, :STRING_BEGIN
661
+ true
662
+ when :REGEXP_BEGIN, :SYMBOL_BEGIN
663
+ token.value.start_with?("%")
664
+ else
665
+ false
666
+ end
667
+
668
+ unless flushed
669
+ column -= 3
670
+ value = token.value
671
+ value.prepend(String.new("\xEF\xBB\xBF", encoding: value.encoding))
672
+ end
673
+ end
674
+ end
675
+
676
+ event = RIPPER.fetch(token.type)
677
+ value = token.value
678
+ lex_state = Ripper::Lexer::State.new(lex_state)
679
+
680
+ token =
681
+ case event
682
+ when :on___end__
683
+ EndContentToken.new([[lineno, column], event, value, lex_state])
684
+ when :on_comment
685
+ IgnoreStateToken.new([[lineno, column], event, value, lex_state])
686
+ when :on_heredoc_end
687
+ # Heredoc end tokens can be emitted in an odd order, so we don't
688
+ # want to bother comparing the state on them.
689
+ last_heredoc_end = token.location.end_offset
690
+ IgnoreStateToken.new([[lineno, column], event, value, lex_state])
691
+ when :on_ident
692
+ if lex_state == Ripper::EXPR_END
693
+ # If we have an identifier that follows a method name like:
694
+ #
695
+ # def foo bar
696
+ #
697
+ # then Ripper will mark bar as END|LABEL if there is a local in a
698
+ # parent scope named bar because it hasn't pushed the local table
699
+ # yet. We do this more accurately, so we need to allow comparing
700
+ # against both END and END|LABEL.
701
+ ParamToken.new([[lineno, column], event, value, lex_state])
702
+ elsif lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL
703
+ # In the event that we're comparing identifiers, we're going to
704
+ # allow a little divergence. Ripper doesn't account for local
705
+ # variables introduced through named captures in regexes, and we
706
+ # do, which accounts for this difference.
707
+ IdentToken.new([[lineno, column], event, value, lex_state])
708
+ else
709
+ Token.new([[lineno, column], event, value, lex_state])
710
+ end
711
+ when :on_embexpr_end
712
+ IgnoreStateToken.new([[lineno, column], event, value, lex_state])
713
+ when :on_ignored_nl
714
+ # Ignored newlines can occasionally have a LABEL state attached to
715
+ # them which doesn't actually impact anything. We don't mirror that
716
+ # state so we ignored it.
717
+ IgnoredNewlineToken.new([[lineno, column], event, value, lex_state])
718
+ when :on_regexp_end
719
+ # On regex end, Ripper scans and then sets end state, so the ripper
720
+ # lexed output is begin, when it should be end. prism sets lex state
721
+ # correctly to end state, but we want to be able to compare against
722
+ # Ripper's lexed state. So here, if it's a regexp end token, we
723
+ # output the state as the previous state, solely for the sake of
724
+ # comparison.
725
+ previous_token = result_value[index - 1][0]
726
+ lex_state =
727
+ if RIPPER.fetch(previous_token.type) == :on_embexpr_end
728
+ # If the previous token is embexpr_end, then we have to do even
729
+ # more processing. The end of an embedded expression sets the
730
+ # state to the state that it had at the beginning of the
731
+ # embedded expression. So we have to go and find that state and
732
+ # set it here.
733
+ counter = 1
734
+ current_index = index - 1
735
+
736
+ until counter == 0
737
+ current_index -= 1
738
+ current_event = RIPPER.fetch(result_value[current_index][0].type)
739
+ counter += { on_embexpr_beg: -1, on_embexpr_end: 1 }[current_event] || 0
740
+ end
741
+
742
+ Ripper::Lexer::State.new(result_value[current_index][1])
743
+ else
744
+ previous_state
745
+ end
746
+
747
+ Token.new([[lineno, column], event, value, lex_state])
748
+ when :on_eof
749
+ previous_token = result_value[index - 1][0]
750
+
751
+ # If we're at the end of the file and the previous token was a
752
+ # comment and there is still whitespace after the comment, then
753
+ # Ripper will append a on_nl token (even though there isn't
754
+ # necessarily a newline). We mirror that here.
755
+ if previous_token.type == :COMMENT
756
+ # If the comment is at the start of a heredoc: <<HEREDOC # comment
757
+ # then the comment's end_offset is up near the heredoc_beg.
758
+ # This is not the correct offset to use for figuring out if
759
+ # there is trailing whitespace after the last token.
760
+ # Use the greater offset of the two to determine the start of
761
+ # the trailing whitespace.
762
+ start_offset = [previous_token.location.end_offset, last_heredoc_end].compact.max
763
+ end_offset = token.location.start_offset
764
+
765
+ if start_offset < end_offset
766
+ if bom
767
+ start_offset += 3
768
+ end_offset += 3
769
+ end
770
+
771
+ tokens << Token.new([[lineno, 0], :on_nl, source.byteslice(start_offset...end_offset), lex_state])
772
+ end
773
+ end
774
+
775
+ Token.new([[lineno, column], event, value, lex_state])
776
+ else
777
+ Token.new([[lineno, column], event, value, lex_state])
778
+ end
779
+
780
+ previous_state = lex_state
781
+
782
+ # The order in which tokens appear in our lexer is different from the
783
+ # order that they appear in Ripper. When we hit the declaration of a
784
+ # heredoc in prism, we skip forward and lex the rest of the content of
785
+ # the heredoc before going back and lexing at the end of the heredoc
786
+ # identifier.
787
+ #
788
+ # To match up to ripper, we keep a small state variable around here to
789
+ # track whether we're in the middle of a heredoc or not. In this way we
790
+ # can shuffle around the token to match Ripper's output.
791
+ case state
792
+ when :default
793
+ # The default state is when there are no heredocs at all. In this
794
+ # state we can append the token to the list of tokens and move on.
795
+ tokens << token
796
+
797
+ # If we get the declaration of a heredoc, then we open a new heredoc
798
+ # and move into the heredoc_opened state.
799
+ if event == :on_heredoc_beg
800
+ state = :heredoc_opened
801
+ heredoc_stack.last << Heredoc.build(token)
802
+ end
803
+ when :heredoc_opened
804
+ # The heredoc_opened state is when we've seen the declaration of a
805
+ # heredoc and are now lexing the body of the heredoc. In this state we
806
+ # push tokens onto the most recently created heredoc.
807
+ heredoc_stack.last.last << token
808
+
809
+ case event
810
+ when :on_heredoc_beg
811
+ # If we receive a heredoc declaration while lexing the body of a
812
+ # heredoc, this means we have nested heredocs. In this case we'll
813
+ # push a new heredoc onto the stack and stay in the heredoc_opened
814
+ # state since we're now lexing the body of the new heredoc.
815
+ heredoc_stack << [Heredoc.build(token)]
816
+ when :on_heredoc_end
817
+ # If we receive the end of a heredoc, then we're done lexing the
818
+ # body of the heredoc. In this case we now have a completed heredoc
819
+ # but need to wait for the next newline to push it into the token
820
+ # stream.
821
+ state = :heredoc_closed
822
+ end
823
+ when :heredoc_closed
824
+ if %i[on_nl on_ignored_nl on_comment].include?(event) || (event == :on_tstring_content && value.end_with?("\n"))
825
+ if heredoc_stack.size > 1
826
+ flushing = heredoc_stack.pop
827
+ heredoc_stack.last.last << token
828
+
829
+ flushing.each do |heredoc|
830
+ heredoc.to_a.each do |flushed_token|
831
+ heredoc_stack.last.last << flushed_token
832
+ end
833
+ end
834
+
835
+ state = :heredoc_opened
836
+ next
837
+ end
838
+ elsif event == :on_heredoc_beg
839
+ tokens << token
840
+ state = :heredoc_opened
841
+ heredoc_stack.last << Heredoc.build(token)
842
+ next
843
+ elsif heredoc_stack.size > 1
844
+ heredoc_stack[-2].last << token
845
+ next
846
+ end
847
+
848
+ heredoc_stack.last.each do |heredoc|
849
+ tokens.concat(heredoc.to_a)
850
+ end
851
+
852
+ heredoc_stack.last.clear
853
+ state = :default
854
+
855
+ tokens << token
856
+ end
857
+ end
858
+
859
+ # Drop the EOF token from the list
860
+ tokens = tokens[0...-1]
861
+
862
+ # We sort by location to compare against Ripper's output
863
+ tokens.sort_by!(&:location)
864
+
865
+ Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, Source.for(source))
866
+ end
867
+ end
868
# LexCompat is an internal implementation detail of the compatibility lexer;
# hide it so it is not part of the library's public constant namespace.
private_constant :LexCompat
871
# This is a class that wraps the Ripper lexer to produce almost exactly the
# same tokens.
class LexRipper # :nodoc:
  attr_reader :source

  def initialize(source)
    @source = source
  end

  # Lex the source through Ripper and normalize the token stream:
  #
  # * :on_sp (whitespace) tokens are dropped entirely,
  # * an :on_tstring_content token that begins with an escaped "#$" or "#@"
  #   sequence is folded into the preceding :on_tstring_content token, and
  # * consecutive :on_words_sep tokens are fused into a single token.
  #
  # Returns an array of [[lineno, column], event, value, state] tuples.
  def result
    collected = [] #: Array[[[Integer, Integer], Symbol, String, untyped]]
    last = [] #: [[Integer, Integer], Symbol, String, untyped] | []

    lex(source).each do |token|
      event = token[1]

      # Whitespace never makes it into the output at all.
      next if event == :on_sp

      # Decide whether this token should be appended onto the previous
      # token's value instead of emitted on its own.
      merge =
        case event
        when :on_tstring_content
          last[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@"))
        when :on_words_sep
          last[1] == :on_words_sep
        else
          false
        end

      if merge
        last[2] << token[2]
      else
        collected << token
        last = token
      end
    end

    collected
  end

  private

  if Ripper.method(:lex).parameters.assoc(:keyrest)
    # Newer Ripper accepts keyword options, so it can raise on errors itself.
    def lex(source)
      Ripper.lex(source, raise_errors: true)
    end
  else
    # Older Ripper has no raise_errors option; gather the errors after the
    # fact and raise a SyntaxError ourselves if there were any.
    def lex(source)
      ripper = Ripper::Lexer.new(source)
      ripper.lex.tap do |result|
        raise SyntaxError, ripper.errors.map(&:message).join(' ;') if ripper.errors.any?
      end
    end
  end
end
926
# LexRipper is likewise internal-only; keep it out of the public API surface.
private_constant :LexRipper
928
+ end