prism 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ea89f88aef2ec51d2cfb5868cf873ab256393f5ba632381a3a53631c2506dbc3
4
- data.tar.gz: 4da85d79e85d5cca843eb6e71cea07efc80a6e77608aa3027e1cd75c3c3b735a
3
+ metadata.gz: f16f842a06eec8141246c60a39f509a59817de34ab4be3a33502ada040ac1602
4
+ data.tar.gz: 38bd30b4ba63fe67892a0138be5b7b2c84fb6bd66e011871a07ee453be7b0aec
5
5
  SHA512:
6
- metadata.gz: 1b7f92a58fa176b04aab230f49dcdc08f9b810575426402db6fc9eee0921ac5ddddf1f01f18bb9429a914955759e8d4cee5a70674e14eb0b0169018ce615780e
7
- data.tar.gz: e4a5a6ba40bc7692c6c904f452dd8ff18881703c99d51969507619368748eec29936dc3f521e8967b6d728ea331f26f9f670b44daedde2106f649104aefe0c30
6
+ metadata.gz: 9877dc80270515e91c5357418a67721fa832c6de44d755047f576d69a3b09129d64da19e5ac767d184df74afe40982f56c88be3851ce7fc7c99c1d0bbf15ec77
7
+ data.tar.gz: 4578c2f1e2e934f763d6c55ae84dc076b0676ac0560c13364db1ade75425da76d64b7a4cf8fb07ea723568fbfc58c20fb8055e9dec364ac5da05765d26398d39
data/CHANGELOG.md CHANGED
@@ -6,6 +6,21 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [1.2.0] - 2024-10-10
10
+
11
+ ### Added
12
+
13
+ - Introduce `Prism::CodeUnitsCache`.
14
+
15
+ ### Changed
16
+
17
+ - Properly handle lexing global variables that begin with `$-`.
18
+ - Properly reject invalid multi writes within parentheses.
19
+ - Fix unary `*` binding power.
20
+ - Set `contains_keywords` flag for implicit `gets` calls when `-p` is used.
21
+ - Properly reject invalid non-associative operator patterns.
22
+ - Do not warn about unused variables declared on negative lines.
23
+
9
24
  ## [1.1.0] - 2024-10-02
10
25
 
11
26
  ### Added
@@ -591,7 +606,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
591
606
 
592
607
  - 🎉 Initial release! 🎉
593
608
 
594
- [unreleased]: https://github.com/ruby/prism/compare/v1.1.0...HEAD
609
+ [unreleased]: https://github.com/ruby/prism/compare/v1.2.0...HEAD
610
+ [1.2.0]: https://github.com/ruby/prism/compare/v1.1.0...v1.2.0
595
611
  [1.1.0]: https://github.com/ruby/prism/compare/v1.0.0...v1.1.0
596
612
  [1.0.0]: https://github.com/ruby/prism/compare/v0.30.0...v1.0.0
597
613
  [0.30.0]: https://github.com/ruby/prism/compare/v0.29.0...v0.30.0
data/config.yml CHANGED
@@ -141,6 +141,7 @@ errors:
141
141
  - INSTANCE_VARIABLE_BARE
142
142
  - INVALID_BLOCK_EXIT
143
143
  - INVALID_CHARACTER
144
+ - INVALID_COMMA
144
145
  - INVALID_ENCODING_MAGIC_COMMENT
145
146
  - INVALID_ESCAPE_CHARACTER
146
147
  - INVALID_FLOAT_EXPONENT
@@ -3684,7 +3685,7 @@ nodes:
3684
3685
  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3685
3686
  end
3686
3687
 
3687
- `Foo, *splat, Bar` are in the `exceptions` field. `ex` is in the `exception` field.
3688
+ `Foo, *splat, Bar` are in the `exceptions` field. `ex` is in the `reference` field.
3688
3689
  - name: RestParameterNode
3689
3690
  flags: ParameterFlags
3690
3691
  fields:
@@ -1,7 +1,7 @@
1
1
  #ifndef PRISM_EXT_NODE_H
2
2
  #define PRISM_EXT_NODE_H
3
3
 
4
- #define EXPECTED_PRISM_VERSION "1.1.0"
4
+ #define EXPECTED_PRISM_VERSION "1.2.0"
5
5
 
6
6
  #include <ruby.h>
7
7
  #include <ruby/encoding.h>
data/include/prism/ast.h CHANGED
@@ -6490,7 +6490,7 @@ typedef struct pm_rescue_modifier_node {
6490
6490
  * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
6491
6491
  * end
6492
6492
  *
6493
- * `Foo, *splat, Bar` are in the `exceptions` field. `ex` is in the `exception` field.
6493
+ * `Foo, *splat, Bar` are in the `exceptions` field. `ex` is in the `reference` field.
6494
6494
  *
6495
6495
  * Type: ::PM_RESCUE_NODE
6496
6496
  *
@@ -170,6 +170,7 @@ typedef enum {
170
170
  PM_ERR_INSTANCE_VARIABLE_BARE,
171
171
  PM_ERR_INVALID_BLOCK_EXIT,
172
172
  PM_ERR_INVALID_CHARACTER,
173
+ PM_ERR_INVALID_COMMA,
173
174
  PM_ERR_INVALID_ENCODING_MAGIC_COMMENT,
174
175
  PM_ERR_INVALID_ESCAPE_CHARACTER,
175
176
  PM_ERR_INVALID_FLOAT_EXPONENT,
@@ -82,6 +82,23 @@ typedef enum {
82
82
  PM_HEREDOC_INDENT_TILDE,
83
83
  } pm_heredoc_indent_t;
84
84
 
85
+ /**
86
+ * All of the information that must be stored in order to lex a heredoc.
87
+ */
88
+ typedef struct {
89
+ /** A pointer to the start of the heredoc identifier. */
90
+ const uint8_t *ident_start;
91
+
92
+ /** The length of the heredoc identifier. */
93
+ size_t ident_length;
94
+
95
+ /** The type of quote that the heredoc uses. */
96
+ pm_heredoc_quote_t quote;
97
+
98
+ /** The type of indentation that the heredoc uses. */
99
+ pm_heredoc_indent_t indent;
100
+ } pm_heredoc_lex_mode_t;
101
+
85
102
  /**
86
103
  * When lexing Ruby source, the lexer has a small amount of state to tell which
87
104
  * kind of token it is currently lexing. For example, when we find the start of
@@ -210,17 +227,10 @@ typedef struct pm_lex_mode {
210
227
  } string;
211
228
 
212
229
  struct {
213
- /** A pointer to the start of the heredoc identifier. */
214
- const uint8_t *ident_start;
215
-
216
- /** The length of the heredoc identifier. */
217
- size_t ident_length;
218
-
219
- /** The type of quote that the heredoc uses. */
220
- pm_heredoc_quote_t quote;
221
-
222
- /** The type of indentation that the heredoc uses. */
223
- pm_heredoc_indent_t indent;
230
+ /**
231
+ * All of the data necessary to lex a heredoc.
232
+ */
233
+ pm_heredoc_lex_mode_t base;
224
234
 
225
235
  /**
226
236
  * This is the pointer to the character where lexing should resume
@@ -233,7 +243,7 @@ typedef struct pm_lex_mode {
233
243
  * line so that we know how much to dedent each line in the case of
234
244
  * a tilde heredoc.
235
245
  */
236
- size_t common_whitespace;
246
+ size_t *common_whitespace;
237
247
 
238
248
  /** True if the previous token ended with a line continuation. */
239
249
  bool line_continuation;
@@ -382,6 +392,9 @@ typedef enum {
382
392
  /** a rescue statement within a module statement */
383
393
  PM_CONTEXT_MODULE_RESCUE,
384
394
 
395
+ /** a multiple target expression */
396
+ PM_CONTEXT_MULTI_TARGET,
397
+
385
398
  /** a parenthesized expression */
386
399
  PM_CONTEXT_PARENS,
387
400
 
@@ -14,7 +14,7 @@
14
14
  /**
15
15
  * The minor version of the Prism library as an int.
16
16
  */
17
- #define PRISM_VERSION_MINOR 1
17
+ #define PRISM_VERSION_MINOR 2
18
18
 
19
19
  /**
20
20
  * The patch version of the Prism library as an int.
@@ -24,6 +24,6 @@
24
24
  /**
25
25
  * The version of the Prism library as a constant string.
26
26
  */
27
- #define PRISM_VERSION "1.1.0"
27
+ #define PRISM_VERSION "1.2.0"
28
28
 
29
29
  #endif
data/lib/prism/node.rb CHANGED
@@ -14219,7 +14219,7 @@ module Prism
14219
14219
  # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
14220
14220
  # end
14221
14221
  #
14222
- # `Foo, *splat, Bar` are in the `exceptions` field. `ex` is in the `exception` field.
14222
+ # `Foo, *splat, Bar` are in the `exceptions` field. `ex` is in the `reference` field.
14223
14223
  class RescueNode < Node
14224
14224
  # Initialize a new RescueNode node.
14225
14225
  def initialize(source, node_id, location, flags, keyword_loc, exceptions, operator_loc, reference, statements, subsequent)
@@ -12,6 +12,21 @@ module Prism
12
12
  def self.for(source, start_line = 1, offsets = [])
13
13
  if source.ascii_only?
14
14
  ASCIISource.new(source, start_line, offsets)
15
+ elsif source.encoding == Encoding::BINARY
16
+ source.force_encoding(Encoding::UTF_8)
17
+
18
+ if source.valid_encoding?
19
+ new(source, start_line, offsets)
20
+ else
21
+ # This is an extremely niche use case where the file is marked as
22
+ # binary, contains multi-byte characters, and those characters are not
23
+ # valid UTF-8. In this case we'll mark it as binary and fall back to
24
+ # treating everything as a single-byte character. This _may_ cause
25
+ # problems when asking for code units, but it appears to be the
26
+ # cleanest solution at the moment.
27
+ source.force_encoding(Encoding::BINARY)
28
+ ASCIISource.new(source, start_line, offsets)
29
+ end
15
30
  else
16
31
  new(source, start_line, offsets)
17
32
  end
@@ -89,8 +104,14 @@ module Prism
89
104
  # This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
90
105
  # concept of code units that differs from the number of characters in other
91
106
  # encodings, it is not captured here.
107
+ #
108
+ # We purposefully replace invalid and undefined characters with replacement
109
+ # characters in this conversion. This happens for two reasons. First, it's
110
+ # possible that the given byte offset will not occur on a character
111
+ # boundary. Second, it's possible that the source code will contain a
112
+ # character that has no equivalent in the given encoding.
92
113
  def code_units_offset(byte_offset, encoding)
93
- byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding)
114
+ byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding, invalid: :replace, undef: :replace)
94
115
 
95
116
  if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
96
117
  byteslice.bytesize / 2
@@ -99,6 +120,12 @@ module Prism
99
120
  end
100
121
  end
101
122
 
123
+ # Generate a cache that targets a specific encoding for calculating code
124
+ # unit offsets.
125
+ def code_units_cache(encoding)
126
+ CodeUnitsCache.new(source, encoding)
127
+ end
128
+
102
129
  # Returns the column number in code units for the given encoding for the
103
130
  # given byte offset.
104
131
  def code_units_column(byte_offset, encoding)
@@ -128,10 +155,84 @@ module Prism
128
155
  end
129
156
  end
130
157
 
158
+ # A cache that can be used to quickly compute code unit offsets from byte
159
+ # offsets. It purposefully provides only a single #[] method to access the
160
+ # cache in order to minimize surface area.
161
+ #
162
+ # Note that there are some known issues here that may or may not be addressed
163
+ # in the future:
164
+ #
165
+ # * The first is that there are issues when the cache computes values that are
166
+ # not on character boundaries. This can result in subsequent computations
167
+ # being off by one or more code units.
168
+ # * The second is that this cache is currently unbounded. In theory we could
169
+ # introduce some kind of LRU cache to limit the number of entries, but this
170
+ # has not yet been implemented.
171
+ #
172
+ class CodeUnitsCache
173
+ class UTF16Counter # :nodoc:
174
+ def initialize(source, encoding)
175
+ @source = source
176
+ @encoding = encoding
177
+ end
178
+
179
+ def count(byte_offset, byte_length)
180
+ @source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).bytesize / 2
181
+ end
182
+ end
183
+
184
+ class LengthCounter # :nodoc:
185
+ def initialize(source, encoding)
186
+ @source = source
187
+ @encoding = encoding
188
+ end
189
+
190
+ def count(byte_offset, byte_length)
191
+ @source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).length
192
+ end
193
+ end
194
+
195
+ private_constant :UTF16Counter, :LengthCounter
196
+
197
+ # Initialize a new cache with the given source and encoding.
198
+ def initialize(source, encoding)
199
+ @source = source
200
+ @counter =
201
+ if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
202
+ UTF16Counter.new(source, encoding)
203
+ else
204
+ LengthCounter.new(source, encoding)
205
+ end
206
+
207
+ @cache = {}
208
+ @offsets = []
209
+ end
210
+
211
+ # Retrieve the code units offset from the given byte offset.
212
+ def [](byte_offset)
213
+ @cache[byte_offset] ||=
214
+ if (index = @offsets.bsearch_index { |offset| offset > byte_offset }).nil?
215
+ @offsets << byte_offset
216
+ @counter.count(0, byte_offset)
217
+ elsif index == 0
218
+ @offsets.unshift(byte_offset)
219
+ @counter.count(0, byte_offset)
220
+ else
221
+ @offsets.insert(index, byte_offset)
222
+ offset = @offsets[index - 1]
223
+ @cache[offset] + @counter.count(offset, byte_offset - offset)
224
+ end
225
+ end
226
+ end
227
+
131
228
  # Specialized version of Prism::Source for source code that includes ASCII
132
229
  # characters only. This class is used to apply performance optimizations that
133
- # cannot be applied to sources that include multibyte characters. Sources that
134
- # include multibyte characters are represented by the Prism::Source class.
230
+ # cannot be applied to sources that include multibyte characters.
231
+ #
232
+ # In the extremely rare case that a source includes multi-byte characters but
233
+ # is marked as binary because of a magic encoding comment and it cannot be
234
+ # eagerly converted to UTF-8, this class will be used as well. This is because
235
+ # at that point we will treat everything as single-byte characters.
135
236
  class ASCIISource < Source
136
237
  # Return the character offset for the given byte offset.
137
238
  def character_offset(byte_offset)
@@ -153,6 +254,13 @@ module Prism
153
254
  byte_offset
154
255
  end
155
256
 
257
+ # Returns a cache that is the identity function in order to maintain the
258
+ # same interface. We can do this because code units are always equivalent to
259
+ # byte offsets for ASCII-only sources.
260
+ def code_units_cache(encoding)
261
+ ->(byte_offset) { byte_offset }
262
+ end
263
+
156
264
  # Specialized version of `code_units_column` that does not depend on
157
265
  # `code_units_offset`, which is a more expensive operation. This is
158
266
  # essentially the same as `Prism::Source#column`.
@@ -262,6 +370,12 @@ module Prism
262
370
  source.code_units_offset(start_offset, encoding)
263
371
  end
264
372
 
373
+ # The start offset from the start of the file in code units using the given
374
+ # cache to fetch or calculate the value.
375
+ def cached_start_code_units_offset(cache)
376
+ cache[start_offset]
377
+ end
378
+
265
379
  # The byte offset from the beginning of the source where this location ends.
266
380
  def end_offset
267
381
  start_offset + length
@@ -278,6 +392,12 @@ module Prism
278
392
  source.code_units_offset(end_offset, encoding)
279
393
  end
280
394
 
395
+ # The end offset from the start of the file in code units using the given
396
+ # cache to fetch or calculate the value.
397
+ def cached_end_code_units_offset(cache)
398
+ cache[end_offset]
399
+ end
400
+
281
401
  # The line number where this location starts.
282
402
  def start_line
283
403
  source.line(start_offset)
@@ -312,6 +432,12 @@ module Prism
312
432
  source.code_units_column(start_offset, encoding)
313
433
  end
314
434
 
435
+ # The start column in code units using the given cache to fetch or calculate
436
+ # the value.
437
+ def cached_start_code_units_column(cache)
438
+ cache[start_offset] - cache[source.line_start(start_offset)]
439
+ end
440
+
315
441
  # The column number in bytes where this location ends from the start of the
316
442
  # line.
317
443
  def end_column
@@ -330,6 +456,12 @@ module Prism
330
456
  source.code_units_column(end_offset, encoding)
331
457
  end
332
458
 
459
+ # The end column in code units using the given cache to fetch or calculate
460
+ # the value.
461
+ def cached_end_code_units_column(cache)
462
+ cache[end_offset] - cache[source.line_start(end_offset)]
463
+ end
464
+
333
465
  # Implement the hash pattern matching interface for Location.
334
466
  def deconstruct_keys(keys)
335
467
  { start_offset: start_offset, end_offset: end_offset }
@@ -579,6 +711,11 @@ module Prism
579
711
  def failure?
580
712
  !success?
581
713
  end
714
+
715
+ # Create a code units cache for the given encoding.
716
+ def code_units_cache(encoding)
717
+ source.code_units_cache(encoding)
718
+ end
582
719
  end
583
720
 
584
721
  # This is a result specific to the `parse` and `parse_file` methods.
@@ -18,7 +18,7 @@ module Prism
18
18
 
19
19
  # The minor version of prism that we are expecting to find in the serialized
20
20
  # strings.
21
- MINOR_VERSION = 1
21
+ MINOR_VERSION = 2
22
22
 
23
23
  # The patch version of prism that we are expecting to find in the serialized
24
24
  # strings.
@@ -28,10 +28,21 @@ module Prism
28
28
  def self.load(input, serialized)
29
29
  input = input.dup
30
30
  source = Source.for(input)
31
+
31
32
  loader = Loader.new(source, serialized)
32
33
  result = loader.load_result
33
34
 
34
35
  input.force_encoding(loader.encoding)
36
+
37
+ # This is an extremely niche use case where the file was marked as binary
38
+ # but it contained UTF-8-encoded characters. In that case we will actually
39
+ # put it back to UTF-8 to give the location APIs the best chance of being
40
+ # correct.
41
+ if !input.ascii_only? && input.encoding == Encoding::BINARY
42
+ input.force_encoding(Encoding::UTF_8)
43
+ input.force_encoding(Encoding::BINARY) unless input.valid_encoding?
44
+ end
45
+
35
46
  result
36
47
  end
37
48
 
@@ -267,6 +278,7 @@ module Prism
267
278
  :instance_variable_bare,
268
279
  :invalid_block_exit,
269
280
  :invalid_character,
281
+ :invalid_comma,
270
282
  :invalid_encoding_magic_comment,
271
283
  :invalid_escape_character,
272
284
  :invalid_float_exponent,
@@ -51,7 +51,7 @@ module Prism
51
51
  source = source_buffer.source
52
52
 
53
53
  offset_cache = build_offset_cache(source)
54
- result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]], encoding: false), offset_cache)
54
+ result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), partial_script: true, encoding: false), offset_cache)
55
55
 
56
56
  build_ast(result.value, offset_cache)
57
57
  ensure
@@ -64,7 +64,7 @@ module Prism
64
64
  source = source_buffer.source
65
65
 
66
66
  offset_cache = build_offset_cache(source)
67
- result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]], encoding: false), offset_cache)
67
+ result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), partial_script: true, encoding: false), offset_cache)
68
68
 
69
69
  [
70
70
  build_ast(result.value, offset_cache),
@@ -83,7 +83,7 @@ module Prism
83
83
  offset_cache = build_offset_cache(source)
84
84
  result =
85
85
  begin
86
- unwrap(Prism.parse_lex(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]], encoding: false), offset_cache)
86
+ unwrap(Prism.parse_lex(source, filepath: source_buffer.name, version: convert_for_prism(version), partial_script: true, encoding: false), offset_cache)
87
87
  rescue ::Parser::SyntaxError
88
88
  raise if !recover
89
89
  end
@@ -3269,11 +3269,7 @@ module Prism
3269
3269
 
3270
3270
  # Lazily initialize the parse result.
3271
3271
  def result
3272
- @result ||=
3273
- begin
3274
- scopes = RUBY_VERSION >= "3.3.0" ? [] : [[]]
3275
- Prism.parse(source, scopes: scopes)
3276
- end
3272
+ @result ||= Prism.parse(source, partial_script: true)
3277
3273
  end
3278
3274
 
3279
3275
  ##########################################################################
@@ -1596,13 +1596,13 @@ module Prism
1596
1596
  # Parse the given source and translate it into the seattlerb/ruby_parser
1597
1597
  # gem's Sexp format.
1598
1598
  def parse(source, filepath = "(string)")
1599
- translate(Prism.parse(source, filepath: filepath, scopes: [[]]), filepath)
1599
+ translate(Prism.parse(source, filepath: filepath, partial_script: true), filepath)
1600
1600
  end
1601
1601
 
1602
1602
  # Parse the given file and translate it into the seattlerb/ruby_parser
1603
1603
  # gem's Sexp format.
1604
1604
  def parse_file(filepath)
1605
- translate(Prism.parse_file(filepath, scopes: [[]]), filepath)
1605
+ translate(Prism.parse_file(filepath, partial_script: true), filepath)
1606
1606
  end
1607
1607
 
1608
1608
  class << self
data/prism.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |spec|
4
4
  spec.name = "prism"
5
- spec.version = "1.1.0"
5
+ spec.version = "1.2.0"
6
6
  spec.authors = ["Shopify"]
7
7
  spec.email = ["ruby@shopify.com"]
8
8