prism 1.1.0 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ea89f88aef2ec51d2cfb5868cf873ab256393f5ba632381a3a53631c2506dbc3
4
- data.tar.gz: 4da85d79e85d5cca843eb6e71cea07efc80a6e77608aa3027e1cd75c3c3b735a
3
+ metadata.gz: f16f842a06eec8141246c60a39f509a59817de34ab4be3a33502ada040ac1602
4
+ data.tar.gz: 38bd30b4ba63fe67892a0138be5b7b2c84fb6bd66e011871a07ee453be7b0aec
5
5
  SHA512:
6
- metadata.gz: 1b7f92a58fa176b04aab230f49dcdc08f9b810575426402db6fc9eee0921ac5ddddf1f01f18bb9429a914955759e8d4cee5a70674e14eb0b0169018ce615780e
7
- data.tar.gz: e4a5a6ba40bc7692c6c904f452dd8ff18881703c99d51969507619368748eec29936dc3f521e8967b6d728ea331f26f9f670b44daedde2106f649104aefe0c30
6
+ metadata.gz: 9877dc80270515e91c5357418a67721fa832c6de44d755047f576d69a3b09129d64da19e5ac767d184df74afe40982f56c88be3851ce7fc7c99c1d0bbf15ec77
7
+ data.tar.gz: 4578c2f1e2e934f763d6c55ae84dc076b0676ac0560c13364db1ade75425da76d64b7a4cf8fb07ea723568fbfc58c20fb8055e9dec364ac5da05765d26398d39
data/CHANGELOG.md CHANGED
@@ -6,6 +6,21 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [1.2.0] - 2024-10-10
10
+
11
+ ### Added
12
+
13
+ - Introduce `Prism::CodeUnitsCache`.
14
+
15
+ ### Changed
16
+
17
+ - Properly handle lexing global variables that begin with `$-`.
18
+ - Properly reject invalid multi writes within parentheses.
19
+ - Fix unary `*` binding power.
20
+ - Set `contains_keywords` flag for implicit `gets` calls when `-p` is used.
21
+ - Properly reject invalid non-associative operator patterns.
22
+ - Do not warn about unused variables declared on negative lines.
23
+
9
24
  ## [1.1.0] - 2024-10-02
10
25
 
11
26
  ### Added
@@ -591,7 +606,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
591
606
 
592
607
  - 🎉 Initial release! 🎉
593
608
 
594
- [unreleased]: https://github.com/ruby/prism/compare/v1.1.0...HEAD
609
+ [unreleased]: https://github.com/ruby/prism/compare/v1.2.0...HEAD
610
+ [1.2.0]: https://github.com/ruby/prism/compare/v1.1.0...v1.2.0
595
611
  [1.1.0]: https://github.com/ruby/prism/compare/v1.0.0...v1.1.0
596
612
  [1.0.0]: https://github.com/ruby/prism/compare/v0.30.0...v1.0.0
597
613
  [0.30.0]: https://github.com/ruby/prism/compare/v0.29.0...v0.30.0
data/config.yml CHANGED
@@ -141,6 +141,7 @@ errors:
141
141
  - INSTANCE_VARIABLE_BARE
142
142
  - INVALID_BLOCK_EXIT
143
143
  - INVALID_CHARACTER
144
+ - INVALID_COMMA
144
145
  - INVALID_ENCODING_MAGIC_COMMENT
145
146
  - INVALID_ESCAPE_CHARACTER
146
147
  - INVALID_FLOAT_EXPONENT
@@ -3684,7 +3685,7 @@ nodes:
3684
3685
  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3685
3686
  end
3686
3687
 
3687
- `Foo, *splat, Bar` are in the `exceptions` field. `ex` is in the `exception` field.
3688
+ `Foo, *splat, Bar` are in the `exceptions` field. `ex` is in the `reference` field.
3688
3689
  - name: RestParameterNode
3689
3690
  flags: ParameterFlags
3690
3691
  fields:
@@ -1,7 +1,7 @@
1
1
  #ifndef PRISM_EXT_NODE_H
2
2
  #define PRISM_EXT_NODE_H
3
3
 
4
- #define EXPECTED_PRISM_VERSION "1.1.0"
4
+ #define EXPECTED_PRISM_VERSION "1.2.0"
5
5
 
6
6
  #include <ruby.h>
7
7
  #include <ruby/encoding.h>
data/include/prism/ast.h CHANGED
@@ -6490,7 +6490,7 @@ typedef struct pm_rescue_modifier_node {
6490
6490
  * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
6491
6491
  * end
6492
6492
  *
6493
- * `Foo, *splat, Bar` are in the `exceptions` field. `ex` is in the `exception` field.
6493
+ * `Foo, *splat, Bar` are in the `exceptions` field. `ex` is in the `reference` field.
6494
6494
  *
6495
6495
  * Type: ::PM_RESCUE_NODE
6496
6496
  *
@@ -170,6 +170,7 @@ typedef enum {
170
170
  PM_ERR_INSTANCE_VARIABLE_BARE,
171
171
  PM_ERR_INVALID_BLOCK_EXIT,
172
172
  PM_ERR_INVALID_CHARACTER,
173
+ PM_ERR_INVALID_COMMA,
173
174
  PM_ERR_INVALID_ENCODING_MAGIC_COMMENT,
174
175
  PM_ERR_INVALID_ESCAPE_CHARACTER,
175
176
  PM_ERR_INVALID_FLOAT_EXPONENT,
@@ -82,6 +82,23 @@ typedef enum {
82
82
  PM_HEREDOC_INDENT_TILDE,
83
83
  } pm_heredoc_indent_t;
84
84
 
85
+ /**
86
+ * All of the information that must be stored in order to lex a heredoc.
87
+ */
88
+ typedef struct {
89
+ /** A pointer to the start of the heredoc identifier. */
90
+ const uint8_t *ident_start;
91
+
92
+ /** The length of the heredoc identifier. */
93
+ size_t ident_length;
94
+
95
+ /** The type of quote that the heredoc uses. */
96
+ pm_heredoc_quote_t quote;
97
+
98
+ /** The type of indentation that the heredoc uses. */
99
+ pm_heredoc_indent_t indent;
100
+ } pm_heredoc_lex_mode_t;
101
+
85
102
  /**
86
103
  * When lexing Ruby source, the lexer has a small amount of state to tell which
87
104
  * kind of token it is currently lexing. For example, when we find the start of
@@ -210,17 +227,10 @@ typedef struct pm_lex_mode {
210
227
  } string;
211
228
 
212
229
  struct {
213
- /** A pointer to the start of the heredoc identifier. */
214
- const uint8_t *ident_start;
215
-
216
- /** The length of the heredoc identifier. */
217
- size_t ident_length;
218
-
219
- /** The type of quote that the heredoc uses. */
220
- pm_heredoc_quote_t quote;
221
-
222
- /** The type of indentation that the heredoc uses. */
223
- pm_heredoc_indent_t indent;
230
+ /**
231
+ * All of the data necessary to lex a heredoc.
232
+ */
233
+ pm_heredoc_lex_mode_t base;
224
234
 
225
235
  /**
226
236
  * This is the pointer to the character where lexing should resume
@@ -233,7 +243,7 @@ typedef struct pm_lex_mode {
233
243
  * line so that we know how much to dedent each line in the case of
234
244
  * a tilde heredoc.
235
245
  */
236
- size_t common_whitespace;
246
+ size_t *common_whitespace;
237
247
 
238
248
  /** True if the previous token ended with a line continuation. */
239
249
  bool line_continuation;
@@ -382,6 +392,9 @@ typedef enum {
382
392
  /** a rescue statement within a module statement */
383
393
  PM_CONTEXT_MODULE_RESCUE,
384
394
 
395
+ /** a multiple target expression */
396
+ PM_CONTEXT_MULTI_TARGET,
397
+
385
398
  /** a parenthesized expression */
386
399
  PM_CONTEXT_PARENS,
387
400
 
@@ -14,7 +14,7 @@
14
14
  /**
15
15
  * The minor version of the Prism library as an int.
16
16
  */
17
- #define PRISM_VERSION_MINOR 1
17
+ #define PRISM_VERSION_MINOR 2
18
18
 
19
19
  /**
20
20
  * The patch version of the Prism library as an int.
@@ -24,6 +24,6 @@
24
24
  /**
25
25
  * The version of the Prism library as a constant string.
26
26
  */
27
- #define PRISM_VERSION "1.1.0"
27
+ #define PRISM_VERSION "1.2.0"
28
28
 
29
29
  #endif
data/lib/prism/node.rb CHANGED
@@ -14219,7 +14219,7 @@ module Prism
14219
14219
  # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
14220
14220
  # end
14221
14221
  #
14222
- # `Foo, *splat, Bar` are in the `exceptions` field. `ex` is in the `exception` field.
14222
+ # `Foo, *splat, Bar` are in the `exceptions` field. `ex` is in the `reference` field.
14223
14223
  class RescueNode < Node
14224
14224
  # Initialize a new RescueNode node.
14225
14225
  def initialize(source, node_id, location, flags, keyword_loc, exceptions, operator_loc, reference, statements, subsequent)
@@ -12,6 +12,21 @@ module Prism
12
12
  def self.for(source, start_line = 1, offsets = [])
13
13
  if source.ascii_only?
14
14
  ASCIISource.new(source, start_line, offsets)
15
+ elsif source.encoding == Encoding::BINARY
16
+ source.force_encoding(Encoding::UTF_8)
17
+
18
+ if source.valid_encoding?
19
+ new(source, start_line, offsets)
20
+ else
21
+ # This is an extremely niche use case where the file is marked as
22
+ # binary, contains multi-byte characters, and those characters are not
23
+ # valid UTF-8. In this case we'll mark it as binary and fall back to
24
+ # treating everything as a single-byte character. This _may_ cause
25
+ # problems when asking for code units, but it appears to be the
26
+ # cleanest solution at the moment.
27
+ source.force_encoding(Encoding::BINARY)
28
+ ASCIISource.new(source, start_line, offsets)
29
+ end
15
30
  else
16
31
  new(source, start_line, offsets)
17
32
  end
@@ -89,8 +104,14 @@ module Prism
89
104
  # This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
90
105
  # concept of code units that differs from the number of characters in other
91
106
  # encodings, it is not captured here.
107
+ #
108
+ # We purposefully replace invalid and undefined characters with replacement
109
+ # characters in this conversion. This happens for two reasons. First, it's
110
+ # possible that the given byte offset will not occur on a character
111
+ # boundary. Second, it's possible that the source code will contain a
112
+ # character that has no equivalent in the given encoding.
92
113
  def code_units_offset(byte_offset, encoding)
93
- byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding)
114
+ byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding, invalid: :replace, undef: :replace)
94
115
 
95
116
  if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
96
117
  byteslice.bytesize / 2
@@ -99,6 +120,12 @@ module Prism
99
120
  end
100
121
  end
101
122
 
123
+ # Generate a cache that targets a specific encoding for calculating code
124
+ # unit offsets.
125
+ def code_units_cache(encoding)
126
+ CodeUnitsCache.new(source, encoding)
127
+ end
128
+
102
129
  # Returns the column number in code units for the given encoding for the
103
130
  # given byte offset.
104
131
  def code_units_column(byte_offset, encoding)
@@ -128,10 +155,84 @@ module Prism
128
155
  end
129
156
  end
130
157
 
158
+ # A cache that can be used to quickly compute code unit offsets from byte
159
+ # offsets. It purposefully provides only a single #[] method to access the
160
+ # cache in order to minimize surface area.
161
+ #
162
+ # Note that there are some known issues here that may or may not be addressed
163
+ # in the future:
164
+ #
165
+ # * The first is that there are issues when the cache computes values that are
166
+ # not on character boundaries. This can result in subsequent computations
167
+ # being off by one or more code units.
168
+ # * The second is that this cache is currently unbounded. In theory we could
169
+ # introduce some kind of LRU cache to limit the number of entries, but this
170
+ # has not yet been implemented.
171
+ #
172
+ class CodeUnitsCache
173
+ class UTF16Counter # :nodoc:
174
+ def initialize(source, encoding)
175
+ @source = source
176
+ @encoding = encoding
177
+ end
178
+
179
+ def count(byte_offset, byte_length)
180
+ @source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).bytesize / 2
181
+ end
182
+ end
183
+
184
+ class LengthCounter # :nodoc:
185
+ def initialize(source, encoding)
186
+ @source = source
187
+ @encoding = encoding
188
+ end
189
+
190
+ def count(byte_offset, byte_length)
191
+ @source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).length
192
+ end
193
+ end
194
+
195
+ private_constant :UTF16Counter, :LengthCounter
196
+
197
+ # Initialize a new cache with the given source and encoding.
198
+ def initialize(source, encoding)
199
+ @source = source
200
+ @counter =
201
+ if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
202
+ UTF16Counter.new(source, encoding)
203
+ else
204
+ LengthCounter.new(source, encoding)
205
+ end
206
+
207
+ @cache = {}
208
+ @offsets = []
209
+ end
210
+
211
+ # Retrieve the code units offset from the given byte offset.
212
+ def [](byte_offset)
213
+ @cache[byte_offset] ||=
214
+ if (index = @offsets.bsearch_index { |offset| offset > byte_offset }).nil?
215
+ @offsets << byte_offset
216
+ @counter.count(0, byte_offset)
217
+ elsif index == 0
218
+ @offsets.unshift(byte_offset)
219
+ @counter.count(0, byte_offset)
220
+ else
221
+ @offsets.insert(index, byte_offset)
222
+ offset = @offsets[index - 1]
223
+ @cache[offset] + @counter.count(offset, byte_offset - offset)
224
+ end
225
+ end
226
+ end
227
+
131
228
  # Specialized version of Prism::Source for source code that includes ASCII
132
229
  # characters only. This class is used to apply performance optimizations that
133
- # cannot be applied to sources that include multibyte characters. Sources that
134
- # include multibyte characters are represented by the Prism::Source class.
230
+ # cannot be applied to sources that include multibyte characters.
231
+ #
232
+ # In the extremely rare case that a source includes multi-byte characters but
233
+ # is marked as binary because of a magic encoding comment and it cannot be
234
+ # eagerly converted to UTF-8, this class will be used as well. This is because
235
+ # at that point we will treat everything as single-byte characters.
135
236
  class ASCIISource < Source
136
237
  # Return the character offset for the given byte offset.
137
238
  def character_offset(byte_offset)
@@ -153,6 +254,13 @@ module Prism
153
254
  byte_offset
154
255
  end
155
256
 
257
+ # Returns a cache that is the identity function in order to maintain the
258
+ # same interface. We can do this because code units are always equivalent to
259
+ # byte offsets for ASCII-only sources.
260
+ def code_units_cache(encoding)
261
+ ->(byte_offset) { byte_offset }
262
+ end
263
+
156
264
  # Specialized version of `code_units_column` that does not depend on
157
265
  # `code_units_offset`, which is a more expensive operation. This is
158
266
  # essentially the same as `Prism::Source#column`.
@@ -262,6 +370,12 @@ module Prism
262
370
  source.code_units_offset(start_offset, encoding)
263
371
  end
264
372
 
373
+ # The start offset from the start of the file in code units using the given
374
+ # cache to fetch or calculate the value.
375
+ def cached_start_code_units_offset(cache)
376
+ cache[start_offset]
377
+ end
378
+
265
379
  # The byte offset from the beginning of the source where this location ends.
266
380
  def end_offset
267
381
  start_offset + length
@@ -278,6 +392,12 @@ module Prism
278
392
  source.code_units_offset(end_offset, encoding)
279
393
  end
280
394
 
395
+ # The end offset from the start of the file in code units using the given
396
+ # cache to fetch or calculate the value.
397
+ def cached_end_code_units_offset(cache)
398
+ cache[end_offset]
399
+ end
400
+
281
401
  # The line number where this location starts.
282
402
  def start_line
283
403
  source.line(start_offset)
@@ -312,6 +432,12 @@ module Prism
312
432
  source.code_units_column(start_offset, encoding)
313
433
  end
314
434
 
435
+ # The start column in code units using the given cache to fetch or calculate
436
+ # the value.
437
+ def cached_start_code_units_column(cache)
438
+ cache[start_offset] - cache[source.line_start(start_offset)]
439
+ end
440
+
315
441
  # The column number in bytes where this location ends from the start of the
316
442
  # line.
317
443
  def end_column
@@ -330,6 +456,12 @@ module Prism
330
456
  source.code_units_column(end_offset, encoding)
331
457
  end
332
458
 
459
+ # The end column in code units using the given cache to fetch or calculate
460
+ # the value.
461
+ def cached_end_code_units_column(cache)
462
+ cache[end_offset] - cache[source.line_start(end_offset)]
463
+ end
464
+
333
465
  # Implement the hash pattern matching interface for Location.
334
466
  def deconstruct_keys(keys)
335
467
  { start_offset: start_offset, end_offset: end_offset }
@@ -579,6 +711,11 @@ module Prism
579
711
  def failure?
580
712
  !success?
581
713
  end
714
+
715
+ # Create a code units cache for the given encoding.
716
+ def code_units_cache(encoding)
717
+ source.code_units_cache(encoding)
718
+ end
582
719
  end
583
720
 
584
721
  # This is a result specific to the `parse` and `parse_file` methods.
@@ -18,7 +18,7 @@ module Prism
18
18
 
19
19
  # The minor version of prism that we are expecting to find in the serialized
20
20
  # strings.
21
- MINOR_VERSION = 1
21
+ MINOR_VERSION = 2
22
22
 
23
23
  # The patch version of prism that we are expecting to find in the serialized
24
24
  # strings.
@@ -28,10 +28,21 @@ module Prism
28
28
  def self.load(input, serialized)
29
29
  input = input.dup
30
30
  source = Source.for(input)
31
+
31
32
  loader = Loader.new(source, serialized)
32
33
  result = loader.load_result
33
34
 
34
35
  input.force_encoding(loader.encoding)
36
+
37
+ # This is an extremely niche use-case where the file was marked as binary
38
+ # but it contained UTF-8-encoded characters. In that case we will actually
39
+ # put it back to UTF-8 to give the location APIs the best chance of being
40
+ # correct.
41
+ if !input.ascii_only? && input.encoding == Encoding::BINARY
42
+ input.force_encoding(Encoding::UTF_8)
43
+ input.force_encoding(Encoding::BINARY) unless input.valid_encoding?
44
+ end
45
+
35
46
  result
36
47
  end
37
48
 
@@ -267,6 +278,7 @@ module Prism
267
278
  :instance_variable_bare,
268
279
  :invalid_block_exit,
269
280
  :invalid_character,
281
+ :invalid_comma,
270
282
  :invalid_encoding_magic_comment,
271
283
  :invalid_escape_character,
272
284
  :invalid_float_exponent,
@@ -51,7 +51,7 @@ module Prism
51
51
  source = source_buffer.source
52
52
 
53
53
  offset_cache = build_offset_cache(source)
54
- result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]], encoding: false), offset_cache)
54
+ result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), partial_script: true, encoding: false), offset_cache)
55
55
 
56
56
  build_ast(result.value, offset_cache)
57
57
  ensure
@@ -64,7 +64,7 @@ module Prism
64
64
  source = source_buffer.source
65
65
 
66
66
  offset_cache = build_offset_cache(source)
67
- result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]], encoding: false), offset_cache)
67
+ result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), partial_script: true, encoding: false), offset_cache)
68
68
 
69
69
  [
70
70
  build_ast(result.value, offset_cache),
@@ -83,7 +83,7 @@ module Prism
83
83
  offset_cache = build_offset_cache(source)
84
84
  result =
85
85
  begin
86
- unwrap(Prism.parse_lex(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]], encoding: false), offset_cache)
86
+ unwrap(Prism.parse_lex(source, filepath: source_buffer.name, version: convert_for_prism(version), partial_script: true, encoding: false), offset_cache)
87
87
  rescue ::Parser::SyntaxError
88
88
  raise if !recover
89
89
  end
@@ -3269,11 +3269,7 @@ module Prism
3269
3269
 
3270
3270
  # Lazily initialize the parse result.
3271
3271
  def result
3272
- @result ||=
3273
- begin
3274
- scopes = RUBY_VERSION >= "3.3.0" ? [] : [[]]
3275
- Prism.parse(source, scopes: scopes)
3276
- end
3272
+ @result ||= Prism.parse(source, partial_script: true)
3277
3273
  end
3278
3274
 
3279
3275
  ##########################################################################
@@ -1596,13 +1596,13 @@ module Prism
1596
1596
  # Parse the given source and translate it into the seattlerb/ruby_parser
1597
1597
  # gem's Sexp format.
1598
1598
  def parse(source, filepath = "(string)")
1599
- translate(Prism.parse(source, filepath: filepath, scopes: [[]]), filepath)
1599
+ translate(Prism.parse(source, filepath: filepath, partial_script: true), filepath)
1600
1600
  end
1601
1601
 
1602
1602
  # Parse the given file and translate it into the seattlerb/ruby_parser
1603
1603
  # gem's Sexp format.
1604
1604
  def parse_file(filepath)
1605
- translate(Prism.parse_file(filepath, scopes: [[]]), filepath)
1605
+ translate(Prism.parse_file(filepath, partial_script: true), filepath)
1606
1606
  end
1607
1607
 
1608
1608
  class << self
data/prism.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |spec|
4
4
  spec.name = "prism"
5
- spec.version = "1.1.0"
5
+ spec.version = "1.2.0"
6
6
  spec.authors = ["Shopify"]
7
7
  spec.email = ["ruby@shopify.com"]
8
8