prism 0.21.0 → 0.23.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -94,7 +94,7 @@ module Prism
94
94
 
95
95
  # Returns the full name of this constant. For example: "Foo"
96
96
  def full_name
97
- name.name
97
+ name.to_s
98
98
  end
99
99
  end
100
100
 
@@ -135,7 +135,17 @@ module Prism
135
135
  # Returns the list of parts for the full name of this constant path.
136
136
  # For example: [:Foo, :Bar]
137
137
  def full_name_parts
138
- (parent&.full_name_parts || [:""]).push(child.name)
138
+ parts = case parent
139
+ when ConstantPathNode, ConstantReadNode
140
+ parent.full_name_parts
141
+ when nil
142
+ [:""]
143
+ else
144
+ raise ConstantPathNode::DynamicPartsInConstantPathError,
145
+ "Constant path target contains dynamic parts. Cannot compute full name"
146
+ end
147
+
148
+ parts.push(child.name)
139
149
  end
140
150
 
141
151
  # Returns the full name of this constant path. For example: "Foo::Bar"
@@ -144,6 +154,19 @@ module Prism
144
154
  end
145
155
  end
146
156
 
157
+ class ConstantTargetNode < Node
158
+ # Returns the list of parts for the full name of this constant.
159
+ # For example: [:Foo]
160
+ def full_name_parts
161
+ [name]
162
+ end
163
+
164
+ # Returns the full name of this constant. For example: "Foo"
165
+ def full_name
166
+ name.to_s
167
+ end
168
+ end
169
+
147
170
  class ParametersNode < Node
148
171
  # Mirrors the Method#parameters method.
149
172
  def signature
@@ -9,18 +9,16 @@ module Prism
9
9
  attr_reader :source
10
10
 
11
11
  # The line number where this source starts.
12
- attr_accessor :start_line
12
+ attr_reader :start_line
13
13
 
14
14
  # The list of newline byte offsets in the source code.
15
15
  attr_reader :offsets
16
16
 
17
- # Create a new source object with the given source code and newline byte
18
- # offsets. If no newline byte offsets are given, they will be computed from
19
- # the source code.
20
- def initialize(source, start_line = 1, offsets = compute_offsets(source))
17
+ # Create a new source object with the given source code.
18
+ def initialize(source, start_line = 1, offsets = [])
21
19
  @source = source
22
- @start_line = start_line
23
- @offsets = offsets
20
+ @start_line = start_line # set after parsing is done
21
+ @offsets = offsets # set after parsing is done
24
22
  end
25
23
 
26
24
  # Perform a byteslice on the source code using the given byte offset and
@@ -56,6 +54,23 @@ module Prism
56
54
  character_offset(byte_offset) - character_offset(line_start(byte_offset))
57
55
  end
58
56
 
57
+ # Returns the offset from the start of the file for the given byte offset
58
+ # counting in code units for the given encoding.
59
+ #
60
+ # This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
61
+ # concept of code units that differs from the number of characters in other
62
+ # encodings, it is not captured here.
63
+ def code_units_offset(byte_offset, encoding)
64
+ byteslice = source.byteslice(0, byte_offset).encode(encoding)
65
+ (encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE) ? (byteslice.bytesize / 2) : byteslice.length
66
+ end
67
+
68
+ # Returns the column number in code units for the given encoding for the
69
+ # given byte offset.
70
+ def code_units_column(byte_offset, encoding)
71
+ code_units_offset(byte_offset, encoding) - code_units_offset(line_start(byte_offset), encoding)
72
+ end
73
+
59
74
  private
60
75
 
61
76
  # Binary search through the offsets to find the line number for the given
@@ -77,21 +92,14 @@ module Prism
77
92
 
78
93
  left - 1
79
94
  end
80
-
81
- # Find all of the newlines in the source code and return their byte offsets
82
- # from the start of the string an array.
83
- def compute_offsets(code)
84
- offsets = [0]
85
- code.b.scan("\n") { offsets << $~.end(0) }
86
- offsets
87
- end
88
95
  end
89
96
 
90
97
  # This represents a location in the source.
91
98
  class Location
92
99
  # A Source object that is used to determine more information from the given
93
100
  # offset and length.
94
- protected attr_reader :source
101
+ attr_reader :source
102
+ protected :source
95
103
 
96
104
  # The byte offset from the beginning of the source where this location
97
105
  # starts.
@@ -137,6 +145,11 @@ module Prism
137
145
  source.character_offset(start_offset)
138
146
  end
139
147
 
148
+ # The offset from the start of the file in code units of the given encoding.
149
+ def start_code_units_offset(encoding = Encoding::UTF_16LE)
150
+ source.code_units_offset(start_offset, encoding)
151
+ end
152
+
140
153
  # The byte offset from the beginning of the source where this location ends.
141
154
  def end_offset
142
155
  start_offset + length
@@ -148,6 +161,11 @@ module Prism
148
161
  source.character_offset(end_offset)
149
162
  end
150
163
 
164
+ # The offset from the start of the file in code units of the given encoding.
165
+ def end_code_units_offset(encoding = Encoding::UTF_16LE)
166
+ source.code_units_offset(end_offset, encoding)
167
+ end
168
+
151
169
  # The line number where this location starts.
152
170
  def start_line
153
171
  source.line(start_offset)
@@ -176,6 +194,12 @@ module Prism
176
194
  source.character_column(start_offset)
177
195
  end
178
196
 
197
+ # The column number in code units of the given encoding where this location
198
+ # starts from the start of the line.
199
+ def start_code_units_column(encoding = Encoding::UTF_16LE)
200
+ source.code_units_column(start_offset, encoding)
201
+ end
202
+
179
203
  # The column number in bytes where this location ends from the start of the
180
204
  # line.
181
205
  def end_column
@@ -188,6 +212,12 @@ module Prism
188
212
  source.character_column(end_offset)
189
213
  end
190
214
 
215
+ # The column number in code units of the given encoding where this location
216
+ # ends from the start of the line.
217
+ def end_code_units_column(encoding = Encoding::UTF_16LE)
218
+ source.code_units_column(end_offset, encoding)
219
+ end
220
+
191
221
  # Implement the hash pattern matching interface for Location.
192
222
  def deconstruct_keys(keys)
193
223
  { start_offset: start_offset, end_offset: end_offset }
@@ -27,7 +27,7 @@ module Prism
27
27
 
28
28
  # The minor version of prism that we are expecting to find in the serialized
29
29
  # strings.
30
- MINOR_VERSION = 21
30
+ MINOR_VERSION = 23
31
31
 
32
32
  # The patch version of prism that we are expecting to find in the serialized
33
33
  # strings.
@@ -86,11 +86,15 @@ module Prism
86
86
  end
87
87
 
88
88
  def load_start_line
89
- source.start_line = load_varsint
89
+ source.instance_variable_set :@start_line, load_varsint
90
+ end
91
+
92
+ def load_line_offsets
93
+ source.instance_variable_set :@offsets, Array.new(load_varuint) { load_varuint }
90
94
  end
91
95
 
92
96
  def load_comments
93
- load_varuint.times.map do
97
+ Array.new(load_varuint) do
94
98
  case load_varuint
95
99
  when 0 then InlineComment.new(load_location)
96
100
  when 1 then EmbDocComment.new(load_location)
@@ -101,10 +105,10 @@ module Prism
101
105
 
102
106
  def load_metadata
103
107
  comments = load_comments
104
- magic_comments = load_varuint.times.map { MagicComment.new(load_location, load_location) }
108
+ magic_comments = Array.new(load_varuint) { MagicComment.new(load_location, load_location) }
105
109
  data_loc = load_optional_location
106
- errors = load_varuint.times.map { ParseError.new(load_embedded_string, load_location, load_error_level) }
107
- warnings = load_varuint.times.map { ParseWarning.new(load_embedded_string, load_location, load_warning_level) }
110
+ errors = Array.new(load_varuint) { ParseError.new(load_embedded_string, load_location, load_error_level) }
111
+ warnings = Array.new(load_varuint) { ParseWarning.new(load_embedded_string, load_location, load_warning_level) }
108
112
  [comments, magic_comments, data_loc, errors, warnings]
109
113
  end
110
114
 
@@ -125,6 +129,7 @@ module Prism
125
129
  tokens = load_tokens
126
130
  encoding = load_encoding
127
131
  load_start_line
132
+ load_line_offsets
128
133
  comments, magic_comments, data_loc, errors, warnings = load_metadata
129
134
  tokens.each { |token,| token.value.force_encoding(encoding) }
130
135
 
@@ -136,6 +141,7 @@ module Prism
136
141
  load_header
137
142
  load_encoding
138
143
  load_start_line
144
+ load_line_offsets
139
145
 
140
146
  comments, magic_comments, data_loc, errors, warnings = load_metadata
141
147
 
@@ -244,6 +250,8 @@ module Prism
244
250
  case level
245
251
  when 0
246
252
  :fatal
253
+ when 1
254
+ :argument
247
255
  else
248
256
  raise "Unknown level: #{level}"
249
257
  end
@@ -1062,12 +1062,22 @@ module Prism
1062
1062
 
1063
1063
  # foo in bar
1064
1064
  # ^^^^^^^^^^
1065
- def visit_match_predicate_node(node)
1066
- builder.match_pattern_p(
1067
- visit(node.value),
1068
- token(node.operator_loc),
1069
- within_pattern { |compiler| node.pattern.accept(compiler) }
1070
- )
1065
+ if RUBY_VERSION >= "3.0"
1066
+ def visit_match_predicate_node(node)
1067
+ builder.match_pattern_p(
1068
+ visit(node.value),
1069
+ token(node.operator_loc),
1070
+ within_pattern { |compiler| node.pattern.accept(compiler) }
1071
+ )
1072
+ end
1073
+ else
1074
+ def visit_match_predicate_node(node)
1075
+ builder.match_pattern(
1076
+ visit(node.value),
1077
+ token(node.operator_loc),
1078
+ within_pattern { |compiler| node.pattern.accept(compiler) }
1079
+ )
1080
+ end
1071
1081
  end
1072
1082
 
1073
1083
  # foo => bar
@@ -68,17 +68,23 @@ module Prism
68
68
 
69
69
  # Parses a source buffer and returns the AST, the source code comments,
70
70
  # and the tokens emitted by the lexer.
71
- def tokenize(source_buffer, _recover = false)
71
+ def tokenize(source_buffer, recover = false)
72
72
  @source_buffer = source_buffer
73
73
  source = source_buffer.source
74
74
 
75
75
  offset_cache = build_offset_cache(source)
76
- result = unwrap(Prism.parse_lex(source, filepath: source_buffer.name), offset_cache)
76
+ result =
77
+ begin
78
+ unwrap(Prism.parse_lex(source, filepath: source_buffer.name), offset_cache)
79
+ rescue ::Parser::SyntaxError
80
+ raise if !recover
81
+ end
77
82
 
78
83
  program, tokens = result.value
84
+ ast = build_ast(program, offset_cache) if result.success?
79
85
 
80
86
  [
81
- build_ast(program, offset_cache),
87
+ ast,
82
88
  build_comments(result.comments, offset_cache),
83
89
  build_tokens(tokens, offset_cache)
84
90
  ]
@@ -118,20 +124,21 @@ module Prism
118
124
  # build the parser gem AST.
119
125
  #
120
126
  # If the bytesize of the source is the same as the length, then we can
121
- # just use the offset directly. Otherwise, we build a hash that functions
122
- # as a cache for the conversion.
123
- #
124
- # This is a good opportunity for some optimizations. If the source file
125
- # has any multi-byte characters, this can tank the performance of the
126
- # translator. We could make this significantly faster by using a
127
- # different data structure for the cache.
127
+ # just use the offset directly. Otherwise, we build an array where the
128
+ # index is the byte offset and the value is the character offset.
128
129
  def build_offset_cache(source)
129
130
  if source.bytesize == source.length
130
131
  -> (offset) { offset }
131
132
  else
132
- Hash.new do |hash, offset|
133
- hash[offset] = source.byteslice(0, offset).length
133
+ offset_cache = []
134
+ offset = 0
135
+
136
+ source.each_char do |char|
137
+ char.bytesize.times { offset_cache << offset }
138
+ offset += 1
134
139
  end
140
+
141
+ offset_cache << offset
135
142
  end
136
143
  end
137
144