parser-prism 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,293 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Parser
4
+ class Prism
5
# Accepts a list of prism tokens and converts them into the expected format
# for the parser gem.
#
# Every emitted token has the shape `[type, [value, range]]`, where `range`
# is a Source::Range whose bounds have been looked up in the offset cache.
class Lexer
  # Maps each prism token type onto the parser gem token type that is emitted
  # for it. Several entries are refined further inside #to_a.
  TYPES = {
    # These tokens should never appear in the output of the lexer.
    EOF: nil,
    MISSING: nil,
    NOT_PROVIDED: nil,
    IGNORED_NEWLINE: nil,
    EMBDOC_END: nil,
    EMBDOC_LINE: nil,
    __END__: nil,

    # These tokens have more or less direct mappings.
    AMPERSAND: :tAMPER2,
    AMPERSAND_AMPERSAND: :tANDOP,
    AMPERSAND_AMPERSAND_EQUAL: :tOP_ASGN,
    AMPERSAND_DOT: :tANDDOT,
    AMPERSAND_EQUAL: :tOP_ASGN,
    BACK_REFERENCE: :tBACK_REF,
    BACKTICK: :tXSTRING_BEG,
    BANG: :tBANG,
    BANG_EQUAL: :tNEQ,
    BANG_TILDE: :tNMATCH,
    BRACE_LEFT: :tLCURLY,
    BRACE_RIGHT: :tRCURLY,
    BRACKET_LEFT: :tLBRACK2,
    BRACKET_LEFT_ARRAY: :tLBRACK,
    BRACKET_LEFT_RIGHT: :tAREF,
    BRACKET_LEFT_RIGHT_EQUAL: :tASET,
    BRACKET_RIGHT: :tRBRACK,
    CARET: :tCARET,
    CARET_EQUAL: :tOP_ASGN,
    CHARACTER_LITERAL: :tCHARACTER,
    CLASS_VARIABLE: :tCVAR,
    COLON: :tCOLON,
    COLON_COLON: :tCOLON2,
    COMMA: :tCOMMA,
    COMMENT: :tCOMMENT,
    CONSTANT: :tCONSTANT,
    DOT: :tDOT,
    DOT_DOT: :tDOT2,
    DOT_DOT_DOT: :tDOT3,
    EMBDOC_BEGIN: :tCOMMENT,
    EMBEXPR_BEGIN: :tSTRING_DBEG,
    EMBEXPR_END: :tSTRING_DEND,
    EMBVAR: :tSTRING_DVAR,
    EQUAL: :tEQL,
    EQUAL_EQUAL: :tEQ,
    EQUAL_EQUAL_EQUAL: :tEQQ,
    EQUAL_GREATER: :tASSOC,
    EQUAL_TILDE: :tMATCH,
    FLOAT: :tFLOAT,
    FLOAT_IMAGINARY: :tIMAGINARY,
    FLOAT_RATIONAL: :tRATIONAL,
    FLOAT_RATIONAL_IMAGINARY: :tIMAGINARY,
    GLOBAL_VARIABLE: :tGVAR,
    GREATER: :tGT,
    GREATER_EQUAL: :tGEQ,
    GREATER_GREATER: :tRSHFT,
    GREATER_GREATER_EQUAL: :tOP_ASGN,
    HEREDOC_START: :tSTRING_BEG,
    HEREDOC_END: :tSTRING_END,
    IDENTIFIER: :tIDENTIFIER,
    INSTANCE_VARIABLE: :tIVAR,
    INTEGER: :tINTEGER,
    INTEGER_IMAGINARY: :tIMAGINARY,
    INTEGER_RATIONAL: :tRATIONAL,
    INTEGER_RATIONAL_IMAGINARY: :tIMAGINARY,
    KEYWORD_ALIAS: :kALIAS,
    KEYWORD_AND: :kAND,
    KEYWORD_BEGIN: :kBEGIN,
    KEYWORD_BEGIN_UPCASE: :klBEGIN,
    KEYWORD_BREAK: :kBREAK,
    KEYWORD_CASE: :kCASE,
    KEYWORD_CLASS: :kCLASS,
    KEYWORD_DEF: :kDEF,
    KEYWORD_DEFINED: :kDEFINED,
    KEYWORD_DO: :kDO,
    KEYWORD_DO_LOOP: :kDO_COND,
    KEYWORD_END: :kEND,
    KEYWORD_END_UPCASE: :klEND,
    KEYWORD_ENSURE: :kENSURE,
    KEYWORD_ELSE: :kELSE,
    KEYWORD_ELSIF: :kELSIF,
    KEYWORD_FALSE: :kFALSE,
    KEYWORD_FOR: :kFOR,
    KEYWORD_IF: :kIF,
    KEYWORD_IF_MODIFIER: :kIF_MOD,
    KEYWORD_IN: :kIN,
    KEYWORD_MODULE: :kMODULE,
    KEYWORD_NEXT: :kNEXT,
    KEYWORD_NIL: :kNIL,
    KEYWORD_NOT: :kNOT,
    KEYWORD_OR: :kOR,
    KEYWORD_REDO: :kREDO,
    KEYWORD_RESCUE: :kRESCUE,
    KEYWORD_RESCUE_MODIFIER: :kRESCUE_MOD,
    KEYWORD_RETRY: :kRETRY,
    KEYWORD_RETURN: :kRETURN,
    KEYWORD_SELF: :kSELF,
    KEYWORD_SUPER: :kSUPER,
    KEYWORD_THEN: :kTHEN,
    KEYWORD_TRUE: :kTRUE,
    KEYWORD_UNDEF: :kUNDEF,
    KEYWORD_UNLESS: :kUNLESS,
    KEYWORD_UNLESS_MODIFIER: :kUNLESS_MOD,
    KEYWORD_UNTIL: :kUNTIL,
    KEYWORD_UNTIL_MODIFIER: :kUNTIL_MOD,
    KEYWORD_WHEN: :kWHEN,
    KEYWORD_WHILE: :kWHILE,
    KEYWORD_WHILE_MODIFIER: :kWHILE_MOD,
    KEYWORD_YIELD: :kYIELD,
    KEYWORD___ENCODING__: :k__ENCODING__,
    KEYWORD___FILE__: :k__FILE__,
    KEYWORD___LINE__: :k__LINE__,
    LABEL: :tLABEL,
    LABEL_END: :tLABEL_END,
    LAMBDA_BEGIN: :tLAMBEG,
    LESS: :tLT,
    LESS_EQUAL: :tLEQ,
    LESS_EQUAL_GREATER: :tCMP,
    LESS_LESS: :tLSHFT,
    LESS_LESS_EQUAL: :tOP_ASGN,
    METHOD_NAME: :tFID,
    MINUS: :tMINUS,
    MINUS_EQUAL: :tOP_ASGN,
    MINUS_GREATER: :tLAMBDA,
    NEWLINE: :tNL,
    NUMBERED_REFERENCE: :tNTH_REF,
    PARENTHESIS_LEFT: :tLPAREN,
    PARENTHESIS_LEFT_PARENTHESES: :tLPAREN_ARG,
    PARENTHESIS_RIGHT: :tRPAREN,
    PERCENT: :tPERCENT,
    PERCENT_EQUAL: :tOP_ASGN,
    PERCENT_LOWER_I: :tQSYMBOLS_BEG,
    PERCENT_LOWER_W: :tQWORDS_BEG,
    PERCENT_UPPER_I: :tSYMBOLS_BEG,
    PERCENT_UPPER_W: :tWORDS_BEG,
    PERCENT_LOWER_X: :tXSTRING_BEG,
    PLUS: :tPLUS,
    PLUS_EQUAL: :tOP_ASGN,
    PIPE_EQUAL: :tOP_ASGN,
    PIPE: :tPIPE,
    PIPE_PIPE: :tOROP,
    PIPE_PIPE_EQUAL: :tOP_ASGN,
    QUESTION_MARK: :tEH,
    REGEXP_BEGIN: :tREGEXP_BEG,
    REGEXP_END: :tSTRING_END,
    SEMICOLON: :tSEMI,
    SLASH: :tDIVIDE,
    SLASH_EQUAL: :tOP_ASGN,
    STAR: :tSTAR2,
    STAR_EQUAL: :tOP_ASGN,
    STAR_STAR: :tPOW,
    STAR_STAR_EQUAL: :tOP_ASGN,
    STRING_BEGIN: :tSTRING_BEG,
    STRING_CONTENT: :tSTRING_CONTENT,
    STRING_END: :tSTRING_END,
    SYMBOL_BEGIN: :tSYMBEG,
    TILDE: :tTILDE,
    UAMPERSAND: :tAMPER,
    UCOLON_COLON: :tCOLON3,
    # Prefix (beginless) range operators get the "B" token variants, matching
    # the UDOT_DOT_DOT entry below.
    UDOT_DOT: :tBDOT2,
    UDOT_DOT_DOT: :tBDOT3,
    UMINUS: :tUMINUS,
    UMINUS_NUM: :tUNARY_NUM,
    UPLUS: :tUPLUS,
    USTAR: :tSTAR,
    # A prefix `**` is a double splat, not the binary power operator.
    USTAR_STAR: :tDSTAR,
    WORDS_SEP: :tSPACE
  }.freeze

  private_constant :TYPES

  # buffer       - the source buffer every emitted range is attached to.
  # lexed        - the list of prism tokens to convert.
  # offset_cache - lookup (anything responding to #[]) that translates a
  #                prism offset into the offset used for parser gem ranges.
  attr_reader :buffer, :lexed, :offset_cache

  def initialize(buffer, lexed, offset_cache)
    @buffer = buffer
    @lexed = lexed
    @offset_cache = offset_cache
  end

  # Converts the prism tokens into parser gem tokens.
  #
  # Token values are never mutated in place, so the prism tokens handed to
  # the constructor are left untouched and the conversion can be repeated.
  #
  # @return [Array] a list of `[type, [value, Source::Range]]` entries.
  def to_a
    tokens = []
    index = 0

    while index < lexed.length
      token, = lexed[index]
      index += 1
      next if token.type == :IGNORED_NEWLINE || token.type == :EOF

      type = TYPES.fetch(token.type)
      value = token.value
      start_offset = token.location.start_offset
      end_offset = token.location.end_offset
      location = range(start_offset, end_offset)

      case type
      when :tCHARACTER
        value = value.delete_prefix("?")
      when :tCOMMENT
        if token.type == :EMBDOC_BEGIN
          # Fold the following tokens into a single comment, up to and
          # including EMBDOC_END. Running out of tokens (an unterminated
          # =begin) simply ends the comment instead of raising on nil.
          while (next_token = lexed[index])
            value += next_token.value
            end_offset = next_token.location.end_offset
            index += 1
            break if next_token.type == :EMBDOC_END
          end

          location = range(start_offset, end_offset)
        else
          # The line terminator is not part of the comment. Only shrink the
          # range when there was one to drop; a comment ending at EOF keeps
          # its full extent.
          stripped = value.chomp
          location = range(start_offset, end_offset - 1) unless stripped == value
          value = stripped
        end
      when :tNL
        value = nil
      when :tFLOAT
        value = Float(value)
      when :tIMAGINARY
        digits = value.chomp("i")
        value = Complex(0, digits.end_with?("r") ? Rational(digits.chomp("r")) : digits)
      when :tINTEGER
        if value.start_with?("+")
          # Split the leading sign off into its own token. Both halves go
          # through the offset cache, like every other range.
          tokens << [:tUNARY_NUM, ["+", range(start_offset, start_offset + 1)]]
          location = range(start_offset + 1, end_offset)
        end

        value = Integer(value)
      when :tLABEL, :tLABEL_END
        value = value.chomp(":")
      when :tNTH_REF
        value = Integer(value.delete_prefix("$"))
      when :tOP_ASGN
        value = value.chomp("=")
      when :tRATIONAL
        value = Rational(value.chomp("r"))
      when :tSPACE
        value = nil
      when :tSTRING_BEG
        next_token = lexed[index]
        next_next_token = lexed[index + 1]
        plain_quote = ["\"", "'"].include?(value)

        if plain_quote && next_token&.type == :STRING_END
          # An empty literal collapses into a single tSTRING token.
          type = :tSTRING
          value = ""
          location = range(start_offset, next_token.location.end_offset)
          index += 1
        elsif plain_quote && next_token&.type == :STRING_CONTENT && next_next_token&.type == :STRING_END
          # A literal with one piece of content also collapses into tSTRING.
          type = :tSTRING
          value = next_token.value
          location = range(start_offset, next_next_token.location.end_offset)
          index += 2
        elsif value.start_with?("<<")
          # Heredoc openers are reduced to "<<" plus the quote character,
          # defaulting to a double quote when the opener is unquoted.
          quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
          value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
        end
      when :tSTRING_DVAR
        value = nil
      when :tSTRING_END
        if token.type == :REGEXP_END
          # Only the closing delimiter belongs to this token; the flags are
          # emitted separately as tREGEXP_OPT below.
          value = value[0]
          location = range(start_offset, start_offset + 1)
        end
      when :tSYMBEG
        # NOTE(review): this also merges when the next token opens an
        # interpolation or closes an empty quoted symbol - confirm whether
        # those cases should be excluded.
        if (next_token = lexed[index]) && next_token.type != :STRING_CONTENT
          type = :tSYMBOL
          value = next_token.value
          location = range(start_offset, next_token.location.end_offset)
          index += 1
        end
      when :tFID
        # dig tolerates an empty token list (method name as the very first
        # token), where indexing tokens[-1] would be nil.
        type = :tIDENTIFIER if tokens.dig(-1, 0) == :kDEF
      end

      tokens << [type, [value, location]]

      if token.type == :REGEXP_END
        tokens << [:tREGEXP_OPT, [token.value[1..], range(start_offset + 1, end_offset)]]
      end
    end

    tokens
  end

  private

  # Builds a Source::Range on the buffer from two prism offsets, translating
  # both through the offset cache.
  def range(start_offset, end_offset)
    Source::Range.new(buffer, offset_cache[start_offset], offset_cache[end_offset])
  end
end
292
+ end
293
+ end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "parser"
4
+ require "rubocop"
5
+
6
module Parser
  class Prism < Base
    # Sentinel "Ruby version" that selects the prism backend in RuboCop. The
    # integer digits 80 82 73 83 77 are the ASCII codes for "PRISM"; the
    # fractional part is 33.
    VERSION_3_3 = 80_82_73_83_77.33
  end
end

RuboCop::AST::ProcessedSource.prepend(
  Module.new do
    # Returns Parser::Prism (loading it on first use) when the sentinel
    # version is requested, and defers to the original implementation for
    # every other version.
    #
    # Any additional positional arguments are accepted and forwarded
    # untouched by the bare `super`, so the override does not raise
    # ArgumentError when the wrapped method is called with more than the
    # Ruby version.
    def parser_class(ruby_version, *)
      if ruby_version == Parser::Prism::VERSION_3_3
        require "parser/prism"
        Parser::Prism
      else
        super
      end
    end
  end
)

# KNOWN_RUBIES is replaced with a copy that also contains the sentinel
# version. The constant is removed first so that the reassignment does not
# emit an "already initialized constant" warning.
known_rubies = RuboCop::TargetRuby.const_get(:KNOWN_RUBIES)
RuboCop::TargetRuby.send(:remove_const, :KNOWN_RUBIES)
RuboCop::TargetRuby::KNOWN_RUBIES = [*known_rubies, Parser::Prism::VERSION_3_3].freeze
@@ -0,0 +1,128 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "parser"
4
+ require "prism"
5
+
6
module Parser
  # A parser gem backend that parses with prism and converts the result into
  # the AST, comments, and tokens that the parser gem produces.
  class Prism < Base
    # NOTE(review): presumably read by the racc runtime that Parser::Base
    # builds on - confirm before removing.
    Racc_debug_parser = false

    # The version number this parser reports.
    # NOTE(review): presumably Ruby 3.3 in the parser gem's numbering.
    def version
      33
    end

    # Source is treated as UTF-8 by default.
    def default_encoding
      Encoding::UTF_8
    end

    # Intentionally a no-op.
    def yyerror
    end

    ##
    # Parses a source buffer and returns the AST.
    #
    # @param [Parser::Source::Buffer] source_buffer The source buffer to parse.
    # @return Parser::AST::Node
    #
    def parse(source_buffer)
      @source_buffer = source_buffer
      source = source_buffer.source

      build_ast(
        ::Prism.parse(source, filepath: source_buffer.name).value,
        build_offset_cache(source)
      )
    ensure
      @source_buffer = nil
    end

    ##
    # Parses a source buffer and returns the AST and the source code comments.
    #
    # @see #parse
    # @see Parser::Source::Comment#associate
    # @return [Array]
    #
    def parse_with_comments(source_buffer)
      @source_buffer = source_buffer
      source = source_buffer.source

      offset_cache = build_offset_cache(source)
      result = ::Prism.parse(source, filepath: source_buffer.name)

      [
        build_ast(result.value, offset_cache),
        build_comments(result.comments, offset_cache)
      ]
    ensure
      @source_buffer = nil
    end

    ##
    # Parses a source buffer and returns the AST, the source code comments,
    # and the tokens emitted by the lexer.
    #
    # @param [Parser::Source::Buffer] source_buffer
    # @param _recover accepted but not used by this backend.
    # @return [Array]
    #
    def tokenize(source_buffer, _recover = false)
      @source_buffer = source_buffer
      source = source_buffer.source

      offset_cache = build_offset_cache(source)
      result = ::Prism.parse_lex(source, filepath: source_buffer.name)
      program, tokens = result.value

      [
        build_ast(program, offset_cache),
        build_comments(result.comments, offset_cache),
        build_tokens(tokens, offset_cache)
      ]
    ensure
      @source_buffer = nil
    end

    # Since prism resolves num params for us, we don't need to support this kind
    # of logic here. This only reports whether the node's first child looks
    # like a numbered parameter name (_1 through _9).
    def try_declare_numparam(node)
      node.children[0].match?(/\A_[1-9]\z/)
    end

    private

    # Prism deals with offsets in bytes, while the parser gem deals with offsets
    # in characters. We need to handle this conversion in order to build the
    # parser gem AST.
    #
    # If the bytesize of the source is the same as the length, then we can just
    # use the offset directly. Otherwise, we build a hash that functions as a
    # cache for the conversion. Either way the result responds to #[].
    def build_offset_cache(source)
      if source.bytesize == source.length
        -> (offset) { offset }
      else
        Hash.new { |hash, offset| hash[offset] = source.byteslice(0, offset).length }
      end
    end

    # Build the parser gem AST from the prism AST.
    def build_ast(program, offset_cache)
      program.accept(Compiler.new(self, offset_cache))
    end

    # Build the parser gem comments from the prism comments.
    #
    # Comment locations are prism byte offsets, so they are translated through
    # the offset cache exactly like token and node locations; using them raw
    # misplaces comments in sources that contain multibyte characters. When no
    # cache is supplied, one is built from the current source buffer.
    def build_comments(comments, offset_cache = build_offset_cache(source_buffer.source))
      comments.map do |comment|
        location = comment.location
        Source::Comment.new(
          Source::Range.new(source_buffer, offset_cache[location.start_offset], offset_cache[location.end_offset])
        )
      end
    end

    # Build the parser gem tokens from the prism tokens. Only the first
    # element of each lexed entry is handed to the Lexer.
    def build_tokens(tokens, offset_cache)
      Lexer.new(source_buffer, tokens.map(&:first), offset_cache).to_a
    end
  end
end
126
+
127
+ require_relative "prism/compiler"
128
+ require_relative "prism/lexer"
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
Gem::Specification.new do |spec|
  spec.name = "parser-prism"
  spec.version = "0.1.0"
  spec.authors = ["Kevin Newton"]
  spec.email = ["kddnewton@gmail.com"]

  spec.summary = "A prism parser backend"
  spec.homepage = "https://github.com/kddnewton/parser-prism"
  spec.license = "MIT"

  # Package every file tracked by git except the test directories. git is
  # invoked directly (no shell) with its working directory set through the
  # chdir: option, so the process-wide working directory is never changed
  # while the gemspec is being evaluated.
  spec.files =
    IO.popen(%w[git ls-files -z], chdir: __dir__, err: IO::NULL) do |ls|
      ls.readlines("\x0", chomp: true)
        .reject { |f| f.start_with?("test/", "spec/", "features/") }
    end

  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_dependency "parser"
  spec.add_dependency "prism"
end
metadata ADDED
@@ -0,0 +1,87 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: parser-prism
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Kevin Newton
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2024-01-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: parser
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: prism
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description:
42
+ email:
43
+ - kddnewton@gmail.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".github/workflows/main.yml"
49
+ - ".gitignore"
50
+ - ".rubocop.yml"
51
+ - Gemfile
52
+ - Gemfile.lock
53
+ - LICENSE
54
+ - README.md
55
+ - Rakefile
56
+ - bin/bench
57
+ - bin/parse
58
+ - lib/parser/prism.rb
59
+ - lib/parser/prism/compare.rb
60
+ - lib/parser/prism/compiler.rb
61
+ - lib/parser/prism/lexer.rb
62
+ - lib/parser/prism/rubocop.rb
63
+ - parser-prism.gemspec
64
+ homepage: https://github.com/kddnewton/parser-prism
65
+ licenses:
66
+ - MIT
67
+ metadata: {}
68
+ post_install_message:
69
+ rdoc_options: []
70
+ require_paths:
71
+ - lib
72
+ required_ruby_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ requirements: []
83
+ rubygems_version: 3.4.1
84
+ signing_key:
85
+ specification_version: 4
86
+ summary: A prism parser backend
87
+ test_files: []