parser-prism 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,293 @@
1
+ # frozen_string_literal: true
2
+
3
module Parser
  class Prism
    # Accepts a list of prism tokens and converts them into the expected format
    # for the parser gem: an array of `[type, [value, range]]` entries.
    class Lexer
      # Maps prism token types to parser gem token types. A nil value marks a
      # prism token that is never emitted on its own by this lexer.
      TYPES = {
        # These tokens should never appear in the output of the lexer.
        EOF: nil,
        MISSING: nil,
        NOT_PROVIDED: nil,
        IGNORED_NEWLINE: nil,
        EMBDOC_END: nil,
        EMBDOC_LINE: nil,
        __END__: nil,

        # These tokens have more or less direct mappings.
        AMPERSAND: :tAMPER2,
        AMPERSAND_AMPERSAND: :tANDOP,
        AMPERSAND_AMPERSAND_EQUAL: :tOP_ASGN,
        AMPERSAND_DOT: :tANDDOT,
        AMPERSAND_EQUAL: :tOP_ASGN,
        BACK_REFERENCE: :tBACK_REF,
        BACKTICK: :tXSTRING_BEG,
        BANG: :tBANG,
        BANG_EQUAL: :tNEQ,
        BANG_TILDE: :tNMATCH,
        BRACE_LEFT: :tLCURLY,
        BRACE_RIGHT: :tRCURLY,
        BRACKET_LEFT: :tLBRACK2,
        BRACKET_LEFT_ARRAY: :tLBRACK,
        BRACKET_LEFT_RIGHT: :tAREF,
        BRACKET_LEFT_RIGHT_EQUAL: :tASET,
        BRACKET_RIGHT: :tRBRACK,
        CARET: :tCARET,
        CARET_EQUAL: :tOP_ASGN,
        CHARACTER_LITERAL: :tCHARACTER,
        CLASS_VARIABLE: :tCVAR,
        COLON: :tCOLON,
        COLON_COLON: :tCOLON2,
        COMMA: :tCOMMA,
        COMMENT: :tCOMMENT,
        CONSTANT: :tCONSTANT,
        DOT: :tDOT,
        DOT_DOT: :tDOT2,
        DOT_DOT_DOT: :tDOT3,
        EMBDOC_BEGIN: :tCOMMENT,
        EMBEXPR_BEGIN: :tSTRING_DBEG,
        EMBEXPR_END: :tSTRING_DEND,
        EMBVAR: :tSTRING_DVAR,
        EQUAL: :tEQL,
        EQUAL_EQUAL: :tEQ,
        EQUAL_EQUAL_EQUAL: :tEQQ,
        EQUAL_GREATER: :tASSOC,
        EQUAL_TILDE: :tMATCH,
        FLOAT: :tFLOAT,
        FLOAT_IMAGINARY: :tIMAGINARY,
        FLOAT_RATIONAL: :tRATIONAL,
        FLOAT_RATIONAL_IMAGINARY: :tIMAGINARY,
        GLOBAL_VARIABLE: :tGVAR,
        GREATER: :tGT,
        GREATER_EQUAL: :tGEQ,
        GREATER_GREATER: :tRSHFT,
        GREATER_GREATER_EQUAL: :tOP_ASGN,
        HEREDOC_START: :tSTRING_BEG,
        HEREDOC_END: :tSTRING_END,
        IDENTIFIER: :tIDENTIFIER,
        INSTANCE_VARIABLE: :tIVAR,
        INTEGER: :tINTEGER,
        INTEGER_IMAGINARY: :tIMAGINARY,
        INTEGER_RATIONAL: :tRATIONAL,
        INTEGER_RATIONAL_IMAGINARY: :tIMAGINARY,
        KEYWORD_ALIAS: :kALIAS,
        KEYWORD_AND: :kAND,
        KEYWORD_BEGIN: :kBEGIN,
        KEYWORD_BEGIN_UPCASE: :klBEGIN,
        KEYWORD_BREAK: :kBREAK,
        KEYWORD_CASE: :kCASE,
        KEYWORD_CLASS: :kCLASS,
        KEYWORD_DEF: :kDEF,
        KEYWORD_DEFINED: :kDEFINED,
        KEYWORD_DO: :kDO,
        KEYWORD_DO_LOOP: :kDO_COND,
        KEYWORD_END: :kEND,
        KEYWORD_END_UPCASE: :klEND,
        KEYWORD_ENSURE: :kENSURE,
        KEYWORD_ELSE: :kELSE,
        KEYWORD_ELSIF: :kELSIF,
        KEYWORD_FALSE: :kFALSE,
        KEYWORD_FOR: :kFOR,
        KEYWORD_IF: :kIF,
        KEYWORD_IF_MODIFIER: :kIF_MOD,
        KEYWORD_IN: :kIN,
        KEYWORD_MODULE: :kMODULE,
        KEYWORD_NEXT: :kNEXT,
        KEYWORD_NIL: :kNIL,
        KEYWORD_NOT: :kNOT,
        KEYWORD_OR: :kOR,
        KEYWORD_REDO: :kREDO,
        KEYWORD_RESCUE: :kRESCUE,
        KEYWORD_RESCUE_MODIFIER: :kRESCUE_MOD,
        KEYWORD_RETRY: :kRETRY,
        KEYWORD_RETURN: :kRETURN,
        KEYWORD_SELF: :kSELF,
        KEYWORD_SUPER: :kSUPER,
        KEYWORD_THEN: :kTHEN,
        KEYWORD_TRUE: :kTRUE,
        KEYWORD_UNDEF: :kUNDEF,
        KEYWORD_UNLESS: :kUNLESS,
        KEYWORD_UNLESS_MODIFIER: :kUNLESS_MOD,
        KEYWORD_UNTIL: :kUNTIL,
        KEYWORD_UNTIL_MODIFIER: :kUNTIL_MOD,
        KEYWORD_WHEN: :kWHEN,
        KEYWORD_WHILE: :kWHILE,
        KEYWORD_WHILE_MODIFIER: :kWHILE_MOD,
        KEYWORD_YIELD: :kYIELD,
        KEYWORD___ENCODING__: :k__ENCODING__,
        KEYWORD___FILE__: :k__FILE__,
        KEYWORD___LINE__: :k__LINE__,
        LABEL: :tLABEL,
        LABEL_END: :tLABEL_END,
        LAMBDA_BEGIN: :tLAMBEG,
        LESS: :tLT,
        LESS_EQUAL: :tLEQ,
        LESS_EQUAL_GREATER: :tCMP,
        LESS_LESS: :tLSHFT,
        LESS_LESS_EQUAL: :tOP_ASGN,
        METHOD_NAME: :tFID,
        MINUS: :tMINUS,
        MINUS_EQUAL: :tOP_ASGN,
        MINUS_GREATER: :tLAMBDA,
        NEWLINE: :tNL,
        NUMBERED_REFERENCE: :tNTH_REF,
        PARENTHESIS_LEFT: :tLPAREN,
        PARENTHESIS_LEFT_PARENTHESES: :tLPAREN_ARG,
        PARENTHESIS_RIGHT: :tRPAREN,
        PERCENT: :tPERCENT,
        PERCENT_EQUAL: :tOP_ASGN,
        PERCENT_LOWER_I: :tQSYMBOLS_BEG,
        PERCENT_LOWER_W: :tQWORDS_BEG,
        PERCENT_UPPER_I: :tSYMBOLS_BEG,
        PERCENT_UPPER_W: :tWORDS_BEG,
        PERCENT_LOWER_X: :tXSTRING_BEG,
        PLUS: :tPLUS,
        PLUS_EQUAL: :tOP_ASGN,
        PIPE_EQUAL: :tOP_ASGN,
        PIPE: :tPIPE,
        PIPE_PIPE: :tOROP,
        PIPE_PIPE_EQUAL: :tOP_ASGN,
        QUESTION_MARK: :tEH,
        REGEXP_BEGIN: :tREGEXP_BEG,
        REGEXP_END: :tSTRING_END,
        SEMICOLON: :tSEMI,
        SLASH: :tDIVIDE,
        SLASH_EQUAL: :tOP_ASGN,
        STAR: :tSTAR2,
        STAR_EQUAL: :tOP_ASGN,
        STAR_STAR: :tPOW,
        STAR_STAR_EQUAL: :tOP_ASGN,
        STRING_BEGIN: :tSTRING_BEG,
        STRING_CONTENT: :tSTRING_CONTENT,
        STRING_END: :tSTRING_END,
        SYMBOL_BEGIN: :tSYMBEG,
        TILDE: :tTILDE,
        UAMPERSAND: :tAMPER,
        UCOLON_COLON: :tCOLON3,
        # Beginless ranges use the dedicated tBDOT2/tBDOT3 token types.
        UDOT_DOT: :tBDOT2,
        UDOT_DOT_DOT: :tBDOT3,
        UMINUS: :tUMINUS,
        UMINUS_NUM: :tUNARY_NUM,
        UPLUS: :tUPLUS,
        USTAR: :tSTAR,
        # A unary `**` is a double splat, not the power operator.
        USTAR_STAR: :tDSTAR,
        WORDS_SEP: :tSPACE
      }.freeze

      # Token types that, when they directly follow a SYMBOL_BEGIN, mean the
      # symbol has string-like contents and must not be folded into a single
      # tSYMBOL token.
      UNMERGEABLE_AFTER_SYMBEG = %i[STRING_CONTENT EMBEXPR_BEGIN EMBVAR STRING_END].freeze

      private_constant :TYPES, :UNMERGEABLE_AFTER_SYMBEG

      attr_reader :buffer, :lexed, :offset_cache

      # @param buffer the source buffer every emitted range points into
      # @param lexed [Array] the prism tokens to convert
      # @param offset_cache an object answering `[byte_offset]` with the
      #   matching character offset
      def initialize(buffer, lexed, offset_cache)
        @buffer = buffer
        @lexed = lexed
        @offset_cache = offset_cache
      end

      # Converts the prism tokens into parser gem tokens.
      #
      # The input tokens are never mutated, so calling this method repeatedly
      # yields the same result.
      #
      # @return [Array] a list of `[type, [value, range]]` entries
      # @raise [KeyError] if a token has a type missing from TYPES
      def to_a
        tokens = []
        index = 0

        # A manual index is used because several branches look ahead and
        # consume the tokens that follow the current one.
        while index < lexed.length
          # The destructuring also tolerates an entry shaped like
          # [token, ...] for the token being visited.
          token, = lexed[index]
          index += 1
          next if token.type == :IGNORED_NEWLINE || token.type == :EOF

          type = TYPES.fetch(token.type)
          value = token.value
          start_offset = token.location.start_offset
          end_offset = token.location.end_offset
          location = range(start_offset, end_offset)

          case type
          when :tCHARACTER
            value = value.delete_prefix("?")
          when :tCOMMENT
            if token.type == :EMBDOC_BEGIN
              # Fold every following embdoc token, up to and including the
              # EMBDOC_END, into one comment. Stop quietly if the token list
              # ends before the terminator shows up.
              while (next_token = lexed[index])
                value += next_token.value
                end_offset = next_token.location.end_offset
                index += 1
                break if next_token.type == :EMBDOC_END
              end

              location = range(start_offset, end_offset)
            else
              # Shrink the range by exactly the line terminator that was
              # removed from the value (nothing, "\n" or "\r\n").
              chomped = value.chomp
              location = range(start_offset, end_offset - (value.bytesize - chomped.bytesize))
              value = chomped
            end
          when :tNL, :tSPACE, :tSTRING_DVAR
            value = nil
          when :tFLOAT
            value = Float(value)
          when :tIMAGINARY
            imaginary = value.chomp("i")
            value = Complex(0, imaginary.end_with?("r") ? Rational(imaginary.chomp("r")) : imaginary)
          when :tINTEGER
            if value.start_with?("+")
              # An explicit plus sign becomes its own tUNARY_NUM token and the
              # integer's range starts right after it.
              tokens << [:tUNARY_NUM, ["+", range(start_offset, start_offset + 1)]]
              location = range(start_offset + 1, end_offset)
            end

            value = Integer(value)
          when :tLABEL, :tLABEL_END
            value = value.chomp(":")
          when :tNTH_REF
            value = Integer(value.delete_prefix("$"))
          when :tOP_ASGN
            value = value.chomp("=")
          when :tRATIONAL
            value = Rational(value.chomp("r"))
          when :tSTRING_BEG
            next_token = lexed[index]
            next_next_token = lexed[index + 1]
            plain_quote = value == "\"" || value == "'"

            if plain_quote && next_token&.type == :STRING_END
              # An empty plain string collapses into a single tSTRING.
              type = :tSTRING
              value = ""
              location = range(start_offset, next_token.location.end_offset)
              index += 1
            elsif plain_quote && next_token&.type == :STRING_CONTENT && next_next_token&.type == :STRING_END
              # A plain string with a single content part collapses as well.
              type = :tSTRING
              value = next_token.value
              location = range(start_offset, next_next_token.location.end_offset)
              index += 2
            elsif value.start_with?("<<")
              # Heredoc openers are reported as "<<" followed by the quote
              # style, defaulting to a double quote.
              quote = %w[- ~].include?(value[2]) ? value[3] : value[2]
              quote = "\"" unless quote == "'" || quote == "\""
              value = "<<#{quote}"
            end
          when :tSTRING_END
            if token.type == :REGEXP_END
              # Only the closing delimiter belongs to tSTRING_END; the flags
              # are emitted as a separate tREGEXP_OPT token below.
              value = value[0]
              location = range(start_offset, start_offset + 1)
            end
          when :tSYMBEG
            next_token = lexed[index]

            if next_token && !UNMERGEABLE_AFTER_SYMBEG.include?(next_token.type)
              type = :tSYMBOL
              value = next_token.value
              location = range(start_offset, next_token.location.end_offset)
              index += 1
            end
          when :tFID
            # Directly after `def` the name is reported as a plain identifier.
            type = :tIDENTIFIER if tokens.dig(-1, 0) == :kDEF
          end

          tokens << [type, [value, location]]

          if token.type == :REGEXP_END
            tokens << [:tREGEXP_OPT, [token.value[1..], range(start_offset + 1, end_offset)]]
          end
        end

        tokens
      end

      private

      # Builds a range in the buffer from two prism offsets, translating both
      # ends through the offset cache.
      def range(start_offset, end_offset)
        Source::Range.new(buffer, offset_cache[start_offset], offset_cache[end_offset])
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "parser"
4
+ require "rubocop"
5
+
6
module Parser
  class Prism < Base
    # Sentinel target "Ruby version" that selects the prism backend. The
    # integer part strings together the ASCII codes of "PRISM"
    # (80, 82, 73, 83, 77).
    VERSION_3_3 = 80_82_73_83_77.33
  end
end

# Prepended to RuboCop::AST::ProcessedSource so that the sentinel version
# resolves to Parser::Prism; any other version falls through to the original
# implementation.
prism_parser_selection = Module.new do
  def parser_class(ruby_version)
    return super unless ruby_version == Parser::Prism::VERSION_3_3

    # Loaded lazily, only once the prism backend is actually requested.
    require "parser/prism"
    Parser::Prism
  end
end

RuboCop::AST::ProcessedSource.prepend(prism_parser_selection)

# Replace RuboCop's list of known target rubies with a copy that also contains
# the sentinel version. The constant is removed first so that it can be
# defined again.
target_ruby = RuboCop::TargetRuby
extended_rubies = [*target_ruby.const_get(:KNOWN_RUBIES), Parser::Prism::VERSION_3_3].freeze
target_ruby.send(:remove_const, :KNOWN_RUBIES)
target_ruby.const_set(:KNOWN_RUBIES, extended_rubies)
@@ -0,0 +1,128 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "parser"
4
+ require "prism"
5
+
6
module Parser
  # A parser gem front end that delegates the actual parsing to prism and
  # translates the results (AST, comments, tokens) into parser gem objects.
  class Prism < Base
    Racc_debug_parser = false

    # @return [Integer] always 33
    #   (presumably Ruby 3.3 in the parser gem's numbering; confirm against
    #   Parser::Base)
    def version
      33
    end

    # @return [Encoding] always UTF-8
    def default_encoding
      Encoding::UTF_8
    end

    # Intentionally a no-op.
    def yyerror
    end

    ##
    # Parses a source buffer and returns the AST.
    #
    # @param [Parser::Source::Buffer] source_buffer The source buffer to parse.
    # @return Parser::AST::Node
    #
    def parse(source_buffer)
      @source_buffer = source_buffer
      source = source_buffer.source

      build_ast(
        ::Prism.parse(source, filepath: source_buffer.name).value,
        build_offset_cache(source)
      )
    ensure
      @source_buffer = nil
    end

    ##
    # Parses a source buffer and returns the AST and the source code comments.
    #
    # @see #parse
    # @see Parser::Source::Comment#associate
    # @return [Array]
    #
    def parse_with_comments(source_buffer)
      @source_buffer = source_buffer
      source = source_buffer.source

      offset_cache = build_offset_cache(source)
      result = ::Prism.parse(source, filepath: source_buffer.name)

      [
        build_ast(result.value, offset_cache),
        build_comments(result.comments, offset_cache)
      ]
    ensure
      @source_buffer = nil
    end

    ##
    # Parses a source buffer and returns the AST, the source code comments,
    # and the tokens emitted by the lexer.
    #
    # @param [Parser::Source::Buffer] source_buffer
    # @param _recover accepted but ignored
    # @return [Array]
    #
    def tokenize(source_buffer, _recover = false)
      @source_buffer = source_buffer
      source = source_buffer.source

      offset_cache = build_offset_cache(source)
      result = ::Prism.parse_lex(source, filepath: source_buffer.name)
      program, tokens = result.value

      [
        build_ast(program, offset_cache),
        build_comments(result.comments, offset_cache),
        build_tokens(tokens, offset_cache)
      ]
    ensure
      @source_buffer = nil
    end

    # Reports whether the node's first child is named like a numbered
    # parameter (`_1` through `_9`). Nothing is declared here; according to
    # the original author prism already resolves numbered parameters itself.
    def try_declare_numparam(node)
      node.children[0].match?(/\A_[1-9]\z/)
    end

    private

    # Prism deals with offsets in bytes, while the parser gem deals with offsets
    # in characters. We need to handle this conversion in order to build the
    # parser gem AST.
    #
    # If the bytesize of the source is the same as the length, then we can just
    # use the offset directly. Otherwise, we build a hash that functions as a
    # cache for the conversion.
    #
    # Either way the result answers `[byte_offset]` with a character offset.
    def build_offset_cache(source)
      if source.bytesize == source.length
        ->(offset) { offset }
      else
        Hash.new { |hash, offset| hash[offset] = source.byteslice(0, offset).length }
      end
    end

    # Build the parser gem AST from the prism AST.
    def build_ast(program, offset_cache)
      program.accept(Compiler.new(self, offset_cache))
    end

    # Build the parser gem comments from the prism comments.
    #
    # Comment locations are byte offsets like every other prism location, so
    # they are translated through the offset cache before a range is built.
    # When no cache is given, one is derived from the current source buffer.
    def build_comments(comments, offset_cache = build_offset_cache(source_buffer.source))
      comments.map do |comment|
        location = comment.location
        range =
          Source::Range.new(
            source_buffer,
            offset_cache[location.start_offset],
            offset_cache[location.end_offset]
          )

        Source::Comment.new(range)
      end
    end

    # Build the parser gem tokens from the prism tokens. Each entry of
    # `tokens` is reduced to its first element before it reaches the lexer.
    def build_tokens(tokens, offset_cache)
      Lexer.new(source_buffer, tokens.map(&:first), offset_cache).to_a
    end
  end
end
126
+
127
+ require_relative "prism/compiler"
128
+ require_relative "prism/lexer"
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
# Gem specification for parser-prism.
Gem::Specification.new do |spec|
  spec.name = "parser-prism"
  spec.version = "0.1.0"
  spec.authors = ["Kevin Newton"]
  spec.email = ["kddnewton@gmail.com"]

  spec.summary = "A prism parser backend"
  spec.homepage = "https://github.com/kddnewton/parser-prism"
  spec.license = "MIT"

  # Package every file tracked by git, except those under the test, spec and
  # features directories.
  tracked_files = Dir.chdir(__dir__) { `git ls-files -z`.split("\x0") }
  spec.files = tracked_files.reject { |path| path.match?(%r{^(test|spec|features)/}) }

  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependencies.
  spec.add_dependency "parser"
  spec.add_dependency "prism"
end
metadata ADDED
@@ -0,0 +1,87 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: parser-prism
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Kevin Newton
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2024-01-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: parser
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: prism
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description:
42
+ email:
43
+ - kddnewton@gmail.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".github/workflows/main.yml"
49
+ - ".gitignore"
50
+ - ".rubocop.yml"
51
+ - Gemfile
52
+ - Gemfile.lock
53
+ - LICENSE
54
+ - README.md
55
+ - Rakefile
56
+ - bin/bench
57
+ - bin/parse
58
+ - lib/parser/prism.rb
59
+ - lib/parser/prism/compare.rb
60
+ - lib/parser/prism/compiler.rb
61
+ - lib/parser/prism/lexer.rb
62
+ - lib/parser/prism/rubocop.rb
63
+ - parser-prism.gemspec
64
+ homepage: https://github.com/kddnewton/parser-prism
65
+ licenses:
66
+ - MIT
67
+ metadata: {}
68
+ post_install_message:
69
+ rdoc_options: []
70
+ require_paths:
71
+ - lib
72
+ required_ruby_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ requirements: []
83
+ rubygems_version: 3.4.1
84
+ signing_key:
85
+ specification_version: 4
86
+ summary: A prism parser backend
87
+ test_files: []