ruby_json_parser 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +7 -0
  2. data/.rubocop.yml +35 -0
  3. data/CHANGELOG.md +5 -0
  4. data/LICENSE +21 -0
  5. data/README.md +143 -0
  6. data/Rakefile +12 -0
  7. data/lib/ruby_json_parser/ast.rb +312 -0
  8. data/lib/ruby_json_parser/evaluator.rb +81 -0
  9. data/lib/ruby_json_parser/lexer.rb +358 -0
  10. data/lib/ruby_json_parser/parser.rb +205 -0
  11. data/lib/ruby_json_parser/result.rb +43 -0
  12. data/lib/ruby_json_parser/token.rb +171 -0
  13. data/lib/ruby_json_parser/version.rb +6 -0
  14. data/lib/ruby_json_parser.rb +77 -0
  15. data/sorbet/config +4 -0
  16. data/sorbet/rbi/annotations/.gitattributes +1 -0
  17. data/sorbet/rbi/annotations/minitest.rbi +119 -0
  18. data/sorbet/rbi/annotations/rainbow.rbi +269 -0
  19. data/sorbet/rbi/gems/.gitattributes +1 -0
  20. data/sorbet/rbi/gems/ast@2.4.2.rbi +585 -0
  21. data/sorbet/rbi/gems/erubi@1.13.0.rbi +150 -0
  22. data/sorbet/rbi/gems/json@2.7.2.rbi +1562 -0
  23. data/sorbet/rbi/gems/language_server-protocol@3.17.0.3.rbi +14238 -0
  24. data/sorbet/rbi/gems/minitest@5.24.1.rbi +1563 -0
  25. data/sorbet/rbi/gems/netrc@0.11.0.rbi +159 -0
  26. data/sorbet/rbi/gems/parallel@1.25.1.rbi +287 -0
  27. data/sorbet/rbi/gems/parser@3.3.4.0.rbi +5519 -0
  28. data/sorbet/rbi/gems/prism@0.30.0.rbi +39212 -0
  29. data/sorbet/rbi/gems/racc@1.8.0.rbi +162 -0
  30. data/sorbet/rbi/gems/rainbow@3.1.1.rbi +403 -0
  31. data/sorbet/rbi/gems/rake@13.2.1.rbi +3028 -0
  32. data/sorbet/rbi/gems/rbi@0.1.13.rbi +3078 -0
  33. data/sorbet/rbi/gems/regexp_parser@2.9.2.rbi +3772 -0
  34. data/sorbet/rbi/gems/rexml@3.3.1.rbi +4813 -0
  35. data/sorbet/rbi/gems/rubocop-ast@1.31.3.rbi +7015 -0
  36. data/sorbet/rbi/gems/rubocop@1.65.0.rbi +58191 -0
  37. data/sorbet/rbi/gems/ruby-progressbar@1.13.0.rbi +1318 -0
  38. data/sorbet/rbi/gems/spoom@1.3.3.rbi +4926 -0
  39. data/sorbet/rbi/gems/strscan@3.1.0.rbi +9 -0
  40. data/sorbet/rbi/gems/tapioca@0.15.1.rbi +3566 -0
  41. data/sorbet/rbi/gems/thor@1.3.1.rbi +4352 -0
  42. data/sorbet/rbi/gems/unicode-display_width@2.5.0.rbi +66 -0
  43. data/sorbet/rbi/gems/yard-sorbet@0.9.0.rbi +435 -0
  44. data/sorbet/rbi/gems/yard@0.9.36.rbi +18221 -0
  45. data/sorbet/tapioca/config.yml +13 -0
  46. data/sorbet/tapioca/require.rb +4 -0
  47. metadata +105 -0
# typed: strict
# frozen_string_literal: true

require_relative 'token'

class RubyJsonParser
  # A lexical analyzer (tokenizer) for JSON.
  # Consumes a source `String` and produces a stream of `Token`s.
  class Lexer
    extend T::Sig
    extend T::Generic
    include Enumerable

    # Type parameter for `Enumerable`
    # Declares the type that the lexer returns for tokens
    Elem = type_member { { fixed: Token } }

    class << self
      extend T::Sig

      # Tokenize the entire source string and return every token as an array.
      sig { params(source: String).returns(T::Array[Token]) }
      def lex(source)
        new(source).to_a
      end
    end

    sig { params(source: String).void }
    def initialize(source)
      @source = source

      # offset of the first character of the current lexeme
      @start_cursor = T.let(0, Integer)
      # offset of the next character to be consumed
      @cursor = T.let(0, Integer)
    end

    # Returns the next token.
    # Once the source is exhausted it keeps returning END_OF_FILE tokens.
    sig { returns(Token) }
    def next
      return Token.new(Token::END_OF_FILE) unless more_tokens?

      scan_token
    end

    # Yields every token up to (but not including) END_OF_FILE.
    # Returns an `Enumerator` when called without a block, `self` otherwise.
    sig { override.params(block: T.nilable(T.proc.params(arg0: Token).void)).returns(T.untyped) }
    def each(&block)
      return enum_for(T.must(__method__)) unless block

      loop do
        tok = self.next
        break if tok.type == Token::END_OF_FILE

        block.call(tok)
      end

      self
    end

    private

    # Whether there are still unconsumed characters in the source.
    sig { returns(T::Boolean) }
    def more_tokens?
      @cursor < @source.length
    end

    # Builds a token of `type` whose value is the accumulated lexeme.
    sig { params(type: Symbol).returns(Token) }
    def token_with_consumed_value(type)
      token(type, token_value)
    end

    # Builds a token and starts a fresh lexeme at the current cursor.
    sig { params(type: Symbol, value: T.nilable(String)).returns(Token) }
    def token(type, value = nil)
      @start_cursor = @cursor
      Token.new(type, value)
    end

    # Returns the current token value (the accumulated lexeme).
    sig { returns(String) }
    def token_value
      T.must @source[@start_cursor...@cursor]
    end

    # Consumes one character.
    # Returns the character and `true`, or `''` and `false` at EOF.
    sig { returns([String, T::Boolean]) }
    def advance_char
      return '', false unless more_tokens?

      char = next_char

      @cursor += 1
      [char, true]
    end

    # The character under the cursor (must not be called at EOF).
    sig { returns(String) }
    def next_char
      T.must @source[@cursor]
    end

    # Gets the next UTF-8 encoded character
    # without incrementing the cursor.
    sig { returns(String) }
    def peek_char
      return '' unless more_tokens?

      next_char
    end

    # Advance the next `n` characters.
    # Returns `false` when EOF was hit before consuming all `n`.
    sig { params(n: Integer).returns(T::Boolean) }
    def advance_chars(n)
      n.times do
        _, ok = advance_char
        return false unless ok
      end

      true
    end

    # Checks if the given character matches
    # the next UTF-8 encoded character in source code.
    # If they match, the cursor gets incremented.
    sig { params(char: String).returns(T::Boolean) }
    def match_char(char)
      return false unless more_tokens?

      if peek_char == char
        advance_char
        return true
      end

      false
    end

    # Consumes the next character if it's from the valid set.
    sig { params(valid_chars: String).returns(T::Boolean) }
    def match_chars(valid_chars)
      return false unless more_tokens?

      p = peek_char
      if p != '' && valid_chars.include?(p)
        advance_char
        return true
      end

      false
    end

    # Rewinds the cursor back n chars.
    sig { params(n: Integer).void }
    def backup_chars(n)
      @cursor -= n
    end

    # Skips the current accumulated token.
    sig { void }
    def skip_token
      @start_cursor = @cursor
    end

    # Dispatches on the next character and produces a single token,
    # skipping any leading whitespace.
    sig { returns(Token) }
    def scan_token
      loop do
        char, ok = advance_char
        return token(Token::END_OF_FILE) unless ok

        case char
        when '['
          return token(Token::LBRACKET)
        when ']'
          return token(Token::RBRACKET)
        when '{'
          return token(Token::LBRACE)
        when '}'
          return token(Token::RBRACE)
        when ','
          return token(Token::COMMA)
        when ':'
          return token(Token::COLON)
        when '.'
          return token(Token::DOT)
        when '"'
          return scan_string
        when '-'
          # a minus sign must be immediately followed by a digit
          p = peek_char
          return token(Token::ERROR, 'unexpected EOF') if p == ''
          return token(Token::ERROR, "unexpected number char: `#{p}`") unless Token::DIGITS.include?(p)

          char, = advance_char
          return scan_number(char)
        when ' ', "\n", "\r", "\t"
          skip_token
          next
        else
          if char.match?(/[[:alpha:]]/)
            return scan_identifier
          elsif char.match?(/\d/)
            return scan_number(char)
          end

          return token(Token::ERROR, "unexpected char `#{char}`")
        end
      end
    end

    # Whether `char` may appear inside an identifier (keyword candidate).
    sig { params(char: String).returns(T::Boolean) }
    def identifier_char?(char)
      char.match?(/[[:alpha:][:digit:]_]/)
    end

    # Scans an identifier; only JSON keywords (true/false/null) are valid.
    sig { returns(Token) }
    def scan_identifier
      advance_char while identifier_char?(peek_char)

      value = token_value
      return token(value.to_sym) if Token::KEYWORDS.include?(value)

      token(Token::ERROR, "unexpected identifier: `#{value}`")
    end

    # Consumes a run of consecutive decimal digits.
    sig { void }
    def consume_digits
      loop do
        p = peek_char
        break if p == '' || !Token::DIGITS.include?(p)

        _, ok = advance_char
        break unless ok
      end
    end

    # Checks if the next `n` characters are from the valid set.
    # The cursor is restored to where it was before the check.
    sig { params(valid_chars: String, n: Integer).returns(T::Boolean) }
    def accept_chars(valid_chars, n)
      consumed = 0
      result = T.let(true, T::Boolean)
      n.times do
        unless match_chars(valid_chars)
          result = false
          break
        end
        consumed += 1
      end

      # BUG FIX: rewind only the characters actually consumed.
      # `match_chars` stops advancing on the first mismatch, so always
      # rewinding `n` moved the cursor before the start of the check
      # whenever fewer than `n` characters matched.
      backup_chars(consumed)

      result
    end

    # Scans the remainder of a number literal; `init_char` is the first
    # (already consumed) digit.
    sig { params(init_char: String).returns(Token) }
    def scan_number(init_char)
      if init_char == '0'
        # JSON forbids leading zeros: `0` may not be followed by a digit
        if accept_chars(Token::DIGITS, 1)
          consume_digits
          return token(
            Token::ERROR,
            'illegal trailing zero in number literal',
          )
        end
      end

      consume_digits

      # optional fraction part
      if match_char('.')
        p = peek_char
        if p == ''
          return token(
            Token::ERROR,
            'unexpected EOF',
          )
        end

        unless Token::DIGITS.include?(p)
          return token(
            Token::ERROR,
            "unexpected char in number literal: `#{p}`",
          )
        end

        consume_digits
      end

      # optional exponent part
      if match_char('e') || match_char('E')
        match_char('+') || match_char('-')
        p = peek_char
        if p == ''
          return token(
            Token::ERROR,
            'unexpected EOF',
          )
        end
        unless Token::DIGITS.include?(p)
          return token(
            Token::ERROR,
            "unexpected char in number literal: `#{p}`",
          )
        end
        consume_digits
      end

      token_with_consumed_value(Token::NUMBER)
    end

    # Consumes characters up to and including the closing `"` (or EOF),
    # used to resynchronize after an error inside a string literal.
    sig { void }
    def swallow_rest_of_the_string
      loop do
        # swallow the rest of the string
        ch, more_tokens = advance_char
        break if !more_tokens || ch == '"'
      end
    end

    # Scans a string literal (the opening `"` was already consumed),
    # processing escape sequences into their actual characters.
    sig { returns(Token) }
    def scan_string
      value_buffer = String.new
      loop do
        char, ok = advance_char
        return token(Token::ERROR, 'unterminated string literal') unless ok
        return token(Token::STRING, value_buffer) if char == '"'

        if char != '\\'
          value_buffer << char
          next
        end

        char, ok = advance_char
        return token(Token::ERROR, 'unterminated string literal') unless ok

        case char
        when '"'
          value_buffer << '"'
        when '\\'
          value_buffer << '\\'
        when '/'
          value_buffer << '/'
        when 'b'
          value_buffer << "\b"
        when 'f'
          value_buffer << "\f"
        when 'n'
          value_buffer << "\n"
        when 'r'
          value_buffer << "\r"
        when 't'
          value_buffer << "\t"
        when 'u'
          unless accept_chars(Token::HEX_DIGITS, 4)
            swallow_rest_of_the_string
            # BUG FIX: build the token via `token` (not `Token.new`) so that
            # `@start_cursor` is reset; otherwise the next token produced via
            # `token_with_consumed_value` would include this string's lexeme.
            return token(Token::ERROR, 'invalid unicode escape')
          end

          advance_chars(4)
          last4 = T.must @source[@cursor - 4...@cursor]
          value_buffer << [last4.hex].pack('U')
        else
          swallow_rest_of_the_string
          # BUG FIX: `token` instead of `Token.new` — see the unicode branch.
          return token(Token::ERROR, "invalid escape `\\#{char}`")
        end
      end
    end
  end
end
# typed: strict
# frozen_string_literal: true

class RubyJsonParser
  # A recursive-descent JSON parser.
  # Produces a `Result` containing an AST and a list of syntax errors.
  class Parser
    extend T::Sig

    class << self
      extend T::Sig

      # Parse the given JSON source string.
      sig { params(source: String).returns(Result) }
      def parse(source)
        new(source).parse
      end

      private :new
    end

    sig { params(source: String).void }
    def initialize(source)
      # Lexer/Tokenizer that produces tokens
      @lexer = T.let(Lexer.new(source), Lexer)
      # Next token used for predicting productions
      @lookahead = T.let(Token.new(Token::NONE), Token)
      @errors = T.let([], T::Array[String])
    end

    # Parse the source and return the result (AST + errors).
    sig { returns(Result) }
    def parse
      advance # populate @lookahead
      ast = parse_value
      # BUG FIX: content after the first JSON value (eg. `[1] garbage`)
      # was silently ignored. ERROR tokens are excluded because their
      # message has already been registered by `advance`.
      unless accept(Token::END_OF_FILE, Token::ERROR)
        add_error("unexpected token `#{@lookahead}` after the JSON value")
      end
      Result.new(ast, @errors)
    end

    private

    # Parse a single JSON value based on the lookahead token.
    sig { returns(AST::Node) }
    def parse_value
      case @lookahead.type
      when Token::FALSE
        advance
        AST::FalseLiteralNode.new
      when Token::TRUE
        advance
        AST::TrueLiteralNode.new
      when Token::NULL
        advance
        AST::NullLiteralNode.new
      when Token::NUMBER
        tok = advance
        AST::NumberLiteralNode.new(T.must(tok.value))
      when Token::STRING
        tok = advance
        AST::StringLiteralNode.new(T.must(tok.value))
      when Token::LBRACKET
        parse_array
      when Token::LBRACE
        parse_object
      else
        tok = advance
        # lexer errors have already been registered by `advance`
        add_error("unexpected token `#{tok}`") if tok.type != Token::ERROR
        AST::InvalidNode.new(tok)
      end
    end

    # Parse an object literal, eg. `{"a": 1}`.
    sig { returns(AST::Node) }
    def parse_object
      advance # swallow `{`
      return AST::ObjectLiteralNode.new([]) if match(Token::RBRACE)

      pairs = parse_key_value_pairs
      consume(Token::RBRACE)
      AST::ObjectLiteralNode.new(pairs)
    end

    # Parse the comma-separated pairs inside an object literal.
    sig { returns(T::Array[AST::KeyValuePairNode]) }
    def parse_key_value_pairs
      elements = [parse_key_value_pair]

      loop do
        break if accept(Token::END_OF_FILE, Token::RBRACE)
        break unless match(Token::COMMA)

        if accept(Token::RBRACE)
          add_error('illegal trailing comma in object literal')
          break
        end

        elements << parse_key_value_pair
      end

      elements
    end

    # Parse a single `"key": value` pair.
    sig { returns(AST::KeyValuePairNode) }
    def parse_key_value_pair
      key = parse_value
      if accept(Token::COMMA, Token::RBRACE, Token::END_OF_FILE)
        # a lone value like `{1}` — report the missing key but keep the value
        add_error("missing key in object literal for value: `#{key}`")
        return AST::KeyValuePairNode.new(nil, key)
      end

      add_error("non-string key in object literal: `#{key}`") unless key.is_a?(AST::StringLiteralNode)
      consume(Token::COLON)
      value = parse_value

      AST::KeyValuePairNode.new(key, value)
    end

    # Parse an array literal, eg. `[1, 2]`.
    sig { returns(AST::Node) }
    def parse_array
      advance # swallow `[`
      return AST::ArrayLiteralNode.new([]) if match(Token::RBRACKET)

      elements = parse_array_elements
      consume(Token::RBRACKET)
      AST::ArrayLiteralNode.new(elements)
    end

    # Parse the comma-separated elements inside an array literal.
    sig { returns(T::Array[AST::Node]) }
    def parse_array_elements
      elements = [parse_value]

      loop do
        break if accept(Token::END_OF_FILE, Token::RBRACKET)
        break unless match(Token::COMMA)

        if accept(Token::RBRACKET)
          add_error('illegal trailing comma in array literal')
          break
        end

        elements << parse_value
      end

      elements
    end

    # Move over to the next token.
    # Returns the previous lookahead token.
    sig { returns(Token) }
    def advance
      previous = @lookahead
      @lookahead = @lexer.next
      handle_error_token(@lookahead) if @lookahead.type == Token::ERROR

      previous
    end

    # Add the content of an error token to the syntax error list.
    sig { params(err: Token).void }
    def handle_error_token(err)
      msg = err.value
      return unless msg

      add_error(msg)
    end

    # Register a syntax error
    sig { params(err: String).void }
    def add_error(err)
      @errors << err
    end

    # Checks if the next token matches any of the given types,
    # if so it gets consumed.
    sig { params(token_types: Symbol).returns(T.nilable(Token)) }
    def match(*token_types)
      token_types.each do |type|
        return advance if accept(type)
      end

      nil
    end

    # Checks whether the next token matches any the specified types.
    sig { params(token_types: Symbol).returns(T::Boolean) }
    def accept(*token_types)
      token_types.each do |type|
        return true if @lookahead.type == type
      end

      false
    end

    # Consume the next token if it is of `token_type`, registering an
    # error otherwise. Returns the token and a success flag.
    sig { params(token_type: Symbol).returns([Token, T::Boolean]) }
    def consume(token_type)
      # lexer errors were already registered by `advance`
      return advance, false if @lookahead.type == Token::ERROR

      if @lookahead.type != token_type
        error_expected(Token.type_to_string(token_type))
        return advance, false
      end

      [advance, true]
    end

    # Adds an error which tells the user that another type of token
    # was expected.
    sig { params(expected: String).void }
    def error_expected(expected)
      add_error("unexpected `#{@lookahead}`, expected `#{expected}`")
    end
  end
end
# typed: strong
# frozen_string_literal: true

class RubyJsonParser
  # The result of parsing a JSON string/file.
  # Combines an AST (Abstract Syntax Tree) and a list of errors.
  class Result
    extend T::Sig

    # Root node of the parsed AST.
    sig { returns(AST::Node) }
    attr_reader :ast

    # All syntax errors collected during parsing.
    sig { returns(T::Array[String]) }
    attr_reader :errors

    sig { params(ast: AST::Node, errors: T::Array[String]).void }
    def initialize(ast, errors)
      @ast = ast
      @errors = errors
    end

    # Whether any syntax errors were registered.
    sig { returns(T::Boolean) }
    def err?
      !errors.empty?
    end

    # Human-readable representation listing the errors (if any)
    # followed by the AST.
    sig { returns(String) }
    def inspect
      buffer = String.new
      buffer << "<RubyJsonParser::Result>\n"

      if err?
        buffer << "  !Errors!\n"
        errors.each { |message| buffer << "    - #{message}\n" }
        buffer << "\n"
      end

      buffer << "  AST:\n"
      buffer << ast.inspect(2)
    end
  end
end