ruby_json_parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +7 -0
  2. data/.rubocop.yml +35 -0
  3. data/CHANGELOG.md +5 -0
  4. data/LICENSE +21 -0
  5. data/README.md +143 -0
  6. data/Rakefile +12 -0
  7. data/lib/ruby_json_parser/ast.rb +312 -0
  8. data/lib/ruby_json_parser/evaluator.rb +81 -0
  9. data/lib/ruby_json_parser/lexer.rb +358 -0
  10. data/lib/ruby_json_parser/parser.rb +205 -0
  11. data/lib/ruby_json_parser/result.rb +43 -0
  12. data/lib/ruby_json_parser/token.rb +171 -0
  13. data/lib/ruby_json_parser/version.rb +6 -0
  14. data/lib/ruby_json_parser.rb +77 -0
  15. data/sorbet/config +4 -0
  16. data/sorbet/rbi/annotations/.gitattributes +1 -0
  17. data/sorbet/rbi/annotations/minitest.rbi +119 -0
  18. data/sorbet/rbi/annotations/rainbow.rbi +269 -0
  19. data/sorbet/rbi/gems/.gitattributes +1 -0
  20. data/sorbet/rbi/gems/ast@2.4.2.rbi +585 -0
  21. data/sorbet/rbi/gems/erubi@1.13.0.rbi +150 -0
  22. data/sorbet/rbi/gems/json@2.7.2.rbi +1562 -0
  23. data/sorbet/rbi/gems/language_server-protocol@3.17.0.3.rbi +14238 -0
  24. data/sorbet/rbi/gems/minitest@5.24.1.rbi +1563 -0
  25. data/sorbet/rbi/gems/netrc@0.11.0.rbi +159 -0
  26. data/sorbet/rbi/gems/parallel@1.25.1.rbi +287 -0
  27. data/sorbet/rbi/gems/parser@3.3.4.0.rbi +5519 -0
  28. data/sorbet/rbi/gems/prism@0.30.0.rbi +39212 -0
  29. data/sorbet/rbi/gems/racc@1.8.0.rbi +162 -0
  30. data/sorbet/rbi/gems/rainbow@3.1.1.rbi +403 -0
  31. data/sorbet/rbi/gems/rake@13.2.1.rbi +3028 -0
  32. data/sorbet/rbi/gems/rbi@0.1.13.rbi +3078 -0
  33. data/sorbet/rbi/gems/regexp_parser@2.9.2.rbi +3772 -0
  34. data/sorbet/rbi/gems/rexml@3.3.1.rbi +4813 -0
  35. data/sorbet/rbi/gems/rubocop-ast@1.31.3.rbi +7015 -0
  36. data/sorbet/rbi/gems/rubocop@1.65.0.rbi +58191 -0
  37. data/sorbet/rbi/gems/ruby-progressbar@1.13.0.rbi +1318 -0
  38. data/sorbet/rbi/gems/spoom@1.3.3.rbi +4926 -0
  39. data/sorbet/rbi/gems/strscan@3.1.0.rbi +9 -0
  40. data/sorbet/rbi/gems/tapioca@0.15.1.rbi +3566 -0
  41. data/sorbet/rbi/gems/thor@1.3.1.rbi +4352 -0
  42. data/sorbet/rbi/gems/unicode-display_width@2.5.0.rbi +66 -0
  43. data/sorbet/rbi/gems/yard-sorbet@0.9.0.rbi +435 -0
  44. data/sorbet/rbi/gems/yard@0.9.36.rbi +18221 -0
  45. data/sorbet/tapioca/config.yml +13 -0
  46. data/sorbet/tapioca/require.rb +4 -0
  47. metadata +105 -0
# typed: strict
# frozen_string_literal: true

require_relative 'token'

class RubyJsonParser
  # A lexical analyzer (tokenizer) for JSON.
  # Produces a stream of `Token` objects from a source string and can be
  # consumed incrementally (`#next`) or as an `Enumerable` (`#each`, `#to_a`).
  class Lexer
    extend T::Sig
    extend T::Generic
    include Enumerable

    # Type parameter for `Enumerable`
    # Declares the type that the lexer returns for tokens
    Elem = type_member { { fixed: Token } }

    class << self
      extend T::Sig

      # Tokenizes the entire source string and returns all tokens as an array.
      sig { params(source: String).returns(T::Array[Token]) }
      def lex(source)
        new(source).to_a
      end
    end

    sig { params(source: String).void }
    def initialize(source)
      @source = source

      # offset of the first character of the current lexeme
      @start_cursor = T.let(0, Integer)
      # offset of the next character
      @cursor = T.let(0, Integer)
    end

    # Returns the next token.
    # Produces an END_OF_FILE token once the source is exhausted.
    sig { returns(Token) }
    def next
      return Token.new(Token::END_OF_FILE) unless more_tokens?

      scan_token
    end

    # Yields every token until END_OF_FILE.
    # Returns an Enumerator when called without a block.
    sig { override.params(block: T.nilable(T.proc.params(arg0: Token).void)).returns(T.untyped) }
    def each(&block)
      return enum_for(T.must(__method__)) unless block

      loop do
        tok = self.next
        break if tok.type == Token::END_OF_FILE

        block.call(tok)
      end

      self
    end

    private

    sig { returns(T::Boolean) }
    def more_tokens?
      @cursor < @source.length
    end

    # Builds a token whose value is the currently accumulated lexeme.
    sig { params(type: Symbol).returns(Token) }
    def token_with_consumed_value(type)
      token(type, token_value)
    end

    # Builds a token and resets the lexeme start to the current cursor.
    sig { params(type: Symbol, value: T.nilable(String)).returns(Token) }
    def token(type, value = nil)
      @start_cursor = @cursor
      Token.new(type, value)
    end

    # Returns the current token value.
    sig { returns(String) }
    def token_value
      T.must @source[@start_cursor...@cursor]
    end

    # Consumes one character. Returns the character and `true`,
    # or `''` and `false` at end of input.
    sig { returns([String, T::Boolean]) }
    def advance_char
      return '', false unless more_tokens?

      char = next_char

      @cursor += 1
      [char, true]
    end

    sig { returns(String) }
    def next_char
      T.must @source[@cursor]
    end

    # Gets the next UTF-8 encoded character
    # without incrementing the cursor.
    sig { returns(String) }
    def peek_char
      return '' unless more_tokens?

      next_char
    end

    # Advance the next `n` characters
    sig { params(n: Integer).returns(T::Boolean) }
    def advance_chars(n)
      n.times do
        _, ok = advance_char
        return false unless ok
      end

      true
    end

    # Checks if the given character matches
    # the next UTF-8 encoded character in source code.
    # If they match, the cursor gets incremented.
    sig { params(char: String).returns(T::Boolean) }
    def match_char(char)
      return false unless more_tokens?

      if peek_char == char
        advance_char
        return true
      end

      false
    end

    # Consumes the next character if it's from the valid set.
    sig { params(valid_chars: String).returns(T::Boolean) }
    def match_chars(valid_chars)
      return false unless more_tokens?

      p = peek_char
      if p != '' && valid_chars.include?(p)
        advance_char
        return true
      end

      false
    end

    # Rewinds the cursor back n chars.
    sig { params(n: Integer).void }
    def backup_chars(n)
      @cursor -= n
    end

    # Skips the current accumulated token.
    sig { void }
    def skip_token
      @start_cursor = @cursor
    end

    # Scans a single token, skipping insignificant whitespace.
    sig { returns(Token) }
    def scan_token
      loop do
        char, ok = advance_char
        return token(Token::END_OF_FILE) unless ok

        case char
        when '['
          return token(Token::LBRACKET)
        when ']'
          return token(Token::RBRACKET)
        when '{'
          return token(Token::LBRACE)
        when '}'
          return token(Token::RBRACE)
        when ','
          return token(Token::COMMA)
        when ':'
          return token(Token::COLON)
        when '.'
          return token(Token::DOT)
        when '"'
          return scan_string
        when '-'
          # a minus sign must be followed by at least one digit
          p = peek_char
          return token(Token::ERROR, 'unexpected EOF') if p == ''
          return token(Token::ERROR, "unexpected number char: `#{p}`") unless Token::DIGITS.include?(p)

          char, = advance_char
          return scan_number(char)
        when ' ', "\n", "\r", "\t"
          skip_token
          next
        else
          if char.match?(/[[:alpha:]]/)
            return scan_identifier
          elsif char.match?(/\d/)
            return scan_number(char)
          end

          return token(Token::ERROR, "unexpected char `#{char}`")
        end
      end
    end

    sig { params(char: String).returns(T::Boolean) }
    def identifier_char?(char)
      char.match?(/[[:alpha:][:digit:]_]/)
    end

    # Scans a bare word; only the JSON keywords (true/false/null) are legal.
    sig { returns(Token) }
    def scan_identifier
      advance_char while identifier_char?(peek_char)

      value = token_value
      return token(value.to_sym) if Token::KEYWORDS.include?(value)

      token(Token::ERROR, "unexpected identifier: `#{value}`")
    end

    # Consumes a run of consecutive ASCII digits.
    sig { void }
    def consume_digits
      loop do
        p = peek_char
        break if p == '' || !Token::DIGITS.include?(p)

        _, ok = advance_char
        break unless ok
      end
    end

    # Checks if the next `n` characters are from the valid set.
    # The cursor is restored to its position before the call,
    # regardless of how many characters actually matched.
    sig { params(valid_chars: String, n: Integer).returns(T::Boolean) }
    def accept_chars(valid_chars, n)
      consumed = 0
      result = T.let(true, T::Boolean)
      n.times do
        unless match_chars(valid_chars)
          result = false
          break
        end
        consumed += 1
      end

      # BUG FIX: rewind only the characters that were actually consumed.
      # Rewinding a fixed `n` on a partial match could move the cursor
      # before the current lexeme (or even to a negative offset), causing
      # already-lexed input to be scanned again in an endless loop
      # (eg. for the input `"\ux"`).
      backup_chars(consumed)

      result
    end

    sig { params(init_char: String).returns(Token) }
    def scan_number(init_char)
      # JSON forbids leading zeros: `0` may only be followed by `.`, `e`/`E`
      # or the end of the literal.
      if init_char == '0' && accept_chars(Token::DIGITS, 1)
        consume_digits
        return token(
          Token::ERROR,
          'illegal trailing zero in number literal',
        )
      end

      consume_digits

      if match_char('.')
        p = peek_char
        if p == ''
          return token(
            Token::ERROR,
            'unexpected EOF',
          )
        end

        unless Token::DIGITS.include?(p)
          return token(
            Token::ERROR,
            "unexpected char in number literal: `#{p}`",
          )
        end

        consume_digits
      end

      if match_char('e') || match_char('E')
        match_char('+') || match_char('-')
        p = peek_char
        if p == ''
          return token(
            Token::ERROR,
            'unexpected EOF',
          )
        end
        unless Token::DIGITS.include?(p)
          return token(
            Token::ERROR,
            "unexpected char in number literal: `#{p}`",
          )
        end
        consume_digits
      end

      token_with_consumed_value(Token::NUMBER)
    end

    # Discards characters up to and including the closing `"`,
    # used for error recovery inside string literals.
    sig { void }
    def swallow_rest_of_the_string
      loop do
        # swallow the rest of the string
        ch, more_tokens = advance_char
        break if !more_tokens || ch == '"'
      end
    end

    # Scans a string literal (the opening `"` is already consumed),
    # decoding escape sequences into `value_buffer`.
    sig { returns(Token) }
    def scan_string
      value_buffer = String.new
      loop do
        char, ok = advance_char
        return token(Token::ERROR, 'unterminated string literal') unless ok
        return token(Token::STRING, value_buffer) if char == '"'

        if char != '\\'
          value_buffer << char
          next
        end

        char, ok = advance_char
        return token(Token::ERROR, 'unterminated string literal') unless ok

        case char
        when '"'
          value_buffer << '"'
        when '\\'
          value_buffer << '\\'
        when '/'
          value_buffer << '/'
        when 'b'
          value_buffer << "\b"
        when 'f'
          value_buffer << "\f"
        when 'n'
          value_buffer << "\n"
        when 'r'
          value_buffer << "\r"
        when 't'
          value_buffer << "\t"
        when 'u'
          unless accept_chars(Token::HEX_DIGITS, 4)
            swallow_rest_of_the_string
            # BUG FIX: use `token` (not `Token.new`) so @start_cursor is
            # reset; a stale start cursor made the *next* token's value
            # include the discarded string characters.
            return token(Token::ERROR, 'invalid unicode escape')
          end

          advance_chars(4)
          last4 = T.must @source[@cursor - 4...@cursor]
          value_buffer << [last4.hex].pack('U')
        else
          swallow_rest_of_the_string
          # BUG FIX: `token` instead of `Token.new` — see the unicode branch.
          return token(Token::ERROR, "invalid escape `\\#{char}`")
        end
      end
    end
  end
end
# typed: strict
# frozen_string_literal: true

class RubyJsonParser
  # JSON parser.
  # A single-token-lookahead recursive-descent parser that produces an AST
  # plus a list of syntax errors (it never raises on malformed input).
  class Parser
    extend T::Sig

    class << self
      extend T::Sig

      # Parses the given JSON source and returns a `Result`.
      sig { params(source: String).returns(Result) }
      def parse(source)
        new(source).parse
      end

      private :new
    end

    sig { params(source: String).void }
    def initialize(source)
      # Lexer/Tokenizer that produces tokens
      @lexer = T.let(Lexer.new(source), Lexer)
      # Next token used for predicting productions
      @lookahead = T.let(Token.new(Token::NONE), Token)
      @errors = T.let([], T::Array[String])
    end

    # Parses a single top-level JSON value.
    sig { returns(Result) }
    def parse
      advance # populate @lookahead
      ast = parse_value
      # BUG FIX: report leftover input after the top-level value
      # (eg. `true false`), which was previously silently ignored.
      unless accept(Token::END_OF_FILE)
        add_error("unexpected token `#{@lookahead}` after the top-level JSON value")
      end
      Result.new(ast, @errors)
    end

    private

    # Dispatches to the correct production based on the lookahead token.
    sig { returns(AST::Node) }
    def parse_value
      case @lookahead.type
      when Token::FALSE
        advance
        AST::FalseLiteralNode.new
      when Token::TRUE
        advance
        AST::TrueLiteralNode.new
      when Token::NULL
        advance
        AST::NullLiteralNode.new
      when Token::NUMBER
        tok = advance
        AST::NumberLiteralNode.new(T.must(tok.value))
      when Token::STRING
        tok = advance
        AST::StringLiteralNode.new(T.must(tok.value))
      when Token::LBRACKET
        parse_array
      when Token::LBRACE
        parse_object
      else
        tok = advance
        # error tokens already registered their message in `advance`
        add_error("unexpected token `#{tok}`") if tok.type != Token::ERROR
        AST::InvalidNode.new(tok)
      end
    end

    sig { returns(AST::Node) }
    def parse_object
      advance # swallow `{`
      return AST::ObjectLiteralNode.new([]) if match(Token::RBRACE)

      pairs = parse_key_value_pairs
      consume(Token::RBRACE)
      AST::ObjectLiteralNode.new(pairs)
    end

    # Parses a comma-separated list of `key: value` pairs,
    # flagging illegal trailing commas.
    sig { returns(T::Array[AST::KeyValuePairNode]) }
    def parse_key_value_pairs
      elements = [parse_key_value_pair]

      loop do
        break if accept(Token::END_OF_FILE, Token::RBRACE)
        break unless match(Token::COMMA)

        if accept(Token::RBRACE)
          add_error('illegal trailing comma in object literal')
          break
        end

        elements << parse_key_value_pair
      end

      elements
    end

    sig { returns(AST::KeyValuePairNode) }
    def parse_key_value_pair
      key = parse_value
      # a pair that ends right after its first value has no key at all
      if accept(Token::COMMA, Token::RBRACE, Token::END_OF_FILE)
        add_error("missing key in object literal for value: `#{key}`")
        return AST::KeyValuePairNode.new(nil, key)
      end

      add_error("non-string key in object literal: `#{key}`") unless key.is_a?(AST::StringLiteralNode)
      consume(Token::COLON)
      value = parse_value

      AST::KeyValuePairNode.new(key, value)
    end

    sig { returns(AST::Node) }
    def parse_array
      advance # swallow `[`
      return AST::ArrayLiteralNode.new([]) if match(Token::RBRACKET)

      elements = parse_array_elements
      consume(Token::RBRACKET)
      AST::ArrayLiteralNode.new(elements)
    end

    # Parses a comma-separated list of array elements,
    # flagging illegal trailing commas.
    sig { returns(T::Array[AST::Node]) }
    def parse_array_elements
      elements = [parse_value]

      loop do
        break if accept(Token::END_OF_FILE, Token::RBRACKET)
        break unless match(Token::COMMA)

        if accept(Token::RBRACKET)
          add_error('illegal trailing comma in array literal')
          break
        end

        elements << parse_value
      end

      elements
    end

    # Move over to the next token.
    sig { returns(Token) }
    def advance
      previous = @lookahead
      @lookahead = @lexer.next
      handle_error_token(@lookahead) if @lookahead.type == Token::ERROR

      previous
    end

    # Add the content of an error token to the syntax error list.
    sig { params(err: Token).void }
    def handle_error_token(err)
      msg = err.value
      return unless msg

      add_error(msg)
    end

    # Register a syntax error
    sig { params(err: String).void }
    def add_error(err)
      @errors << err
    end

    # Checks if the next token matches any of the given types,
    # if so it gets consumed.
    sig { params(token_types: Symbol).returns(T.nilable(Token)) }
    def match(*token_types)
      token_types.each do |type|
        return advance if accept(type)
      end

      nil
    end

    # Checks whether the next token matches any the specified types.
    sig { params(token_types: Symbol).returns(T::Boolean) }
    def accept(*token_types)
      token_types.each do |type|
        return true if @lookahead.type == type
      end

      false
    end

    # Consumes a token of the given type, registering an error when the
    # lookahead does not match. Returns the token and a success flag.
    sig { params(token_type: Symbol).returns([Token, T::Boolean]) }
    def consume(token_type)
      # error tokens already reported themselves in `advance`
      return advance, false if @lookahead.type == Token::ERROR

      if @lookahead.type != token_type
        error_expected(Token.type_to_string(token_type))
        return advance, false
      end

      [advance, true]
    end

    # Adds an error which tells the user that another type of token
    # was expected.
    sig { params(expected: String).void }
    def error_expected(expected)
      add_error("unexpected `#{@lookahead}`, expected `#{expected}`")
    end
  end
end
# typed: strong
# frozen_string_literal: true

class RubyJsonParser
  # The result of parsing a JSON string/file.
  # Combines an AST (Abstract Syntax Tree) and a list of errors.
  class Result
    extend T::Sig

    # Root node of the parsed syntax tree.
    sig { returns(AST::Node) }
    attr_reader :ast

    # Syntax errors collected while parsing (empty on success).
    sig { returns(T::Array[String]) }
    attr_reader :errors

    sig { params(ast: AST::Node, errors: T::Array[String]).void }
    def initialize(ast, errors)
      @ast = ast
      @errors = errors
    end

    # Whether parsing produced any syntax errors.
    sig { returns(T::Boolean) }
    def err?
      !@errors.empty?
    end

    # Human-readable summary: the error list (if any) followed by the AST.
    sig { returns(String) }
    def inspect
      lines = ["<RubyJsonParser::Result>\n"]

      unless @errors.empty?
        lines << "  !Errors!\n"
        @errors.each { |err| lines << "  - #{err}\n" }
        lines << "\n"
      end

      lines << "  AST:\n"
      lines.join << @ast.inspect(2)
    end
  end
end