json_completer 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +38 -21
- data/lib/json_completer/completion_engine.rb +241 -0
- data/lib/json_completer/parser_engine.rb +386 -0
- data/lib/json_completer/scanners.rb +448 -0
- data/lib/json_completer.rb +36 -688
- metadata +5 -2
|
@@ -0,0 +1,386 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class JsonCompleter
|
|
4
|
+
module ParserEngine
|
|
5
|
+
# Incrementally parses +partial_json+ and returns the value tree built so far.
#
# The state left by the previous call (@parse_state) is reused when the new
# input is longer than the last one. It is replaced with a fresh state when
# there is no previous state, when the input got shorter, when it has the same
# byte length but different content, or when
# reset_parse_state_for_input_growth? asks for a reset.
#
# @param partial_json [String] JSON text, possibly truncated
# @return [Object, nil] the root recorded in the parse state; nil for empty input
# @raise [ParseError] when a byte is not allowed at the current position; the
#   parse state is replaced with a fresh one before the error is re-raised
def parse(partial_json)
  source = partial_json
  # Scan raw bytes rather than 1-character strings: JSON punctuation is ASCII,
  # so integer comparisons via getbyte/bytesize suffice and multibyte UTF-8
  # payload is passed through untouched.
  total_bytes = source.bytesize

  previous = @parse_state
  if previous && previous.input_length == total_bytes && previous.input_snapshot == source
    # Same input as the last call: nothing new to scan.
    finalize_parse_result
    return @parse_state.root
  end

  stale = previous.nil? ||
          previous.input_length >= total_bytes ||
          reset_parse_state_for_input_growth?(source)
  @parse_state = self.class.new_parse_state if stale

  return nil if source.empty?

  begin
    prepare_parse_state_for_incremental_input

    cursor = @parse_state.last_index
    while cursor < total_bytes
      # A string/number/keyword that is still open consumes bytes first.
      if @parse_state.token_state
        cursor = continue_parse_token(source, cursor)
        next
      end

      byte = source.getbyte(cursor)
      raise ParseError, 'unexpected token after top-level value' if top_level_value_complete? && !whitespace_byte?(byte)

      # ASCII codes: 9/10/13/32 whitespace, 34 ", 44 comma, 45 minus, 48..57 digits,
      # 58 colon, 91/93 [ ], 102/110/116 f/n/t, 123/125 { }.
      case byte
      when 9, 10, 13, 32 then nil
      when 34 then start_parse_string_token
      when 44 then parse_comma!
      when 45, 48..57 then start_parse_number_token(byte)
      when 58 then parse_colon!
      when 91 then start_parse_container([])
      when 93 then close_parse_array!
      when 102, 110, 116 then start_parse_keyword_token(byte)
      when 123 then start_parse_container({})
      when 125 then close_parse_object!
      else
        raise ParseError, "unexpected token #{source.byteslice(cursor, 1).inspect}"
      end

      # Every accepted byte above is exactly one byte wide.
      cursor += 1
    end

    # Remember where scanning stopped so the next call can resume from here.
    @parse_state.last_index = cursor
    @parse_state.input_length = total_bytes
    @parse_state.input_snapshot = source
    finalize_parse_result
    @parse_state.root
  rescue ParseError
    @parse_state = self.class.new_parse_state
    raise
  end
end
|
|
88
|
+
|
|
89
|
+
private
|
|
90
|
+
|
|
91
|
+
# Undoes the visible-key placeholder of an unfinished object key, if one was
# recorded on the token, before scanning resumes. Does nothing for any other
# token state.
def prepare_parse_state_for_incremental_input
  pending = @parse_state.token_state
  unfinished_key = pending.is_a?(Scanners::ParsedStringToken) && pending.role == :key

  restore_visible_key_placeholder(pending) if unfinished_key && pending.visible_key
end
|
|
97
|
+
|
|
98
|
+
# Dispatches to the continuation routine matching the class of the token that
# is currently in progress.
#
# @param input [String] the full input being parsed
# @param index [Integer] byte offset at which to resume
# @return [Integer] whatever the selected continuation routine returns
# @raise [ParseError] when the token state is of an unknown class
def continue_parse_token(input, index)
  active = @parse_state.token_state

  if active.is_a?(Scanners::ParsedStringToken)
    continue_parse_string_token(input, index)
  elsif active.is_a?(Scanners::NumberToken)
    continue_parse_number_token(input, index)
  elsif active.is_a?(Scanners::KeywordToken)
    continue_parse_keyword_token(input, index)
  else
    raise ParseError, "unsupported token state: #{active.class}"
  end
end
|
|
112
|
+
|
|
113
|
+
# Stores a freshly opened container in the current value slot and pushes a
# matching context onto the context stack.
#
# @param container [Hash, Array] the empty container to fill; a Hash gets an
#   ObjectContext, anything else an ArrayContext
def start_parse_container(container)
  target = parse_value_slot!
  assign_parse_slot(target, container)
  # The parent context is updated before the new context is pushed.
  transition_after_parse_value(target)

  context_class = container.is_a?(Hash) ? ObjectContext : ArrayContext
  @parse_state.context_stack << context_class.new(container: container)
end
|
|
124
|
+
|
|
125
|
+
# Handles a closing brace: pops the innermost context, provided it is an
# ObjectContext that is not in the middle of an entry.
#
# NOTE(review): after a comma the object is back in :key_or_end (see
# parse_comma!), so a trailing comma before the closing brace is accepted here,
# while close_parse_array! rejects the array equivalent. Confirm that this
# asymmetry is intended.
#
# @raise [ParseError] when the innermost context is not an object, or the
#   object is in mode :key_in_progress, :after_key or :value
def close_parse_object!
  stack = @parse_state.context_stack
  current = stack.last
  raise ParseError, 'unexpected object close' unless current.is_a?(ObjectContext)

  problem =
    case current.mode
    when :key_in_progress then 'cannot close object while a key is incomplete'
    when :after_key then 'cannot close object before a colon'
    when :value then 'cannot close object while a value is missing'
    end
  raise ParseError, problem if problem

  stack.pop
end
|
|
134
|
+
|
|
135
|
+
# Handles a closing bracket: pops the innermost context, provided it is an
# ArrayContext with no provisional element pending.
#
# @raise [ParseError] when the innermost context is not an array, or a
#   provisional index is set (parse_comma! sets one after each comma)
def close_parse_array!
  stack = @parse_state.context_stack
  array_context = stack.last
  raise ParseError, 'unexpected array close' unless array_context.is_a?(ArrayContext)

  pending_element = array_context.provisional_index
  raise ParseError, 'cannot close array while a value is missing' if pending_element

  stack.pop
end
|
|
142
|
+
|
|
143
|
+
# Handles an opening double quote. Inside an object that is in :key_or_end mode
# the string is a key; everywhere else it is a value whose buffer is placed in
# the current value slot right away.
def start_parse_string_token
  enclosing = @parse_state.context_stack.last
  expecting_key = enclosing.is_a?(ObjectContext) && enclosing.mode == :key_or_end

  unless expecting_key
    slot = parse_value_slot!
    value_token = Scanners::ParsedStringToken.new(role: :value, slot: slot)
    # The slot receives the token's own buffer object, not a copy.
    assign_parse_slot(slot, value_token.buffer)
    transition_after_parse_value(slot)
    return @parse_state.token_state = value_token
  end

  enclosing.mode = :key_in_progress
  @parse_state.token_state = Scanners::ParsedStringToken.new(role: :key, context: enclosing)
  nil
end
|
|
158
|
+
|
|
159
|
+
# Feeds more input to the string token in progress via Scanners.scan_string and
# reacts to the status it reports.
#
# @param input [String] the full input being parsed
# @param index [Integer] byte offset at which to resume
# @return [Integer] the index returned by Scanners.scan_string
# @raise [ParseError] for the :invalid_escape, :invalid_unicode and
#   :invalid_control_character statuses
def continue_parse_string_token(input, index)
  next_index, outcome = Scanners.scan_string(input, index, @parse_state.token_state)

  case outcome
  when :invalid_escape
    raise ParseError, 'invalid string escape sequence'
  when :invalid_unicode
    raise ParseError, 'invalid unicode escape sequence'
  when :invalid_control_character
    raise ParseError, 'invalid control character in string literal'
  when :terminated
    finish_parse_string_token!
  end

  next_index
end
|
|
169
|
+
|
|
170
|
+
# Clears the token in progress. When that token is an object key, a copy of its
# buffer becomes the context's current key and the context moves to :after_key.
# Does nothing when no token is in progress.
def finish_parse_string_token!
  finished = @parse_state.token_state
  return unless finished

  if finished.role == :key
    owner = finished.context
    owner.current_key = finished.buffer.dup
    owner.mode = :after_key
  end

  @parse_state.token_state = nil
end
|
|
181
|
+
|
|
182
|
+
# Opens a number token seeded with +first_byte+, publishes its current parsed
# value into the value slot and marks the token as in progress.
#
# @param first_byte [Integer] the byte that started the number (minus or digit)
def start_parse_number_token(first_byte)
  destination = parse_value_slot!

  @parse_state.token_state = Scanners::NumberToken.new(slot: destination).tap do |number|
    number.append_byte(first_byte)
    assign_parse_slot(destination, number.parsed_value)
    transition_after_parse_value(destination)
  end
end
|
|
190
|
+
|
|
191
|
+
# Feeds bytes to the number token for as long as it accepts them, refreshing
# the value slot after every accepted byte.
#
# @param input [String] the full input being parsed
# @param index [Integer] byte offset at which to resume
# @return [Integer] offset of the first byte that was not consumed
# @raise [ParseError] when the token reports itself invalid
def continue_parse_number_token(input, index)
  number = @parse_state.token_state
  limit = input.bytesize
  cursor = index

  until cursor >= limit || !number.append_byte(input.getbyte(cursor))
    assign_parse_slot(number.slot, number.parsed_value)
    cursor += 1
  end

  raise ParseError, 'invalid number literal' if number.invalid?

  # Stopping before the end of input means a byte was refused, so the literal
  # is over; at the end of input the token stays open for the next call.
  @parse_state.token_state = nil unless cursor >= limit
  cursor
end
|
|
205
|
+
|
|
206
|
+
# Opens a keyword token (false/null/true) with its first byte already counted
# as matched, publishes its parsed value into the value slot and marks the
# token as in progress.
#
# @param first_byte [Integer] the byte that started the keyword
def start_parse_keyword_token(first_byte)
  destination = parse_value_slot!
  literal = keyword_target_for_byte(first_byte)
  keyword = Scanners::KeywordToken.new(slot: destination, target: literal, matched: 1)

  assign_parse_slot(destination, keyword.parsed_value)
  transition_after_parse_value(destination)
  @parse_state.token_state = keyword
end
|
|
213
|
+
|
|
214
|
+
# Matches further bytes of the keyword in progress against its target literal.
#
# @param input [String] the full input being parsed
# @param index [Integer] byte offset at which to resume
# @return [Integer] offset of the first byte that was not consumed
# @raise [ParseError] when a byte is refused before the keyword is complete
def continue_parse_keyword_token(input, index)
  keyword = @parse_state.token_state
  limit = input.bytesize
  expected = keyword.target.length
  cursor = index

  cursor += 1 while cursor < limit && keyword.matched < expected && keyword.append_byte(input.getbyte(cursor))

  more_input = cursor < limit
  raise ParseError, 'invalid keyword literal' if keyword.matched < expected && more_input

  # The token stays open only when input ran out in the middle of the keyword.
  @parse_state.token_state = nil if more_input || keyword.matched == expected
  cursor
end
|
|
227
|
+
|
|
228
|
+
# Handles a colon: only legal directly after an object key, where it switches
# the object context to :value mode.
#
# @raise [ParseError] when the innermost context is not an object in :after_key mode
def parse_colon!
  owner = @parse_state.context_stack.last
  awaiting_colon = owner.is_a?(ObjectContext) && owner.mode == :after_key
  raise ParseError, 'unexpected colon' unless awaiting_colon

  owner.mode = :value
end
|
|
234
|
+
|
|
235
|
+
# Handles a comma inside the innermost container. In an array it reserves the
# next element position as provisional; in an object it clears the current key
# and goes back to expecting a key.
#
# @raise [ParseError] outside any container, or when the container is not in
#   :after_value mode
def parse_comma!
  enclosing = @parse_state.context_stack.last
  raise ParseError, 'unexpected comma' unless enclosing

  if enclosing.is_a?(ArrayContext)
    unless enclosing.mode == :after_value
      raise ParseError, 'cannot add a comma while an array value is missing'
    end

    enclosing.mode = :value_or_end
    # Position of the element that must follow this comma.
    enclosing.provisional_index = enclosing.container.length
  elsif enclosing.is_a?(ObjectContext)
    unless enclosing.mode == :after_value
      raise ParseError, 'cannot add a comma while an object entry is incomplete'
    end

    enclosing.mode = :key_or_end
    enclosing.current_key = nil
  end
end
|
|
252
|
+
|
|
253
|
+
# Works out where the value that is about to start has to be stored.
#
# @return [ParseSlot] a root slot when no container is open; otherwise a slot
#   naming the innermost container and the array index or object key to use
# @raise [ParseError] when a value is not allowed at the current position
def parse_value_slot!
  enclosing = @parse_state.context_stack.last

  if enclosing.is_a?(ArrayContext)
    failure =
      case enclosing.mode
      when :after_value then 'expected comma before next array value'
      when :value_or_end then nil
      else 'cannot parse array value here'
      end
    raise ParseError, failure if failure

    # Reuse the position reserved by the preceding comma, if any, and consume it.
    position = enclosing.provisional_index || enclosing.container.length
    enclosing.provisional_index = nil
    ParseSlot.new(container: enclosing.container, key: position, root: false)
  elsif enclosing.is_a?(ObjectContext)
    failure =
      case enclosing.mode
      when :after_key then 'expected colon before object value'
      when :after_value then 'expected comma before next object entry'
      when :value then nil
      else 'expected object key'
      end
    raise ParseError, failure if failure

    ParseSlot.new(container: enclosing.container, key: enclosing.current_key, root: false)
  elsif !enclosing
    # No open container: the value is the document root, allowed only once.
    raise ParseError, 'unexpected token after top-level value' if @parse_state.root_assigned

    ParseSlot.new(root: true)
  end
end
|
|
278
|
+
|
|
279
|
+
# Tells whether the document root has been assigned, no container is open and
# no token is in progress.
#
# @return [Boolean, nil] truthy only when all three conditions hold
def top_level_value_complete?
  state = @parse_state
  assigned = state.root_assigned
  return assigned unless assigned
  return false unless state.context_stack.empty?

  state.token_state.nil?
end
|
|
284
|
+
|
|
285
|
+
# Writes +value+ to the place described by +slot+: the document root (also
# flagging the root as assigned) or an index/key of the slot's container.
#
# @param slot [ParseSlot] destination produced by parse_value_slot!
# @param value [Object] the value to store
def assign_parse_slot(slot, value)
  return slot.container[slot.key] = value unless slot.root

  @parse_state.root = value
  @parse_state.root_assigned = true
end
|
|
293
|
+
|
|
294
|
+
# Moves the innermost context to :after_value once a value has started.
# An array context always transitions; an object context transitions when the
# slot is the root slot or the context has a current key.
#
# @param slot [ParseSlot] the slot the value was written to
def transition_after_parse_value(slot)
  enclosing = @parse_state.context_stack.last

  if enclosing.is_a?(ArrayContext)
    enclosing.mode = :after_value
  elsif enclosing.is_a?(ObjectContext)
    keyless = !slot.root && enclosing.current_key.nil?
    enclosing.mode = :after_value unless keyless
  end
end
|
|
304
|
+
|
|
305
|
+
# Fills the gaps of a truncated document with nil so the current tree can be
# handed out. If an object key is still being read, only its visible
# placeholder is refreshed. Otherwise every open object that has a key but no
# value yet, and every open array with a provisional index, gets a nil there.
def finalize_parse_result
  pending = @parse_state.token_state

  if pending.is_a?(Scanners::ParsedStringToken) && pending.role == :key
    update_visible_key_placeholder(pending)
    return
  end

  @parse_state.context_stack.each do |open_context|
    if open_context.is_a?(ObjectContext)
      awaiting_value = open_context.mode == :after_key || open_context.mode == :value
      key = open_context.current_key
      open_context.container[key] = nil if awaiting_value && key
    elsif open_context.is_a?(ArrayContext)
      reserved = open_context.provisional_index
      open_context.container[reserved] = nil if reserved
    end
  end
end
|
|
326
|
+
|
|
327
|
+
# Removes the placeholder entry stored under the token's visible key: the value
# it replaced is put back if one was recorded as present, otherwise the entry
# is deleted. The token's placeholder bookkeeping is then cleared.
#
# @param token [Scanners::ParsedStringToken] key token carrying the bookkeeping
def restore_visible_key_placeholder(token)
  entries = token.context.container
  shown_key = token.visible_key

  if token.visible_key_replaced_present
    entries[shown_key] = token.visible_key_replaced_value
  else
    entries.delete(shown_key)
  end

  token.visible_key = nil
  token.visible_key_replaced_value = nil
  token.visible_key_replaced_present = false
end
|
|
338
|
+
|
|
339
|
+
# Exposes the partially read key in its container as an entry with a nil value.
# Any placeholder for an earlier, different partial key is undone first, and
# whatever the new key replaces is recorded on the token so it can be restored.
# Does nothing when the visible key already equals the token's buffer.
#
# @param token [Scanners::ParsedStringToken] the key token in progress
def update_visible_key_placeholder(token)
  partial_key = token.buffer.dup
  previous_key = token.visible_key
  return if previous_key == partial_key

  restore_visible_key_placeholder(token) if previous_key

  entries = token.context.container
  token.visible_key = partial_key
  token.visible_key_replaced_present = entries.key?(partial_key)
  token.visible_key_replaced_value = entries[partial_key]
  entries[partial_key] = nil
end
|
|
350
|
+
|
|
351
|
+
# Decides whether a longer input must be parsed from scratch. That is the case
# only when a previous input was recorded, prefix validation is required, and
# the new input does not start with the previous one.
#
# @param input [String] the new, longer input
# @return [Boolean]
def reset_parse_state_for_input_growth?(input)
  previous_input = @parse_state.input_snapshot

  if previous_input && prefix_validation_required?
    !input.start_with?(previous_input)
  else
    false
  end
end
|
|
357
|
+
|
|
358
|
+
# Prefix validation is requested only while no container is open.
# NOTE(review): presumably a trade-off that keeps the prefix comparison off
# the path for nested documents; confirm with the author.
#
# @return [Boolean] true when the context stack is empty
def prefix_validation_required?
  open_contexts = @parse_state.context_stack
  open_contexts.empty?
end
|
|
361
|
+
|
|
362
|
+
# Maps the first byte of a keyword to the full literal it must spell.
#
# @param byte [Integer] ASCII code: 102 (f), 110 (n) or 116 (t)
# @return [String] 'false', 'null' or 'true'
# @raise [ParseError] for any other byte
def keyword_target_for_byte(byte)
  return 'false' if byte == 102
  return 'null' if byte == 110
  return 'true' if byte == 116

  raise ParseError, "unexpected keyword token byte: #{byte}"
end
|
|
374
|
+
|
|
375
|
+
# Tells whether +byte+ is one of the four whitespace bytes the parser skips.
#
# @param byte [Integer] ASCII code to test
# @return [Boolean] true for tab (9), line feed (10), carriage return (13)
#   and space (32)
def whitespace_byte?(byte)
  byte == 9 || byte == 10 || byte == 13 || byte == 32
end
|
|
383
|
+
end
|
|
384
|
+
|
|
385
|
+
include ParserEngine
|
|
386
|
+
end
|