json_completer 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,386 @@
1
+ # frozen_string_literal: true
2
+
3
class JsonCompleter
  # Incremental parsing engine mixed into JsonCompleter.
  #
  # Progress is kept in @parse_state between calls: the byte index reached, the stack of
  # open containers, any token that was still being scanned when the input ran out, and
  # the root value built so far. A later call with a longer input resumes at the stored
  # index instead of re-reading bytes that were already consumed.
  module ParserEngine
    # Parses a (possibly truncated) JSON document and returns the value built so far.
    #
    # State reuse rules:
    # * no state yet, or the new input is shorter than the previous one -> fresh state;
    # * the new input is longer -> state is reused unless
    #   reset_parse_state_for_input_growth? asks for a reset;
    # * same length and equal to the stored snapshot -> the stored root is returned
    #   without scanning; same length but different content -> fresh state.
    #
    # If the pass does not run to completion for ANY reason (a ParseError, but also an
    # unrelated exception or an asynchronous interrupt), the state is replaced by a fresh
    # one before the exception propagates. Otherwise the containers/tokens would already be
    # mutated while last_index still pointed at the old position, and the next call would
    # replay bytes against that half-updated state.
    #
    # @param partial_json [String] JSON text, may stop at any byte
    # @return [Object, nil] the root value parsed so far, or nil when the input is empty
    # @raise [ParseError] when a byte is not acceptable in the current parser state
    def parse(partial_json)
      input = partial_json
      # The hot path works on raw bytes, not 1-character Ruby strings. JSON punctuation is ASCII,
      # so getbyte/bytesize let us compare cheap integers while multibyte UTF-8 payload stays intact.
      input_length = input.bytesize

      if @parse_state.nil? ||
         @parse_state.input_length > input_length ||
         (@parse_state.input_length < input_length && reset_parse_state_for_input_growth?(input))
        @parse_state = self.class.new_parse_state
      elsif @parse_state.input_length == input_length
        if @parse_state.input_snapshot == input
          # Identical input: nothing new to scan, only refresh the provisional placeholders.
          finalize_parse_result
          return @parse_state.root
        end

        @parse_state = self.class.new_parse_state
      end

      return nil if input.empty?

      completed = false
      begin
        prepare_parse_state_for_incremental_input

        index = @parse_state.last_index
        while index < input_length
          # A token left open by an earlier byte (or an earlier call) consumes input first.
          if @parse_state.token_state
            index = continue_parse_token(input, index)
            next
          end

          byte = input.getbyte(index)
          if top_level_value_complete? && !whitespace_byte?(byte)
            raise ParseError, 'unexpected token after top-level value'
          end

          # ASCII byte values: 9/10/13/32 = whitespace, 34 = ", 44 = ,, 45 = -, 58 = :,
          # 91/93 = [] , 102/110/116 = f/n/t, 123/125 = {}.
          case byte
          when 9, 10, 13, 32 then nil # whitespace between tokens is skipped
          when 34 then start_parse_string_token
          when 44 then parse_comma!
          when 45, 48..57 then start_parse_number_token(byte)
          when 58 then parse_colon!
          when 91 then start_parse_container([])
          when 93 then close_parse_array!
          when 102, 110, 116 then start_parse_keyword_token(byte)
          when 123 then start_parse_container({})
          when 125 then close_parse_object!
          else
            raise ParseError, "unexpected token #{input.byteslice(index, 1).inspect}"
          end

          # Every structural byte handled above is exactly one byte wide.
          index += 1
        end

        @parse_state.last_index = index
        @parse_state.input_length = input_length
        @parse_state.input_snapshot = input
        finalize_parse_result
        completed = true
        @parse_state.root
      ensure
        # Runs on ParseError as well as on any other abnormal exit; the exception itself
        # keeps propagating unchanged.
        @parse_state = self.class.new_parse_state unless completed
      end
    end

    private

    # Undoes the placeholder entry that finalize_parse_result inserted for a key that was
    # still being typed, so the key scanner can continue on a clean container.
    # Does nothing unless the pending token is a key token with a visible placeholder.
    def prepare_parse_state_for_incremental_input
      token = @parse_state.token_state
      return unless token.is_a?(Scanners::ParsedStringToken) && token.role == :key && token.visible_key

      restore_visible_key_placeholder(token)
    end

    # Resumes scanning of the pending token, dispatching on the token's class.
    #
    # @return [Integer] byte index at which the main loop should continue
    # @raise [ParseError] when the pending token is of an unknown class
    def continue_parse_token(input, index)
      token = @parse_state.token_state

      case token
      when Scanners::ParsedStringToken
        continue_parse_string_token(input, index)
      when Scanners::NumberToken
        continue_parse_number_token(input, index)
      when Scanners::KeywordToken
        continue_parse_keyword_token(input, index)
      else
        raise ParseError, "unsupported token state: #{token.class}"
      end
    end

    # Stores a freshly opened container ({} or []) in the current value slot and pushes a
    # matching context so the following bytes are interpreted inside it.
    def start_parse_container(container)
      slot = parse_value_slot!
      assign_parse_slot(slot, container)
      # The parent context is advanced before the child context is pushed.
      transition_after_parse_value(slot)

      @parse_state.context_stack << if container.is_a?(Hash)
                                      ObjectContext.new(container: container)
                                    else
                                      ArrayContext.new(container: container)
                                    end
    end

    # Handles '}': pops the innermost context if it is an object in a closable mode.
    #
    # @raise [ParseError] when the innermost context is not an object, or the object is in
    #   the middle of a key, is waiting for a colon, or is waiting for a value
    def close_parse_object!
      context = @parse_state.context_stack.last
      raise ParseError, 'unexpected object close' unless context.is_a?(ObjectContext)
      raise ParseError, 'cannot close object while a key is incomplete' if context.mode == :key_in_progress
      raise ParseError, 'cannot close object before a colon' if context.mode == :after_key
      raise ParseError, 'cannot close object while a value is missing' if context.mode == :value

      @parse_state.context_stack.pop
    end

    # Handles ']': pops the innermost context if it is an array.
    #
    # @raise [ParseError] when the innermost context is not an array, or a comma was seen
    #   and the element after it has not started yet (provisional_index is set)
    def close_parse_array!
      context = @parse_state.context_stack.last
      raise ParseError, 'unexpected array close' unless context.is_a?(ArrayContext)
      raise ParseError, 'cannot close array while a value is missing' if context.provisional_index

      @parse_state.context_stack.pop
    end

    # Handles an opening '"'. Inside an object that expects a key, a key token is started;
    # everywhere else the string is a value whose buffer is stored in the value slot at
    # once, so the partially received text is visible in the result.
    def start_parse_string_token
      context = @parse_state.context_stack.last

      if context.is_a?(ObjectContext) && context.mode == :key_or_end
        context.mode = :key_in_progress
        @parse_state.token_state = Scanners::ParsedStringToken.new(role: :key, context: context)
        return
      end

      slot = parse_value_slot!
      token = Scanners::ParsedStringToken.new(role: :value, slot: slot)
      assign_parse_slot(slot, token.buffer)
      transition_after_parse_value(slot)
      @parse_state.token_state = token
    end

    # Feeds more input to the pending string token via Scanners.scan_string and maps the
    # scanner's failure statuses to ParseError.
    #
    # @return [Integer] byte index returned by the scanner
    # @raise [ParseError] on :invalid_escape, :invalid_unicode or :invalid_control_character
    def continue_parse_string_token(input, index)
      token = @parse_state.token_state
      index, status = Scanners.scan_string(input, index, token)
      raise ParseError, 'invalid string escape sequence' if status == :invalid_escape
      raise ParseError, 'invalid unicode escape sequence' if status == :invalid_unicode
      raise ParseError, 'invalid control character in string literal' if status == :invalid_control_character

      finish_parse_string_token! if status == :terminated
      index
    end

    # Clears the pending string token. For a key token, a copy of the buffer becomes the
    # object's current key and the object now waits for a colon.
    def finish_parse_string_token!
      token = @parse_state.token_state
      return unless token

      if token.role == :key
        token.context.current_key = token.buffer.dup
        token.context.mode = :after_key
      end

      @parse_state.token_state = nil
    end

    # Starts a number token with its first byte ('-' or a digit) and publishes the token's
    # current parsed_value in the value slot.
    def start_parse_number_token(first_byte)
      slot = parse_value_slot!
      token = Scanners::NumberToken.new(slot: slot)
      token.append_byte(first_byte)
      assign_parse_slot(slot, token.parsed_value)
      transition_after_parse_value(slot)
      @parse_state.token_state = token
    end

    # Appends bytes to the pending number token for as long as append_byte returns a truthy
    # value, refreshing the slot after each accepted byte.
    # NOTE(review): append_byte/invalid? live in Scanners and are not visible here;
    # presumably append_byte returns falsy for a byte that cannot extend the number.
    #
    # @return [Integer] index of the first byte that was not consumed
    # @raise [ParseError] when the token reports itself invalid
    def continue_parse_number_token(input, index)
      token = @parse_state.token_state
      length = input.bytesize

      while index < length && token.append_byte(input.getbyte(index))
        assign_parse_slot(token.slot, token.parsed_value)
        index += 1
      end

      raise ParseError, 'invalid number literal' if token.invalid?

      # At end of input the number may still grow, so the token is kept; it is dropped only
      # when a following byte stopped the scan.
      @parse_state.token_state = nil if index < length
      index
    end

    # Starts a keyword token (false/null/true, chosen by the first byte) with one byte
    # already matched and publishes the token's parsed_value in the value slot.
    def start_parse_keyword_token(first_byte)
      slot = parse_value_slot!
      token = Scanners::KeywordToken.new(slot: slot, target: keyword_target_for_byte(first_byte), matched: 1)
      assign_parse_slot(slot, token.parsed_value)
      transition_after_parse_value(slot)
      @parse_state.token_state = token
    end

    # Matches further bytes of the pending keyword until the keyword is complete, the input
    # ends, or append_byte returns a falsy value.
    #
    # @return [Integer] index of the first byte that was not consumed
    # @raise [ParseError] when scanning stopped before the keyword was complete although
    #   input was still available
    def continue_parse_keyword_token(input, index)
      token = @parse_state.token_state
      length = input.bytesize

      while index < length && token.matched < token.target.length && token.append_byte(input.getbyte(index))
        index += 1
      end

      raise ParseError, 'invalid keyword literal' if token.matched < token.target.length && index < length

      @parse_state.token_state = nil if index < length || token.matched == token.target.length
      index
    end

    # Handles ':' - only legal directly after a completed key of the innermost object.
    #
    # @raise [ParseError] in any other position
    def parse_colon!
      context = @parse_state.context_stack.last
      raise ParseError, 'unexpected colon' unless context.is_a?(ObjectContext) && context.mode == :after_key

      context.mode = :value
    end

    # Handles ',' - only legal after a value inside the innermost container.
    # In an array, the index of the upcoming element is remembered as provisional_index.
    # In an object, the current key is cleared and a new key is expected.
    #
    # @raise [ParseError] at top level, or when the container is not in :after_value mode
    def parse_comma!
      context = @parse_state.context_stack.last
      raise ParseError, 'unexpected comma' unless context

      case context
      when ArrayContext
        raise ParseError, 'cannot add a comma while an array value is missing' unless context.mode == :after_value

        context.mode = :value_or_end
        context.provisional_index = context.container.length
      when ObjectContext
        raise ParseError, 'cannot add a comma while an object entry is incomplete' unless context.mode == :after_value

        context.mode = :key_or_end
        context.current_key = nil
      end
    end

    # Determines where the value that is about to start has to be stored.
    #
    # @return [ParseSlot] the root slot when no container is open; otherwise a slot naming
    #   the innermost container together with the array index or object key to write to
    # @raise [ParseError] when a value is not allowed at this position
    def parse_value_slot!
      context = @parse_state.context_stack.last

      unless context
        raise ParseError, 'unexpected token after top-level value' if @parse_state.root_assigned

        return ParseSlot.new(root: true)
      end

      case context
      when ArrayContext
        raise ParseError, 'expected comma before next array value' if context.mode == :after_value
        raise ParseError, 'cannot parse array value here' unless context.mode == :value_or_end

        # After a comma the target index was reserved; otherwise append at the end.
        index = context.provisional_index || context.container.length
        context.provisional_index = nil
        ParseSlot.new(container: context.container, key: index, root: false)
      when ObjectContext
        raise ParseError, 'expected colon before object value' if context.mode == :after_key
        raise ParseError, 'expected comma before next object entry' if context.mode == :after_value
        raise ParseError, 'expected object key' unless context.mode == :value

        ParseSlot.new(container: context.container, key: context.current_key, root: false)
      end
    end

    # @return [Boolean] true when a root value exists, no container is open and no token is
    #   pending - i.e. only whitespace may follow
    def top_level_value_complete?
      @parse_state.root_assigned &&
        @parse_state.context_stack.empty? &&
        @parse_state.token_state.nil?
    end

    # Writes +value+ into +slot+: either as the document root (also flagging the root as
    # assigned) or into the slot's container under the slot's key/index.
    def assign_parse_slot(slot, value)
      if slot.root
        @parse_state.root = value
        @parse_state.root_assigned = true
      else
        slot.container[slot.key] = value
      end
    end

    # Moves the innermost context to :after_value once a value has been placed.
    # Arrays always advance; objects advance when the slot is the root slot or a current
    # key is set. Nothing happens when no container is open.
    def transition_after_parse_value(slot)
      context = @parse_state.context_stack.last

      case context
      when ArrayContext
        context.mode = :after_value
      when ObjectContext
        context.mode = :after_value if slot.root || !context.current_key.nil?
      end
    end

    # Makes the partially parsed structure presentable before it is handed to the caller.
    #
    # * While a key is still being scanned, the text received so far is shown as a key with
    #   a nil value, and nothing else is touched.
    # * Otherwise every open object that has a key but no value yet gets that key set to
    #   nil, and every open array with a reserved (post-comma) index gets nil at that index.
    def finalize_parse_result
      token = @parse_state.token_state

      if token.is_a?(Scanners::ParsedStringToken) && token.role == :key
        update_visible_key_placeholder(token)
        return
      end

      @parse_state.context_stack.each do |context|
        case context
        when ObjectContext
          next unless %i[after_key value].include?(context.mode) && context.current_key

          context.container[context.current_key] = nil
        when ArrayContext
          next unless context.provisional_index

          context.container[context.provisional_index] = nil
        end
      end
    end

    # Removes the placeholder entry of an in-progress key from its container. If the
    # placeholder had overwritten an existing entry, that entry's value is put back;
    # otherwise the key is deleted. The token's placeholder bookkeeping is cleared.
    def restore_visible_key_placeholder(token)
      if token.visible_key_replaced_present
        token.context.container[token.visible_key] = token.visible_key_replaced_value
      else
        token.context.container.delete(token.visible_key)
      end

      token.visible_key = nil
      token.visible_key_replaced_value = nil
      token.visible_key_replaced_present = false
    end

    # Shows the current text of an in-progress key as a container entry with a nil value.
    # A previous placeholder with different text is undone first, and whatever entry the
    # new placeholder overwrites is remembered on the token so it can be restored later.
    def update_visible_key_placeholder(token)
      current_key = token.buffer.dup
      return if token.visible_key == current_key

      restore_visible_key_placeholder(token) if token.visible_key

      token.visible_key = current_key
      token.visible_key_replaced_present = token.context.container.key?(current_key)
      token.visible_key_replaced_value = token.context.container[current_key]
      token.context.container[current_key] = nil
    end

    # Decides whether a longer input invalidates the stored state.
    #
    # @return [Boolean] true only when a snapshot exists, prefix validation is required,
    #   and the new input does not start with the previous snapshot
    def reset_parse_state_for_input_growth?(input)
      return false unless @parse_state.input_snapshot
      return false unless prefix_validation_required?

      !input.start_with?(@parse_state.input_snapshot)
    end

    # @return [Boolean] true when no container is open; the O(n) prefix comparison is only
    #   performed in that situation
    def prefix_validation_required?
      @parse_state.context_stack.empty?
    end

    # Maps the first byte of a keyword (f/n/t) to the full literal it must spell.
    #
    # @raise [ParseError] for any other byte
    def keyword_target_for_byte(byte)
      case byte
      when 102
        'false'
      when 110
        'null'
      when 116
        'true'
      else
        raise ParseError, "unexpected keyword token byte: #{byte}"
      end
    end

    # @return [Boolean] true for tab (9), line feed (10), carriage return (13) and space (32)
    def whitespace_byte?(byte)
      case byte
      when 9, 10, 13, 32
        true
      else
        false
      end
    end
  end

  include ParserEngine
end