json_completer 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +38 -21
- data/lib/json_completer/completion_engine.rb +241 -0
- data/lib/json_completer/parser_engine.rb +386 -0
- data/lib/json_completer/scanners.rb +448 -0
- data/lib/json_completer.rb +36 -688
- metadata +5 -2
|
@@ -0,0 +1,448 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class JsonCompleter
|
|
4
|
+
module Scanners
|
|
5
|
+
# Token used while *completing* a JSON string: the buffer holds the raw JSON text of the string
# (opening quote included, escape sequences kept verbatim) so that an unterminated string can be
# closed into valid JSON.
#
# Members:
# - buffer:         StringIO receiving the raw string text; seeded with the opening quote.
# - escape_state:   :backslash right after a backslash has been consumed, otherwise nil.
# - unicode_digits: hex digits collected so far for a `\u` escape, or nil outside of one.
class CompletionStringToken < Struct.new(:buffer, :escape_state, :unicode_digits, keyword_init: true)
  # Matches an unfinished `\u` escape (fewer than four hex digits) at the very end of the text.
  # The lookbehind plus the even-length backslash run (capture 1) ensure the backslash before
  # `u` really opens an escape and is not the second half of an escaped backslash (`\\`).
  DANGLING_UNICODE_ESCAPE = /(?<!\\)((?:\\\\)*)\\u[0-9a-fA-F]{0,3}\z/.freeze

  def initialize(buffer: nil, escape_state: nil, unicode_digits: nil)
    buffer ||= StringIO.new
    buffer << '"' if buffer.string.empty?
    super
  end

  # Records the backslash itself and remembers that the next byte belongs to an escape.
  def start_escape!
    buffer << '\\'
    self.escape_state = :backslash
  end

  # Copies +length+ bytes of +input+ starting at byte offset +start_index+ into the buffer.
  def append_slice(input, start_index, length)
    buffer << input.byteslice(start_index, length)
  end

  # completion keeps escape bytes verbatim, so convert the ASCII byte back into a 1-byte string.
  def append_simple_escape(byte)
    buffer << byte.chr(Encoding::UTF_8)
  end

  # Completion mode accepts whatever byte follows a backslash.
  def valid_simple_escape?(_byte)
    true
  end

  def start_unicode_escape!
    self.unicode_digits = String.new
    buffer << 'u'
  end

  # Tracks the digit (so the scanner can count to four) and keeps it verbatim in the buffer.
  def append_unicode_digit(byte)
    unicode_digits << byte
    buffer << byte.chr(Encoding::UTF_8)
  end

  # Nothing to decode: the escape text is already in the buffer.
  def finish_unicode_escape!; end

  # Drops the partially written `\u...` escape from the end of the buffer and resets escape state.
  def invalid_unicode!
    current = buffer.string
    current = current.sub(/\\u[0-9a-fA-F]*\z/, '')
    # A fresh StringIO is written to (rather than constructed from the string) so that the
    # write position ends up after the retained text.
    self.buffer = StringIO.new
    buffer << current
    self.unicode_digits = nil
    self.escape_state = nil
  end

  def terminate!
    buffer << '"'
  end

  # Returns the buffered text closed with a quote, after removing anything that would make the
  # result invalid: a lone trailing backslash or an unfinished `\u` escape.
  #
  # A trailing `u` plus hex digits that follows an *escaped* backslash (e.g. `\\u12`) is ordinary
  # content and is preserved; stripping it would leave a single backslash that escapes the
  # closing quote.
  def finalized_incomplete_value
    value = buffer.string.dup

    # An odd-length trailing run means the last backslash started an escape that never finished.
    trailing_backslashes = value[/\\*\z/].length
    value = value[0...-1] if trailing_backslashes.odd?

    # '\1' puts back the escaped-backslash pairs that preceded the dangling escape.
    value = value.sub(DANGLING_UNICODE_ESCAPE, '\1')
    "#{value}\""
  end
end
|
|
70
|
+
|
|
71
|
+
# Token used while *parsing* a JSON string: the buffer holds the decoded string value (escape
# sequences are translated to the characters they denote).
#
# Members read or written in this class:
# - buffer:                 decoded text accumulated so far (UTF-8 String).
# - escape_state:           :backslash right after a backslash has been consumed, otherwise nil.
# - unicode_digits:         hex digits collected so far for a `\u` escape, or nil outside of one.
# - pending_high_surrogate: code point of a high surrogate waiting for its low surrogate, or nil.
#
# role, slot, context, visible_key, visible_key_replaced_value and visible_key_replaced_present
# are only stored here; NOTE(review): their meaning is defined by callers outside this class.
class ParsedStringToken < Struct.new(
  :role, :slot, :context, :buffer, :escape_state, :unicode_digits, :pending_high_surrogate, :visible_key,
  :visible_key_replaced_value, :visible_key_replaced_present,
  keyword_init: true
)
  def initialize(
    role:, slot: nil, context: nil, buffer: nil, escape_state: nil, unicode_digits: nil,
    pending_high_surrogate: nil, visible_key: nil, visible_key_replaced_value: nil, visible_key_replaced_present: false
  )
    super
    # A bare String.new is ASCII-8BIT, which would make empty and all-ASCII decoded values
    # binary-encoded; decoded JSON text should always be UTF-8.
    self.buffer ||= String.new(encoding: Encoding::UTF_8)
  end

  # The backslash is not stored: only the decoded character ends up in the buffer.
  def start_escape!
    self.escape_state = :backslash
  end

  # Copies +length+ bytes of +input+ starting at byte offset +start_index+ into the buffer.
  def append_slice(input, start_index, length)
    buffer << input.byteslice(start_index, length)
  end

  # ASCII escape bytes: 98/102/110/114/116 = b/f/n/r/t.
  # Any other byte (quote, backslash, slash) stands for itself.
  def append_simple_escape(byte)
    buffer << case byte
              when 98 then "\b"
              when 102 then "\f"
              when 110 then "\n"
              when 114 then "\r"
              when 116 then "\t"
              else byte.chr(Encoding::UTF_8)
              end
  end

  # True for the bytes of `"`, `\`, `/`, `b`, `f`, `n`, `r` and `t`.
  def valid_simple_escape?(byte)
    case byte
    when 34, 92, 47, 98, 102, 110, 114, 116
      true
    else
      false
    end
  end

  def start_unicode_escape!
    self.unicode_digits = String.new
  end

  def append_unicode_digit(byte)
    unicode_digits << byte
  end

  # Decodes the collected hex digits and appends the resulting character.
  #
  # A high surrogate is parked in pending_high_surrogate until the following escape supplies the
  # low surrogate; the pair is then combined into one code point.
  #
  # Returns :ok, or :invalid_unicode for a lone low surrogate, a high surrogate that is not
  # followed by a low surrogate, or a code point that cannot be encoded.
  def finish_unicode_escape!
    codepoint = unicode_digits.to_i(16)

    if pending_high_surrogate
      unless codepoint.between?(0xDC00, 0xDFFF)
        self.pending_high_surrogate = nil
        return :invalid_unicode
      end

      combined = 0x10000 + ((pending_high_surrogate - 0xD800) << 10) + (codepoint - 0xDC00)
      buffer << combined.chr(Encoding::UTF_8)
      self.pending_high_surrogate = nil
      return :ok
    end

    if codepoint.between?(0xD800, 0xDBFF)
      self.pending_high_surrogate = codepoint
    elsif codepoint.between?(0xDC00, 0xDFFF)
      return :invalid_unicode
    else
      buffer << codepoint.chr(Encoding::UTF_8)
    end

    :ok
  rescue RangeError
    self.pending_high_surrogate = nil
    :invalid_unicode
  end

  # Resets all escape-related state; the buffer is left untouched.
  def invalid_unicode!
    self.escape_state = nil
    self.unicode_digits = nil
    self.pending_high_surrogate = nil
  end

  # The closing quote is not part of the decoded value, so there is nothing to append.
  def terminate!; end
end
|
|
163
|
+
|
|
164
|
+
# Incremental recogniser for a JSON number. Bytes are fed one at a time through #append_byte;
# #completed_literal turns whatever has been accepted so far into a syntactically valid number.
#
# Members:
# - raw:     the accepted characters (UTF-8 String).
# - phase:   position in the number grammar: nil (nothing yet), :sign, :zero, :int, :frac_start,
#            :frac, :exp_start, :exp_sign or :exp.
# - invalid: set when a digit follows a leading zero.
# - slot:    only stored here. NOTE(review): meaning is defined by callers outside this class.
class NumberToken < Struct.new(:slot, :raw, :phase, :invalid, keyword_init: true)
  def initialize(slot: nil, raw: nil, phase: nil, invalid: false)
    super
    # A bare String.new would be ASCII-8BIT; keep number text in UTF-8 like the rest of the output.
    self.raw ||= String.new(encoding: Encoding::UTF_8)
  end

  # append_byte consumes ASCII bytes, not 1-character strings:
  # 45 = -, 46 = ., 48..57 = 0..9, 69/101 = E/e, 43 = +.
  #
  # Returns true when the byte extends the number (it is appended to raw and the phase advances),
  # false when it cannot; raw is left unchanged in that case.
  def append_byte(byte)
    case phase
    when nil
      case byte
      when 45
        raw << byte
        self.phase = :sign
      when 48
        raw << byte
        self.phase = :zero
      when 49..57
        raw << byte
        self.phase = :int
      else
        return false
      end
    when :sign
      case byte
      when 48
        raw << byte
        self.phase = :zero
      when 49..57
        raw << byte
        self.phase = :int
      when 46
        # Lenient: "-." is accepted here; completed_literal supplies the missing zero.
        raw << byte
        self.phase = :frac_start
      else
        return false
      end
    when :zero
      if Scanners.digit_byte?(byte)
        # A digit after a leading zero ("01") is not a JSON number.
        self.invalid = true
        return false
      elsif byte == 46
        raw << byte
        self.phase = :frac_start
      elsif Scanners.exponent_byte?(byte)
        raw << byte
        self.phase = :exp_start
      else
        return false
      end
    when :int
      if Scanners.digit_byte?(byte)
        raw << byte
      elsif byte == 46
        raw << byte
        self.phase = :frac_start
      elsif Scanners.exponent_byte?(byte)
        raw << byte
        self.phase = :exp_start
      else
        return false
      end
    when :frac_start
      return false unless Scanners.digit_byte?(byte)

      raw << byte
      self.phase = :frac
    when :frac
      if Scanners.digit_byte?(byte)
        raw << byte
      elsif Scanners.exponent_byte?(byte)
        raw << byte
        self.phase = :exp_start
      else
        return false
      end
    when :exp_start
      case byte
      when 43, 45
        raw << byte
        self.phase = :exp_sign
      when 48..57
        raw << byte
        self.phase = :exp
      else
        return false
      end
    when :exp_sign
      return false unless Scanners.digit_byte?(byte)

      raw << byte
      self.phase = :exp
    when :exp
      return false unless Scanners.digit_byte?(byte)

      raw << byte
    end

    true
  end

  # Returns the accepted text padded into a valid JSON number:
  # - a bare sign becomes "0";
  # - a trailing ".", "e"/"E" or exponent sign gets a "0" appended;
  # - a missing integer part (".5", "-.5", "-.") gets a "0" inserted before the decimal point;
  # - empty input becomes "0".
  def completed_literal
    literal = raw.dup

    case phase
    when :sign
      literal = '0'
    when :frac_start, :exp_start, :exp_sign
      literal = "#{literal}0"
    end

    # append_byte accepts "-." followed by digits, so the integer part can be missing in any
    # later phase, not only in :frac_start.
    literal = literal.sub(/\A-?(?=\.)/) { |sign| "#{sign}0" }
    literal = '0' if literal.empty? || literal == '-'
    literal
  end

  # Float when the completed literal has a fraction or exponent, Integer otherwise.
  def parsed_value
    literal = completed_literal
    literal.match?(/[.eE]/) ? literal.to_f : literal.to_i
  end

  def invalid?
    invalid
  end
end
|
|
292
|
+
|
|
293
|
+
# Incremental matcher for one JSON keyword (`target`, e.g. 'true', 'false' or 'null').
# `matched` counts how many leading bytes of the keyword have been recognised so far.
class KeywordToken < Struct.new(:slot, :target, :matched, keyword_init: true)
  # target is mandatory; matching starts at the first byte unless told otherwise.
  def initialize(target:, slot: nil, matched: 0)
    super
  end

  # Accepts +byte+ when it is the next expected keyword byte, compared case-insensitively by
  # setting the ASCII lowercase bit. Returns true and advances on success, false otherwise
  # (including when the keyword is already fully matched).
  def append_byte(byte)
    position = matched
    return false unless position < target.length

    expected = target.getbyte(position)
    return false if (byte | 0x20) != expected

    self.matched = position + 1
    true
  end

  # Ruby value for the keyword: true, false, or nil for anything else.
  def parsed_value
    return true if target == 'true'
    return false if target == 'false'

    nil
  end
end
|
|
315
|
+
|
|
316
|
+
module_function
|
|
317
|
+
|
|
318
|
+
# Scans the body of a JSON string in +input+ starting at byte offset +index+, feeding +token+.
#
# Escape state lives on the token (escape_state / unicode_digits), so a scan can resume in the
# middle of an escape sequence. A ParsedStringToken switches on strict checks (valid escapes,
# surrogate pairing, no raw control characters).
#
# Returns [next_index, status], status being one of :terminated, :incomplete, :invalid_unicode,
# :invalid_escape or :invalid_control_character.
def scan_string(input, index, token)
  strict = token.is_a?(ParsedStringToken)
  # JSON string syntax is ASCII, so working on bytes is safe: multibyte UTF-8 content is opaque
  # payload that gets copied in bulk (byteslice) whenever an ASCII delimiter or escape shows up.
  limit = input.bytesize
  chunk_start = index

  while index < limit
    byte = input.getbyte(index)

    # Inside \uXXXX: collect hex digits until four have been seen.
    if token.unicode_digits
      unless hex_digit_byte?(byte)
        token.invalid_unicode!
        token.terminate!
        return [index, :invalid_unicode]
      end

      token.append_unicode_digit(byte)
      index += 1
      chunk_start = index
      next if token.unicode_digits.length != 4

      outcome = token.finish_unicode_escape!
      token.escape_state = nil
      token.unicode_digits = nil
      return [index, :invalid_unicode] if outcome == :invalid_unicode

      next
    end

    # Byte right after a backslash (117 = "u").
    if token.escape_state == :backslash
      unicode_intro = byte == 117
      # A pending high surrogate may only be followed by another \u escape.
      return [index, :invalid_unicode] if strict && token.pending_high_surrogate && !unicode_intro

      if unicode_intro
        token.start_unicode_escape!
      else
        return [index, :invalid_escape] unless token.valid_simple_escape?(byte)

        token.append_simple_escape(byte)
        token.escape_state = nil
      end

      index += 1
      chunk_start = index
      next
    end

    # A pending high surrogate must be followed immediately by a backslash (92).
    return [index, :invalid_unicode] if strict && token.pending_high_surrogate && byte != 92

    case byte
    when 34 # closing quote
      token.append_slice(input, chunk_start, index - chunk_start) if index > chunk_start
      # Same condition as the guard before this case; repeated here as in the original flow.
      return [index, :invalid_unicode] if strict && token.pending_high_surrogate

      token.terminate!
      return [index + 1, :terminated]
    when 92 # backslash: flush the pending plain text, then enter escape state
      token.append_slice(input, chunk_start, index - chunk_start) if index > chunk_start
      token.start_escape!
      index += 1
      chunk_start = index
      next
    end

    if strict && byte < 0x20
      token.append_slice(input, chunk_start, index - chunk_start) if index > chunk_start
      return [index, :invalid_control_character]
    end

    index += 1
  end

  # Input exhausted before the closing quote: keep what was read so far.
  token.append_slice(input, chunk_start, index - chunk_start) if index > chunk_start
  [index, :incomplete]
end
|
|
404
|
+
|
|
405
|
+
# Feeds bytes of +input+, starting at byte offset +index+, into a fresh NumberToken until one is
# rejected or the input ends.
#
# Returns [completed_literal, consumed_byte_count].
def scan_number_literal(input, index)
  token = NumberToken.new
  limit = input.bytesize
  cursor = index
  cursor += 1 while cursor < limit && token.append_byte(input.getbyte(cursor))

  [token.completed_literal, cursor - index]
end
|
|
416
|
+
|
|
417
|
+
# Matches as much of +target_keyword+ as possible in +input+, starting at byte offset +index+.
#
# Returns [target_keyword, consumed_byte_count] when at least one byte matched; otherwise the
# single unmatched byte is handed back as [that_byte_as_string, 1].
def scan_keyword_literal(input, index, target_keyword)
  token = KeywordToken.new(target: target_keyword)
  limit = input.bytesize
  cursor = index
  cursor += 1 while cursor < limit && token.append_byte(input.getbyte(cursor))

  if token.matched.zero?
    [input.byteslice(index, 1), 1]
  else
    [target_keyword, cursor - index]
  end
end
|
|
430
|
+
|
|
431
|
+
# True when +byte+ is the ASCII code of a decimal digit (48..57 = "0".."9").
def digit_byte?(byte)
  byte >= 48 && byte <= 57
end
|
|
434
|
+
|
|
435
|
+
# True when +byte+ is the ASCII code of an exponent marker (69 = "E", 101 = "e").
def exponent_byte?(byte)
  byte == 69 || byte == 101
end
|
|
443
|
+
|
|
444
|
+
# True when +byte+ is the ASCII code of a hexadecimal digit:
# 48..57 = "0".."9", 65..70 = "A".."F", 97..102 = "a".."f".
def hex_digit_byte?(byte)
  return true if digit_byte?(byte)
  return true if byte.between?(65, 70)

  byte.between?(97, 102)
end
|
|
447
|
+
end
|
|
448
|
+
end
|