json_completer 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +35 -21
- data/lib/json_completer/completion_engine.rb +223 -0
- data/lib/json_completer/parser_engine.rb +345 -0
- data/lib/json_completer/scanners.rb +402 -0
- data/lib/json_completer.rb +36 -688
- metadata +5 -2
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class JsonCompleter
|
|
4
|
+
module Scanners
|
|
5
|
+
# Accumulates the raw JSON text of a string literal (opening quote and escape
# sequences kept verbatim) so that a truncated literal can be closed into
# something that is still valid JSON.
#
# Members:
#   buffer         - StringIO holding the raw literal; seeded with '"' when empty
#   escape_state   - set to :backslash by #start_escape!, cleared by #invalid_unicode!
#   unicode_digits - hex digits collected for the current \u escape, or nil
class CompletionStringToken < Struct.new(:buffer, :escape_state, :unicode_digits, keyword_init: true)
  # Matches an unfinished \u escape (0-3 hex digits) at the very end of the
  # literal, but only when its backslash is not itself escaped: the run of
  # backslashes in front of it (captured in group 1) must have even length.
  UNFINISHED_UNICODE_ESCAPE = /(?<!\\)((?:\\\\)*)\\u[0-9a-fA-F]{0,3}\z/.freeze

  # @param buffer [StringIO, nil] existing raw text; a fresh buffer is created when nil
  # @param escape_state [Symbol, nil]
  # @param unicode_digits [String, nil]
  def initialize(buffer: nil, escape_state: nil, unicode_digits: nil)
    buffer ||= StringIO.new
    buffer << '"' if buffer.string.empty?
    super
  end

  # Records a backslash and remembers that the next character belongs to it.
  def start_escape!
    buffer << '\\'
    self.escape_state = :backslash
  end

  # Appends an ordinary character verbatim.
  def append_char(char)
    buffer << char
  end

  # Appends the character following a backslash verbatim (no decoding here).
  def append_simple_escape(char)
    buffer << char
  end

  # This token accepts any character after a backslash.
  def valid_simple_escape?(_char)
    true
  end

  # Begins a \u escape: starts collecting hex digits and records the 'u'.
  def start_unicode_escape!
    self.unicode_digits = String.new
    buffer << 'u'
  end

  # Records one hex digit both in the digit collector and in the raw text.
  def append_unicode_digit(char)
    unicode_digits << char
    buffer << char
  end

  # Nothing to decode: the raw escape text is already in the buffer.
  def finish_unicode_escape!; end

  # Discards the trailing \u escape (and any hex digits after it) from the raw
  # text and resets the escape bookkeeping.
  def invalid_unicode!
    current = buffer.string
    current = current.sub(/\\u[0-9a-fA-F]*\z/, '')
    # Rebuild the buffer so subsequent writes append after the trimmed text.
    self.buffer = StringIO.new
    buffer << current
    self.unicode_digits = nil
    self.escape_state = nil
  end

  # Appends the closing quote.
  def terminate!
    buffer << '"'
  end

  # Returns the raw literal closed with a quote, after removing a dangling
  # escape at the end that would make the result invalid.
  #
  # @return [String]
  def finalized_incomplete_value
    value = buffer.string.dup

    # An odd run of trailing backslashes means the last one opened an escape
    # that never received its character; drop just that one.
    value = value[0...-1] if value[/\\*\z/].length.odd?

    # Drop an unfinished \uXXXX escape. Text such as `\\u12` (an escaped
    # backslash followed by the letters "u12") is left alone: removing its
    # second backslash would leave a lone backslash that swallows the closing
    # quote appended below.
    value = value.sub(UNFINISHED_UNICODE_ESCAPE) { Regexp.last_match(1) }

    "#{value}\""
  end
end
|
|
69
|
+
|
|
70
|
+
# Accumulates the decoded value of a JSON string literal: escape sequences are
# translated to the characters they denote and \u surrogate pairs are combined.
#
# Members used by this class:
#   buffer                 - String receiving the decoded characters
#   escape_state           - :backslash after #start_escape!, cleared by #invalid_unicode!
#   unicode_digits         - hex digits collected for the current \u escape, or nil
#   pending_high_surrogate - code point of a high surrogate awaiting its low half, or nil
#
# role, slot, context, visible_key, visible_key_replaced_value and
# visible_key_replaced_present are stored but never interpreted here.
class ParsedStringToken < Struct.new(
  :role, :slot, :context, :buffer, :escape_state, :unicode_digits, :pending_high_surrogate, :visible_key,
  :visible_key_replaced_value, :visible_key_replaced_present,
  keyword_init: true
)
  # Characters allowed after a backslash, mapped to what they decode to.
  SIMPLE_ESCAPES = {
    '"' => '"', '\\' => '\\', '/' => '/',
    'b' => "\b", 'f' => "\f", 'n' => "\n", 'r' => "\r", 't' => "\t"
  }.freeze

  HIGH_SURROGATES = (0xD800..0xDBFF).freeze
  LOW_SURROGATES = (0xDC00..0xDFFF).freeze

  def initialize(
    role:, slot: nil, context: nil, buffer: nil, escape_state: nil, unicode_digits: nil,
    pending_high_surrogate: nil, visible_key: nil, visible_key_replaced_value: nil, visible_key_replaced_present: false
  )
    super
    # A bare String.new is ASCII-8BIT and stays that way while only ASCII is
    # appended, which would hand binary-tagged strings to callers.
    self.buffer ||= String.new(encoding: Encoding::UTF_8)
  end

  # Remembers that the next character belongs to a backslash escape.
  def start_escape!
    self.escape_state = :backslash
  end

  # Appends an ordinary character.
  def append_char(char)
    buffer << char
  end

  # Appends the character denoted by a single-letter escape; characters without
  # a table entry are appended unchanged.
  def append_simple_escape(char)
    buffer << SIMPLE_ESCAPES.fetch(char, char)
  end

  # @return [Boolean] whether +char+ may follow a backslash (\u is handled separately)
  def valid_simple_escape?(char)
    SIMPLE_ESCAPES.key?(char)
  end

  # Begins collecting the hex digits of a \u escape.
  def start_unicode_escape!
    self.unicode_digits = String.new
  end

  # Records one hex digit of the current \u escape.
  def append_unicode_digit(char)
    unicode_digits << char
  end

  # Decodes the collected hex digits and appends the resulting character.
  # A high surrogate is held back until the following escape supplies the low
  # half; a low surrogate without a pending high half is rejected.
  #
  # @return [Symbol] :ok or :invalid_unicode
  def finish_unicode_escape!
    codepoint = unicode_digits.to_i(16)
    high = pending_high_surrogate

    if high
      self.pending_high_surrogate = nil
      return :invalid_unicode unless LOW_SURROGATES.cover?(codepoint)

      combined = 0x10000 + ((high - 0xD800) << 10) + (codepoint - 0xDC00)
      buffer << combined.chr(Encoding::UTF_8)
    elsif HIGH_SURROGATES.cover?(codepoint)
      self.pending_high_surrogate = codepoint
    elsif LOW_SURROGATES.cover?(codepoint)
      return :invalid_unicode
    else
      buffer << codepoint.chr(Encoding::UTF_8)
    end

    :ok
  rescue RangeError
    # Integer#chr refuses code points that cannot be encoded.
    self.pending_high_surrogate = nil
    :invalid_unicode
  end

  # Resets all escape bookkeeping; the decoded buffer is left untouched.
  def invalid_unicode!
    self.escape_state = nil
    self.unicode_digits = nil
    self.pending_high_surrogate = nil
  end

  # The closing quote is not part of the decoded value.
  def terminate!; end
end
|
|
156
|
+
|
|
157
|
+
# Incremental recogniser for a JSON number. Characters are offered one at a
# time through #append; the token tracks which part of the number it is in and
# can complete a truncated number into a valid literal.
#
# Members:
#   slot    - stored, not interpreted here
#   raw     - characters accepted so far
#   phase   - nil (nothing yet), :sign, :zero, :int, :frac_start, :frac,
#             :exp_start, :exp_sign or :exp
#   invalid - set when a digit follows a leading zero
class NumberToken < Struct.new(:slot, :raw, :phase, :invalid, keyword_init: true)
  DIGIT = /[0-9]/.freeze
  EXPONENT_MARKERS = %w[e E].freeze
  EXPONENT_SIGNS = ['+', '-'].freeze

  def initialize(slot: nil, raw: nil, phase: nil, invalid: false)
    super
    self.raw ||= String.new
  end

  # Offers the next character to the number.
  #
  # @param char [String] a single character
  # @return [Boolean] true when the character was consumed, false when it
  #   cannot extend the number (the token is left unchanged, except that a
  #   digit after a leading zero also marks the token invalid)
  def append(char)
    case phase
    when nil
      case char
      when '-' then accept(char, :sign)
      when '0' then accept(char, :zero)
      when DIGIT then accept(char, :int)
      else false
      end
    when :sign
      # A '.' directly after the sign is tolerated; #completed_literal supplies
      # the missing integer zero.
      case char
      when '0' then accept(char, :zero)
      when DIGIT then accept(char, :int)
      when '.' then accept(char, :frac_start)
      else false
      end
    when :zero
      if DIGIT.match?(char)
        # Leading zeros are not allowed.
        self.invalid = true
        false
      elsif char == '.'
        accept(char, :frac_start)
      elsif EXPONENT_MARKERS.include?(char)
        accept(char, :exp_start)
      else
        false
      end
    when :int
      if DIGIT.match?(char)
        accept(char, :int)
      elsif char == '.'
        accept(char, :frac_start)
      elsif EXPONENT_MARKERS.include?(char)
        accept(char, :exp_start)
      else
        false
      end
    when :frac_start
      DIGIT.match?(char) ? accept(char, :frac) : false
    when :frac
      if DIGIT.match?(char)
        accept(char, :frac)
      elsif EXPONENT_MARKERS.include?(char)
        accept(char, :exp_start)
      else
        false
      end
    when :exp_start
      if EXPONENT_SIGNS.include?(char)
        accept(char, :exp_sign)
      elsif DIGIT.match?(char)
        accept(char, :exp)
      else
        false
      end
    when :exp_sign, :exp
      DIGIT.match?(char) ? accept(char, :exp) : false
    else
      # Unrecognised phase: report success without consuming anything.
      true
    end
  end

  # Returns the accepted text turned into a complete JSON number.
  #
  #   "-"    => "0"       "12."  => "12.0"     "1e"  => "1e0"
  #   "-."   => "-0.0"    "-.5"  => "-0.5"     "1e-" => "1e-0"
  #
  # @return [String]
  def completed_literal
    return '0' if phase == :sign

    # JSON requires a digit before the decimal point; "-.5" and ".5" are
    # accepted on input, so insert the zero here.
    literal = raw.sub(/\A-?(?=\.)/) { |sign| "#{sign}0" }

    # These phases end on a character that must be followed by a digit.
    literal = "#{literal}0" if %i[frac_start exp_start exp_sign].include?(phase)

    literal = '0' if literal.empty? || literal == '-'
    literal
  end

  # @return [Float, Integer] Float when the completed literal has a fraction
  #   or exponent, Integer otherwise
  def parsed_value
    literal = completed_literal
    literal.match?(/[.eE]/) ? literal.to_f : literal.to_i
  end

  # @return [Boolean] whether a digit was seen after a leading zero
  def invalid?
    invalid
  end

  private

  # Consumes +char+ and moves to +next_phase+.
  def accept(char, next_phase)
    raw << char
    self.phase = next_phase
    true
  end
end
|
|
282
|
+
|
|
283
|
+
# Tracks how much of a keyword literal has been matched so far.
#
# Members:
#   slot    - stored, not interpreted here
#   target  - the keyword being matched, compared in lower case
#   matched - number of leading characters of +target+ already consumed
class KeywordToken < Struct.new(:slot, :target, :matched, keyword_init: true)
  def initialize(target:, slot: nil, matched: 0)
    super
  end

  # Offers the next character, ignoring case.
  #
  # @return [Boolean] true when +char+ is the next expected character of the
  #   keyword; false when it is not or the keyword is already fully matched
  def append(char)
    still_open = matched < target.length
    return false unless still_open && char.downcase == target[matched]

    self.matched = matched + 1
    true
  end

  # @return [true, false, nil] the value the keyword denotes; nil for any
  #   target other than 'true' and 'false'
  def parsed_value
    return true if target == 'true'
    return false if target == 'false'

    nil
  end
end
|
|
305
|
+
|
|
306
|
+
module_function
|
|
307
|
+
|
|
308
|
+
# Feeds characters of +input+, starting at +index+, into a string token until
# the string ends, the input runs out, or an error is found.
#
# When +token+ is a ParsedStringToken the scan is strict: control characters
# (below 0x20) are rejected and a pending high surrogate must be followed
# immediately by another \u escape.
#
# @param input [String]
# @param index [Integer] position of the first character to examine
# @param token [#append_char, #start_escape!, ...] string token to drive
# @return [Array(Integer, Symbol)] position and status:
#   :terminated (position is just past the closing quote), :incomplete
#   (position is the end of input), or :invalid_unicode, :invalid_escape,
#   :invalid_control_character (position where scanning stopped)
def scan_string(input, index, token)
  strict = token.is_a?(ParsedStringToken)

  while index < input.length
    ch = input[index]

    if token.unicode_digits
      # Inside \uXXXX: only hex digits may follow.
      unless ch.match?(/[0-9a-fA-F]/)
        token.invalid_unicode!
        token.terminate!
        return [index, :invalid_unicode]
      end

      token.append_unicode_digit(ch)
      index += 1

      if token.unicode_digits.length == 4
        outcome = token.finish_unicode_escape!
        token.escape_state = nil
        token.unicode_digits = nil
        return [index, :invalid_unicode] if outcome == :invalid_unicode
      end
    elsif token.escape_state == :backslash
      # Character right after a backslash.
      return [index, :invalid_unicode] if strict && token.pending_high_surrogate && ch != 'u'

      if ch == 'u'
        token.start_unicode_escape!
      else
        return [index, :invalid_escape] unless token.valid_simple_escape?(ch)

        token.append_simple_escape(ch)
        token.escape_state = nil
      end
      index += 1
    elsif ch == '\\'
      token.start_escape!
      index += 1
    elsif ch == '"'
      return [index, :invalid_unicode] if strict && token.pending_high_surrogate

      token.terminate!
      return [index + 1, :terminated]
    else
      if strict
        return [index, :invalid_control_character] if ch.ord < 0x20
        return [index, :invalid_unicode] if token.pending_high_surrogate
      end

      token.append_char(ch)
      index += 1
    end
  end

  [index, :incomplete]
end
|
|
377
|
+
|
|
378
|
+
# Consumes as much of a number as possible from +input+ starting at +index+.
#
# @param input [String]
# @param index [Integer] position of the first character to examine
# @return [Array(String, Integer)] the token's completed literal and the
#   number of characters consumed
def scan_number_literal(input, index)
  number = NumberToken.new
  cursor = index
  cursor += 1 while cursor < input.length && number.append(input[cursor])

  [number.completed_literal, cursor - index]
end
|
|
388
|
+
|
|
389
|
+
# Matches a possibly truncated keyword at +index+ and completes it.
#
# @param input [String]
# @param index [Integer] position of the first character to examine
# @param target_keyword [String] keyword expected at this position
# @return [Array(String, Integer)] the full keyword and the number of
#   characters consumed; when not even the first character matches, the
#   character at +index+ itself with a length of 1
def scan_keyword_literal(input, index, target_keyword)
  keyword = KeywordToken.new(target: target_keyword)
  cursor = index
  cursor += 1 while cursor < input.length && keyword.append(input[cursor])

  if keyword.matched.zero?
    [input[index], 1]
  else
    [target_keyword, cursor - index]
  end
end
|
|
401
|
+
end
|
|
402
|
+
end
|