json_completer 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,448 @@
1
+ # frozen_string_literal: true
2
+
3
+ class JsonCompleter
4
+ module Scanners
5
# String token whose buffer holds the raw JSON text of a string literal: the opening quote is
# written on construction and escape sequences are appended verbatim rather than decoded.
# `scan_string` treats any token that is not a ParsedStringToken as non-strict, which is how
# this class is scanned.
class CompletionStringToken < Struct.new(:buffer, :escape_state, :unicode_digits, keyword_init: true)
  # An unfinished `\u` escape (0-3 hex digits) at the very end of the text. The backslash only
  # starts an escape when it is preceded by an even number of backslashes; the captured group
  # holds those preceding (already paired) backslashes so they survive the substitution.
  PARTIAL_UNICODE_ESCAPE = /(?<!\\)((?:\\\\)*)\\u[0-9a-fA-F]{0,3}\z/.freeze

  # @param buffer [StringIO, nil] destination for the raw text; a fresh StringIO when nil.
  #   The opening quote is written only when the buffer is still empty.
  # @param escape_state [Symbol, nil] :backslash while an escape is being read
  # @param unicode_digits [String, nil] hex digits collected for the current `\u` escape
  def initialize(buffer: nil, escape_state: nil, unicode_digits: nil)
    buffer ||= StringIO.new
    buffer << '"' if buffer.string.empty?
    super
  end

  # Records the backslash verbatim and marks the token as being inside an escape.
  def start_escape!
    buffer << '\\'
    self.escape_state = :backslash
  end

  # Copies `length` bytes of `input`, starting at byte offset `start_index`, into the buffer.
  def append_slice(input, start_index, length)
    buffer << input.byteslice(start_index, length)
  end

  # completion keeps escape bytes verbatim, so convert the ASCII byte back into a 1-byte string.
  def append_simple_escape(byte)
    buffer << byte.chr(Encoding::UTF_8)
  end

  # Every escape byte is accepted here; no validation is performed for this token type.
  def valid_simple_escape?(_byte)
    true
  end

  # Begins collecting hex digits and writes the `u` that follows the backslash.
  def start_unicode_escape!
    self.unicode_digits = String.new
    buffer << 'u'
  end

  # Tracks the digit (so the scanner can count to four) and keeps it verbatim in the buffer.
  def append_unicode_digit(byte)
    unicode_digits << byte
    buffer << byte.chr(Encoding::UTF_8)
  end

  # Nothing to decode: the escape text is already in the buffer.
  def finish_unicode_escape!; end

  # Drops the trailing `\u` + hex digits from the buffer and clears the escape state.
  def invalid_unicode!
    current = buffer.string
    current = current.sub(/\\u[0-9a-fA-F]*\z/, '')
    self.buffer = StringIO.new
    buffer << current
    self.unicode_digits = nil
    self.escape_state = nil
  end

  # Writes the closing quote.
  def terminate!
    buffer << '"'
  end

  # Returns the buffered text closed with a quote, after removing anything that would make the
  # result an invalid string: a dangling lone backslash and an unfinished `\u` escape.
  #
  # A trailing `u` (plus hex digits) is only removed when its backslash is a real escape
  # introducer. Text such as `"C:\\u` ends in an escaped backslash followed by a literal `u`;
  # stripping `\u` there would leave `"C:\`, whose closing quote would then be escaped.
  #
  # @return [String]
  def finalized_incomplete_value
    value = buffer.string.dup
    value = value[0...-1] if trailing_backslash_count(value).odd?
    value = value.sub(PARTIAL_UNICODE_ESCAPE, '\1')
    "#{value}\""
  end

  private

  # Number of consecutive backslashes at the end of `text`.
  def trailing_backslash_count(text)
    count = 0
    position = text.length - 1

    while position >= 0 && text[position] == '\\'
      count += 1
      position -= 1
    end

    count
  end
end
70
+
71
# String token that decodes the literal while it is scanned: the buffer receives the decoded
# characters (escapes resolved, surrogate pairs combined), not the raw JSON text.
class ParsedStringToken < Struct.new(
  :role, :slot, :context, :buffer, :escape_state, :unicode_digits, :pending_high_surrogate, :visible_key,
  :visible_key_replaced_value, :visible_key_replaced_present,
  keyword_init: true
)
  # ASCII escape bytes that decode to a control character: b/f/n/r/t.
  CONTROL_ESCAPES = { 98 => "\b", 102 => "\f", 110 => "\n", 114 => "\r", 116 => "\t" }.freeze
  # Every byte allowed after a backslash (besides `u`): " \ / b f n r t.
  SIMPLE_ESCAPE_BYTES = [34, 92, 47, 98, 102, 110, 114, 116].freeze
  HIGH_SURROGATES = (0xD800..0xDBFF).freeze
  LOW_SURROGATES = (0xDC00..0xDFFF).freeze

  def initialize(
    role:, slot: nil, context: nil, buffer: nil, escape_state: nil, unicode_digits: nil,
    pending_high_surrogate: nil, visible_key: nil, visible_key_replaced_value: nil, visible_key_replaced_present: false
  )
    super
    self.buffer ||= String.new
  end

  # Marks the token as being inside an escape; nothing is written to the buffer.
  def start_escape!
    self.escape_state = :backslash
  end

  # Copies `length` bytes of `input`, starting at byte offset `start_index`, into the buffer.
  def append_slice(input, start_index, length)
    buffer << input.byteslice(start_index, length)
  end

  # Appends the decoded form of a simple escape. Bytes without a control-character mapping
  # are appended as-is (String#<< turns the integer into the matching character).
  def append_simple_escape(byte)
    buffer << CONTROL_ESCAPES.fetch(byte, byte)
  end

  # True only for the escape bytes listed in SIMPLE_ESCAPE_BYTES.
  def valid_simple_escape?(byte)
    SIMPLE_ESCAPE_BYTES.include?(byte)
  end

  # Begins collecting the hex digits of a `\u` escape.
  def start_unicode_escape!
    self.unicode_digits = String.new
  end

  def append_unicode_digit(byte)
    unicode_digits << byte
  end

  # Decodes the collected hex digits. A high surrogate is remembered until the following
  # escape supplies its low half; unpaired or out-of-order surrogates are rejected.
  #
  # @return [Symbol] :ok or :invalid_unicode
  def finish_unicode_escape!
    codepoint = unicode_digits.to_i(16)
    high = pending_high_surrogate

    if high
      # Whatever happens next, the pending half is consumed.
      self.pending_high_surrogate = nil
      return :invalid_unicode unless LOW_SURROGATES.cover?(codepoint)

      paired = 0x10000 + ((high - 0xD800) << 10) + (codepoint - 0xDC00)
      buffer << paired.chr(Encoding::UTF_8)
      return :ok
    end

    case codepoint
    when HIGH_SURROGATES
      self.pending_high_surrogate = codepoint
    when LOW_SURROGATES
      return :invalid_unicode
    else
      buffer << codepoint.chr(Encoding::UTF_8)
    end

    :ok
  rescue RangeError
    self.pending_high_surrogate = nil
    :invalid_unicode
  end

  # Clears every piece of escape-related state; the buffer is left untouched.
  def invalid_unicode!
    self.pending_high_surrogate = nil
    self.unicode_digits = nil
    self.escape_state = nil
  end

  # No closing quote is stored for a decoded string.
  def terminate!; end
end
163
+
164
# Incremental recogniser for a JSON number. Bytes are fed one at a time through
# `append_byte`; `raw` accumulates the accepted text and `phase` records which part of the
# number was seen last (nil, :sign, :zero, :int, :frac_start, :frac, :exp_start, :exp_sign, :exp).
class NumberToken < Struct.new(:slot, :raw, :phase, :invalid, keyword_init: true)
  def initialize(slot: nil, raw: nil, phase: nil, invalid: false)
    super
    self.raw ||= String.new
  end

  # append_byte consumes ASCII bytes, not 1-character strings:
  # 45 = -, 46 = ., 48..57 = 0..9, 69/101 = E/e.
  #
  # @param byte [Integer]
  # @return [Boolean] true when the byte was accepted and appended to `raw`; false when it
  #   cannot continue the number (nothing is appended in that case)
  def append_byte(byte)
    case phase
    when nil
      case byte
      when 45
        raw << byte
        self.phase = :sign
      when 48
        raw << byte
        self.phase = :zero
      when 49..57
        raw << byte
        self.phase = :int
      else
        return false
      end
    when :sign
      case byte
      when 48
        raw << byte
        self.phase = :zero
      when 49..57
        raw << byte
        self.phase = :int
      when 46
        # Lenient: "-." is accepted here; completed_literal restores the missing "0".
        raw << byte
        self.phase = :frac_start
      else
        return false
      end
    when :zero
      if Scanners.digit_byte?(byte)
        # A digit directly after a leading zero: flag the token and stop.
        self.invalid = true
        return false
      elsif byte == 46
        raw << byte
        self.phase = :frac_start
      elsif Scanners.exponent_byte?(byte)
        raw << byte
        self.phase = :exp_start
      else
        return false
      end
    when :int
      if Scanners.digit_byte?(byte)
        raw << byte
      elsif byte == 46
        raw << byte
        self.phase = :frac_start
      elsif Scanners.exponent_byte?(byte)
        raw << byte
        self.phase = :exp_start
      else
        return false
      end
    when :frac_start
      return false unless Scanners.digit_byte?(byte)

      raw << byte
      self.phase = :frac
    when :frac
      if Scanners.digit_byte?(byte)
        raw << byte
      elsif Scanners.exponent_byte?(byte)
        raw << byte
        self.phase = :exp_start
      else
        return false
      end
    when :exp_start
      case byte
      when 43, 45
        raw << byte
        self.phase = :exp_sign
      when 48..57
        raw << byte
        self.phase = :exp
      else
        return false
      end
    when :exp_sign
      return false unless Scanners.digit_byte?(byte)

      raw << byte
      self.phase = :exp
    when :exp
      return false unless Scanners.digit_byte?(byte)

      raw << byte
    end

    true
  end

  # Returns `raw` turned into a syntactically complete JSON number.
  #
  # The integer part is restored first ("-.5" -> "-0.5", ".5" -> "0.5"), because the :sign
  # phase accepts a "." directly after "-" and JSON requires at least one digit before the
  # fraction. Then whatever the current phase still lacks is filled in with a zero.
  #
  # @return [String]
  def completed_literal
    literal = raw.dup
    literal = literal.sub('-.', '-0.') if literal.start_with?('-.')
    literal = "0#{literal}" if literal.start_with?('.')

    case phase
    when :sign
      literal = '0'
    when :frac_start, :exp_start, :exp_sign
      # "1." -> "1.0", "1e" -> "1e0", "1e-" -> "1e-0"
      literal = "#{literal}0"
    end

    literal = '0' if literal.empty? || literal == '-'
    literal
  end

  # @return [Float, Integer] Float when the completed literal has a fraction or exponent
  def parsed_value
    literal = completed_literal
    literal.match?(/[.eE]/) ? literal.to_f : literal.to_i
  end

  def invalid?
    invalid
  end
end
292
+
293
# Matches input bytes against a fixed keyword (`target`), counting in `matched` how many
# leading bytes have been recognised so far.
class KeywordToken < Struct.new(:slot, :target, :matched, keyword_init: true)
  def initialize(target:, slot: nil, matched: 0)
    super
  end

  # Accepts `byte` when it is the next expected byte of the keyword. Bit 0x20 is forced on
  # before comparing, so an upper-case ASCII letter matches its lower-case counterpart.
  #
  # @param byte [Integer]
  # @return [Boolean] true when the byte extended the match
  def append_byte(byte)
    if matched < target.length && target.getbyte(matched) == (byte | 0x20)
      self.matched = matched + 1
      true
    else
      false
    end
  end

  # @return [true, false, nil] the Ruby value for 'true' / 'false'; nil for any other target
  def parsed_value
    if target == 'true'
      true
    elsif target == 'false'
      false
    end
  end
end
315
+
316
+ module_function
317
+
318
# Scans the body of a JSON string starting at byte offset `index`, feeding `token` as it goes.
# Escape state lives on the token, so a scan can resume inside an escape left by a prior call.
#
# JSON string syntax is ASCII, so scanning bytes is safe here: multibyte UTF-8 content is
# treated as opaque payload and copied via byteslice until we hit an ASCII delimiter/escape.
#
# @return [Array(Integer, Symbol)] the byte offset where scanning stopped and one of
#   :terminated, :incomplete, :invalid_unicode, :invalid_escape, :invalid_control_character
def scan_string(input, index, token)
  strict = token.is_a?(ParsedStringToken)
  quote = 34
  backslash = 92
  lower_u = 117
  limit = input.bytesize
  pending_from = index

  # Hands the not-yet-copied plain bytes (pending_from...index) to the token.
  flush_pending = lambda do
    token.append_slice(input, pending_from, index - pending_from) if index > pending_from
  end

  until index >= limit
    byte = input.getbyte(index)

    # Inside `\uXXXX`: collect hex digits until four have been seen.
    if token.unicode_digits
      unless hex_digit_byte?(byte)
        token.invalid_unicode!
        token.terminate!
        return [index, :invalid_unicode]
      end

      token.append_unicode_digit(byte)
      index += 1

      if token.unicode_digits.length == 4
        status = token.finish_unicode_escape!
        token.escape_state = nil
        token.unicode_digits = nil
        return [index, :invalid_unicode] if status == :invalid_unicode
      end

      pending_from = index
      next
    end

    # Directly after a backslash: either `u` or a simple escape byte.
    if token.escape_state == :backslash
      # A pending high surrogate may only be followed by another `\u` escape.
      return [index, :invalid_unicode] if strict && token.pending_high_surrogate && byte != lower_u

      if byte == lower_u
        token.start_unicode_escape!
      else
        return [index, :invalid_escape] unless token.valid_simple_escape?(byte)

        token.append_simple_escape(byte)
        token.escape_state = nil
      end

      index += 1
      pending_from = index
      next
    end

    # A pending high surrogate must be followed immediately by a backslash.
    return [index, :invalid_unicode] if strict && token.pending_high_surrogate && byte != backslash

    case byte
    when quote
      flush_pending.call
      return [index, :invalid_unicode] if strict && token.pending_high_surrogate

      token.terminate!
      return [index + 1, :terminated]
    when backslash
      flush_pending.call
      token.start_escape!
      index += 1
      pending_from = index
      next
    end

    # Strict tokens reject raw control characters.
    if strict && byte < 0x20
      flush_pending.call
      return [index, :invalid_control_character]
    end

    index += 1
  end

  flush_pending.call
  [index, :incomplete]
end
404
+
405
# Feeds bytes of `input`, starting at byte offset `index`, into a fresh NumberToken until a
# byte is rejected or the input ends.
#
# @return [Array(String, Integer)] the token's completed literal and the number of bytes consumed
def scan_number_literal(input, index)
  token = NumberToken.new
  limit = input.bytesize
  cursor = index

  cursor += 1 while cursor < limit && token.append_byte(input.getbyte(cursor))

  [token.completed_literal, cursor - index]
end
416
+
417
# Matches `input`, starting at byte offset `index`, against `target_keyword` using a KeywordToken.
#
# @return [Array(String, Integer)] `[target_keyword, bytes_consumed]` when at least one byte
#   matched; otherwise the single byte found at `index` together with a length of 1
def scan_keyword_literal(input, index, target_keyword)
  token = KeywordToken.new(target: target_keyword)
  limit = input.bytesize
  cursor = index

  cursor += 1 while cursor < limit && token.append_byte(input.getbyte(cursor))

  if token.matched.zero?
    [input.byteslice(index, 1), 1]
  else
    [target_keyword, cursor - index]
  end
end
430
+
431
# True when `byte` is an ASCII decimal digit (48..57, i.e. "0".."9").
def digit_byte?(byte)
  byte >= 48 && byte <= 57
end
434
+
435
# True when `byte` is an exponent marker: 69 ("E") or 101 ("e").
def exponent_byte?(byte)
  [69, 101].include?(byte)
end
443
+
444
# True when `byte` is an ASCII hexadecimal digit: 0-9, A-F (65..70) or a-f (97..102).
def hex_digit_byte?(byte)
  return true if digit_byte?(byte)
  return true if byte.between?(65, 70)

  byte.between?(97, 102)
end
447
+ end
448
+ end