json_completer 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -0
- data/lib/json_completer/completion_engine.rb +58 -40
- data/lib/json_completer/parser_engine.rb +83 -42
- data/lib/json_completer/scanners.rb +132 -86
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5d460af0d48e2cecf87411ba30d2a6aeac00fe38208d0222bf8b7218e373a2cc
|
|
4
|
+
data.tar.gz: 448401c51bc04e0a38fae036d3a64d94e5090846c39f69d081a8032b0b58e80a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 256f6ba460ef729a9babe9f9355f0d888c3c7f3dc64e3b4c85c2ae69ad6cb6c3a4b107aced36fe55b04e06302034b672f63c94c6db03b074a0462223eee8d5d1
|
|
7
|
+
data.tar.gz: 101f08a619d56129398b751815077897e3f557d581b76d075e41f57098e93ec92aa14d9f7edb1c88dbb8179f26d3aaf0fde3d81cfa986d2c912aa70b77294074
|
data/README.md
CHANGED
|
@@ -64,6 +64,8 @@ result3 = completer.parse('{"users": [{"name": "Alice"}, {"name": "Bob"}]}')
|
|
|
64
64
|
# => {"users" => [{"name" => "Alice"}, {"name" => "Bob"}]}
|
|
65
65
|
```
|
|
66
66
|
|
|
67
|
+
Stateful `JsonCompleter` instances assume append-only input. If earlier bytes change, create a new instance; truncation to a shorter prefix still resets state automatically.
|
|
68
|
+
|
|
67
69
|
### String Output with `.complete`
|
|
68
70
|
|
|
69
71
|
Use `.complete` when you specifically need completed JSON text instead of parsed Ruby objects:
|
|
@@ -83,6 +85,7 @@ This is the second-tier option when another layer expects JSON text and you want
|
|
|
83
85
|
- **Zero reprocessing**: Maintains parsing state to avoid reparsing previously processed data
|
|
84
86
|
- **Linear complexity**: Each chunk processed in O(n) time where n = new data size, not total size
|
|
85
87
|
- **Memory efficient**: Uses token-based accumulation with minimal state overhead
|
|
88
|
+
- **Byte-oriented string scanning**: Walks JSON input as bytes and copies contiguous non-escape string content in slices to reduce per-character overhead on long streamed strings
|
|
86
89
|
- **Context preservation**: Tracks nested structures without full document analysis
|
|
87
90
|
|
|
88
91
|
### Common Use Cases
|
|
@@ -4,29 +4,31 @@ class JsonCompleter
|
|
|
4
4
|
module CompletionEngine
|
|
5
5
|
def complete(partial_json)
|
|
6
6
|
input = partial_json
|
|
7
|
+
# Same byte-oriented trick as parse: compare ASCII JSON syntax as integers and avoid
|
|
8
|
+
# allocating transient 1-character strings in the streaming loop.
|
|
9
|
+
input_length = input.bytesize
|
|
7
10
|
|
|
8
|
-
if @state.nil? || @state.input_length >
|
|
11
|
+
if @state.nil? || @state.input_length > input_length
|
|
9
12
|
@state = ParsingState.new
|
|
10
13
|
end
|
|
11
14
|
|
|
12
15
|
return input if input.empty?
|
|
13
16
|
return input if valid_json_primitive_or_document?(input)
|
|
14
17
|
|
|
15
|
-
if @state.input_length ==
|
|
18
|
+
if @state.input_length == input_length && !@state.output_tokens.empty?
|
|
16
19
|
return finalize_completion(@state.output_tokens.dup, @state.context_stack.dup, @state.incomplete_string_token)
|
|
17
20
|
end
|
|
18
21
|
|
|
19
22
|
output_tokens = @state.output_tokens.dup
|
|
20
23
|
context_stack = @state.context_stack.dup
|
|
21
24
|
index = @state.last_index
|
|
22
|
-
length = input.length
|
|
23
25
|
incomplete_string_token = @state.incomplete_string_token
|
|
24
26
|
|
|
25
27
|
if incomplete_string_token && output_tokens.last&.start_with?('"') && output_tokens.last.end_with?('"')
|
|
26
28
|
output_tokens.pop
|
|
27
29
|
end
|
|
28
30
|
|
|
29
|
-
while index <
|
|
31
|
+
while index < input_length
|
|
30
32
|
if incomplete_string_token && index == @state.last_index
|
|
31
33
|
index, status = Scanners.scan_string(input, index, incomplete_string_token)
|
|
32
34
|
|
|
@@ -38,32 +40,16 @@ class JsonCompleter
|
|
|
38
40
|
next
|
|
39
41
|
end
|
|
40
42
|
|
|
41
|
-
|
|
43
|
+
byte = input.getbyte(index)
|
|
42
44
|
last_significant_char_in_output = get_last_significant_char(output_tokens)
|
|
43
45
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
output_tokens <<
|
|
49
|
-
context_stack << '{'
|
|
50
|
-
index += 1
|
|
51
|
-
when '['
|
|
52
|
-
ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
|
|
53
|
-
ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)
|
|
54
|
-
output_tokens << char
|
|
55
|
-
context_stack << '['
|
|
56
|
-
index += 1
|
|
57
|
-
when '}'
|
|
58
|
-
remove_trailing_comma(output_tokens)
|
|
59
|
-
output_tokens << char
|
|
60
|
-
context_stack.pop if !context_stack.empty? && context_stack.last == '{'
|
|
46
|
+
# ASCII byte values: 9/10/13/32 = whitespace, 34 = ", 44 = ,, 45 = -, 48..57 = 0-9, 58 = :,
|
|
47
|
+
# 91/93 = [ and ], 102/110/116 = f/n/t, 123/125 = { and }.
|
|
48
|
+
case byte
|
|
49
|
+
when 9, 10, 13, 32
|
|
50
|
+
output_tokens << input.byteslice(index, 1)
|
|
61
51
|
index += 1
|
|
62
|
-
when
|
|
63
|
-
output_tokens << char
|
|
64
|
-
context_stack.pop if !context_stack.empty? && context_stack.last == '['
|
|
65
|
-
index += 1
|
|
66
|
-
when '"'
|
|
52
|
+
when 34
|
|
67
53
|
ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
|
|
68
54
|
ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)
|
|
69
55
|
|
|
@@ -75,30 +61,62 @@ class JsonCompleter
|
|
|
75
61
|
else
|
|
76
62
|
incomplete_string_token = string_token
|
|
77
63
|
end
|
|
78
|
-
when
|
|
64
|
+
when 44
|
|
65
|
+
remove_trailing_comma(output_tokens)
|
|
66
|
+
output_tokens << ','
|
|
67
|
+
index += 1
|
|
68
|
+
when 45, 48..57
|
|
69
|
+
ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
|
|
70
|
+
ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)
|
|
71
|
+
|
|
72
|
+
num_str, consumed = Scanners.scan_number_literal(input, index)
|
|
73
|
+
output_tokens << num_str
|
|
74
|
+
index += consumed
|
|
75
|
+
when 58
|
|
79
76
|
remove_trailing_comma(output_tokens) if last_significant_char_in_output == ','
|
|
80
|
-
output_tokens <<
|
|
77
|
+
output_tokens << ':'
|
|
81
78
|
index += 1
|
|
82
|
-
when
|
|
83
|
-
|
|
84
|
-
output_tokens
|
|
79
|
+
when 91
|
|
80
|
+
ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
|
|
81
|
+
ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)
|
|
82
|
+
output_tokens << '['
|
|
83
|
+
context_stack << '['
|
|
84
|
+
index += 1
|
|
85
|
+
when 93
|
|
86
|
+
output_tokens << ']'
|
|
87
|
+
context_stack.pop if !context_stack.empty? && context_stack.last == '['
|
|
85
88
|
index += 1
|
|
86
|
-
when
|
|
89
|
+
when 102
|
|
87
90
|
ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
|
|
88
91
|
ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)
|
|
89
92
|
|
|
90
|
-
keyword_val, consumed = Scanners.scan_keyword_literal(input, index, KEYWORD_MAP[
|
|
93
|
+
keyword_val, consumed = Scanners.scan_keyword_literal(input, index, KEYWORD_MAP['f'])
|
|
91
94
|
output_tokens << keyword_val
|
|
92
95
|
index += consumed
|
|
93
|
-
when
|
|
96
|
+
when 110
|
|
94
97
|
ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
|
|
95
98
|
ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)
|
|
96
99
|
|
|
97
|
-
|
|
98
|
-
output_tokens <<
|
|
100
|
+
keyword_val, consumed = Scanners.scan_keyword_literal(input, index, KEYWORD_MAP['n'])
|
|
101
|
+
output_tokens << keyword_val
|
|
102
|
+
index += consumed
|
|
103
|
+
when 116
|
|
104
|
+
ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
|
|
105
|
+
ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)
|
|
106
|
+
|
|
107
|
+
keyword_val, consumed = Scanners.scan_keyword_literal(input, index, KEYWORD_MAP['t'])
|
|
108
|
+
output_tokens << keyword_val
|
|
99
109
|
index += consumed
|
|
100
|
-
when
|
|
101
|
-
output_tokens
|
|
110
|
+
when 123
|
|
111
|
+
ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
|
|
112
|
+
ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)
|
|
113
|
+
output_tokens << '{'
|
|
114
|
+
context_stack << '{'
|
|
115
|
+
index += 1
|
|
116
|
+
when 125
|
|
117
|
+
remove_trailing_comma(output_tokens)
|
|
118
|
+
output_tokens << '}'
|
|
119
|
+
context_stack.pop if !context_stack.empty? && context_stack.last == '{'
|
|
102
120
|
index += 1
|
|
103
121
|
else
|
|
104
122
|
index += 1
|
|
@@ -109,7 +127,7 @@ class JsonCompleter
|
|
|
109
127
|
output_tokens: output_tokens,
|
|
110
128
|
context_stack: context_stack,
|
|
111
129
|
last_index: index,
|
|
112
|
-
input_length:
|
|
130
|
+
input_length: input_length,
|
|
113
131
|
incomplete_string_token: incomplete_string_token
|
|
114
132
|
)
|
|
115
133
|
|
|
@@ -4,72 +4,79 @@ class JsonCompleter
|
|
|
4
4
|
module ParserEngine
|
|
5
5
|
def parse(partial_json)
|
|
6
6
|
input = partial_json
|
|
7
|
+
# The hot path works on raw bytes, not 1-character Ruby strings. JSON punctuation is ASCII,
|
|
8
|
+
# so getbyte/bytesize let us compare cheap integers while multibyte UTF-8 payload stays intact.
|
|
9
|
+
input_length = input.bytesize
|
|
7
10
|
|
|
8
11
|
if @parse_state.nil? ||
|
|
9
|
-
@parse_state.input_length >
|
|
10
|
-
(@parse_state.
|
|
12
|
+
@parse_state.input_length > input_length ||
|
|
13
|
+
(@parse_state.input_length < input_length && reset_parse_state_for_input_growth?(input))
|
|
14
|
+
@parse_state = self.class.new_parse_state
|
|
15
|
+
elsif @parse_state.input_length == input_length
|
|
16
|
+
if @parse_state.input_snapshot == input
|
|
17
|
+
finalize_parse_result
|
|
18
|
+
return @parse_state.root
|
|
19
|
+
end
|
|
20
|
+
|
|
11
21
|
@parse_state = self.class.new_parse_state
|
|
12
22
|
end
|
|
13
23
|
|
|
14
24
|
return nil if input.empty?
|
|
15
25
|
|
|
16
26
|
begin
|
|
17
|
-
if @parse_state.input_length == input.length
|
|
18
|
-
finalize_parse_result
|
|
19
|
-
return @parse_state.root
|
|
20
|
-
end
|
|
21
|
-
|
|
22
27
|
prepare_parse_state_for_incremental_input
|
|
23
28
|
|
|
24
29
|
index = @parse_state.last_index
|
|
25
|
-
while index <
|
|
30
|
+
while index < input_length
|
|
26
31
|
if @parse_state.token_state
|
|
27
32
|
index = continue_parse_token(input, index)
|
|
28
33
|
next
|
|
29
34
|
end
|
|
30
35
|
|
|
31
|
-
|
|
32
|
-
if top_level_value_complete? &&
|
|
36
|
+
byte = input.getbyte(index)
|
|
37
|
+
if top_level_value_complete? && !whitespace_byte?(byte)
|
|
33
38
|
raise ParseError, 'unexpected token after top-level value'
|
|
34
39
|
end
|
|
35
40
|
|
|
36
|
-
|
|
37
|
-
|
|
41
|
+
# ASCII byte values: 9/10/13/32 = whitespace, 34 = ", 44 = ,, 45 = -, 48..57 = 0-9, 58 = :,
|
|
42
|
+
# 91/93 = [ and ], 102/110/116 = f/n/t, 123/125 = { and }.
|
|
43
|
+
case byte
|
|
44
|
+
when 9, 10, 13, 32
|
|
38
45
|
index += 1
|
|
39
|
-
when
|
|
40
|
-
|
|
46
|
+
when 34
|
|
47
|
+
start_parse_string_token
|
|
41
48
|
index += 1
|
|
42
|
-
when
|
|
43
|
-
|
|
49
|
+
when 44
|
|
50
|
+
parse_comma!
|
|
44
51
|
index += 1
|
|
45
|
-
when
|
|
46
|
-
|
|
52
|
+
when 45, 48..57
|
|
53
|
+
start_parse_number_token(byte)
|
|
47
54
|
index += 1
|
|
48
|
-
when
|
|
49
|
-
|
|
55
|
+
when 58
|
|
56
|
+
parse_colon!
|
|
50
57
|
index += 1
|
|
51
|
-
when
|
|
52
|
-
|
|
58
|
+
when 91
|
|
59
|
+
start_parse_container([])
|
|
53
60
|
index += 1
|
|
54
|
-
when
|
|
55
|
-
|
|
61
|
+
when 93
|
|
62
|
+
close_parse_array!
|
|
56
63
|
index += 1
|
|
57
|
-
when
|
|
58
|
-
|
|
64
|
+
when 102, 110, 116
|
|
65
|
+
start_parse_keyword_token(byte)
|
|
59
66
|
index += 1
|
|
60
|
-
when
|
|
61
|
-
|
|
67
|
+
when 123
|
|
68
|
+
start_parse_container({})
|
|
62
69
|
index += 1
|
|
63
|
-
when
|
|
64
|
-
|
|
70
|
+
when 125
|
|
71
|
+
close_parse_object!
|
|
65
72
|
index += 1
|
|
66
73
|
else
|
|
67
|
-
raise ParseError, "unexpected token #{
|
|
74
|
+
raise ParseError, "unexpected token #{input.byteslice(index, 1).inspect}"
|
|
68
75
|
end
|
|
69
76
|
end
|
|
70
77
|
|
|
71
78
|
@parse_state.last_index = index
|
|
72
|
-
@parse_state.input_length =
|
|
79
|
+
@parse_state.input_length = input_length
|
|
73
80
|
@parse_state.input_snapshot = input
|
|
74
81
|
finalize_parse_result
|
|
75
82
|
@parse_state.root
|
|
@@ -172,10 +179,10 @@ class JsonCompleter
|
|
|
172
179
|
@parse_state.token_state = nil
|
|
173
180
|
end
|
|
174
181
|
|
|
175
|
-
def start_parse_number_token(
|
|
182
|
+
def start_parse_number_token(first_byte)
|
|
176
183
|
slot = parse_value_slot!
|
|
177
184
|
token = Scanners::NumberToken.new(slot: slot)
|
|
178
|
-
token.
|
|
185
|
+
token.append_byte(first_byte)
|
|
179
186
|
assign_parse_slot(slot, token.parsed_value)
|
|
180
187
|
transition_after_parse_value(slot)
|
|
181
188
|
@parse_state.token_state = token
|
|
@@ -183,21 +190,22 @@ class JsonCompleter
|
|
|
183
190
|
|
|
184
191
|
def continue_parse_number_token(input, index)
|
|
185
192
|
token = @parse_state.token_state
|
|
193
|
+
length = input.bytesize
|
|
186
194
|
|
|
187
|
-
while index <
|
|
195
|
+
while index < length && token.append_byte(input.getbyte(index))
|
|
188
196
|
assign_parse_slot(token.slot, token.parsed_value)
|
|
189
197
|
index += 1
|
|
190
198
|
end
|
|
191
199
|
|
|
192
200
|
raise ParseError, 'invalid number literal' if token.invalid?
|
|
193
201
|
|
|
194
|
-
@parse_state.token_state = nil if index <
|
|
202
|
+
@parse_state.token_state = nil if index < length
|
|
195
203
|
index
|
|
196
204
|
end
|
|
197
205
|
|
|
198
|
-
def start_parse_keyword_token(
|
|
206
|
+
def start_parse_keyword_token(first_byte)
|
|
199
207
|
slot = parse_value_slot!
|
|
200
|
-
token = Scanners::KeywordToken.new(slot: slot, target:
|
|
208
|
+
token = Scanners::KeywordToken.new(slot: slot, target: keyword_target_for_byte(first_byte), matched: 1)
|
|
201
209
|
assign_parse_slot(slot, token.parsed_value)
|
|
202
210
|
transition_after_parse_value(slot)
|
|
203
211
|
@parse_state.token_state = token
|
|
@@ -205,14 +213,15 @@ class JsonCompleter
|
|
|
205
213
|
|
|
206
214
|
def continue_parse_keyword_token(input, index)
|
|
207
215
|
token = @parse_state.token_state
|
|
216
|
+
length = input.bytesize
|
|
208
217
|
|
|
209
|
-
while index <
|
|
218
|
+
while index < length && token.matched < token.target.length && token.append_byte(input.getbyte(index))
|
|
210
219
|
index += 1
|
|
211
220
|
end
|
|
212
221
|
|
|
213
|
-
raise ParseError, 'invalid keyword literal' if token.matched < token.target.length && index <
|
|
222
|
+
raise ParseError, 'invalid keyword literal' if token.matched < token.target.length && index < length
|
|
214
223
|
|
|
215
|
-
@parse_state.token_state = nil if index <
|
|
224
|
+
@parse_state.token_state = nil if index < length || token.matched == token.target.length
|
|
216
225
|
index
|
|
217
226
|
end
|
|
218
227
|
|
|
@@ -238,7 +247,6 @@ class JsonCompleter
|
|
|
238
247
|
|
|
239
248
|
context.mode = :key_or_end
|
|
240
249
|
context.current_key = nil
|
|
241
|
-
|
|
242
250
|
end
|
|
243
251
|
end
|
|
244
252
|
|
|
@@ -339,6 +347,39 @@ class JsonCompleter
|
|
|
339
347
|
token.visible_key_replaced_value = token.context.container[current_key]
|
|
340
348
|
token.context.container[current_key] = nil
|
|
341
349
|
end
|
|
350
|
+
|
|
351
|
+
def reset_parse_state_for_input_growth?(input)
|
|
352
|
+
return false unless @parse_state.input_snapshot
|
|
353
|
+
return false unless prefix_validation_required?
|
|
354
|
+
|
|
355
|
+
!input.start_with?(@parse_state.input_snapshot)
|
|
356
|
+
end
|
|
357
|
+
|
|
358
|
+
def prefix_validation_required?
|
|
359
|
+
@parse_state.context_stack.empty?
|
|
360
|
+
end
|
|
361
|
+
|
|
362
|
+
def keyword_target_for_byte(byte)
|
|
363
|
+
case byte
|
|
364
|
+
when 102
|
|
365
|
+
'false'
|
|
366
|
+
when 110
|
|
367
|
+
'null'
|
|
368
|
+
when 116
|
|
369
|
+
'true'
|
|
370
|
+
else
|
|
371
|
+
raise ParseError, "unexpected keyword token byte: #{byte}"
|
|
372
|
+
end
|
|
373
|
+
end
|
|
374
|
+
|
|
375
|
+
def whitespace_byte?(byte)
|
|
376
|
+
case byte
|
|
377
|
+
when 9, 10, 13, 32
|
|
378
|
+
true
|
|
379
|
+
else
|
|
380
|
+
false
|
|
381
|
+
end
|
|
382
|
+
end
|
|
342
383
|
end
|
|
343
384
|
|
|
344
385
|
include ParserEngine
|
|
@@ -14,15 +14,16 @@ class JsonCompleter
|
|
|
14
14
|
self.escape_state = :backslash
|
|
15
15
|
end
|
|
16
16
|
|
|
17
|
-
def
|
|
18
|
-
buffer <<
|
|
17
|
+
def append_slice(input, start_index, length)
|
|
18
|
+
buffer << input.byteslice(start_index, length)
|
|
19
19
|
end
|
|
20
20
|
|
|
21
|
-
|
|
22
|
-
|
|
21
|
+
# Completion keeps escape bytes verbatim, so convert the ASCII byte back into a 1-byte string.
|
|
22
|
+
def append_simple_escape(byte)
|
|
23
|
+
buffer << byte.chr(Encoding::UTF_8)
|
|
23
24
|
end
|
|
24
25
|
|
|
25
|
-
def valid_simple_escape?(
|
|
26
|
+
def valid_simple_escape?(_byte)
|
|
26
27
|
true
|
|
27
28
|
end
|
|
28
29
|
|
|
@@ -31,9 +32,9 @@ class JsonCompleter
|
|
|
31
32
|
buffer << 'u'
|
|
32
33
|
end
|
|
33
34
|
|
|
34
|
-
def append_unicode_digit(
|
|
35
|
-
unicode_digits <<
|
|
36
|
-
buffer <<
|
|
35
|
+
def append_unicode_digit(byte)
|
|
36
|
+
unicode_digits << byte
|
|
37
|
+
buffer << byte.chr(Encoding::UTF_8)
|
|
37
38
|
end
|
|
38
39
|
|
|
39
40
|
def finish_unicode_escape!; end
|
|
@@ -84,37 +85,43 @@ class JsonCompleter
|
|
|
84
85
|
self.escape_state = :backslash
|
|
85
86
|
end
|
|
86
87
|
|
|
87
|
-
def
|
|
88
|
-
buffer <<
|
|
88
|
+
def append_slice(input, start_index, length)
|
|
89
|
+
buffer << input.byteslice(start_index, length)
|
|
89
90
|
end
|
|
90
91
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
92
|
+
# ASCII escape bytes: 98/102/110/114/116 = b/f/n/r/t.
|
|
93
|
+
def append_simple_escape(byte)
|
|
94
|
+
buffer << case byte
|
|
95
|
+
when 98
|
|
94
96
|
"\b"
|
|
95
|
-
when
|
|
97
|
+
when 102
|
|
96
98
|
"\f"
|
|
97
|
-
when
|
|
99
|
+
when 110
|
|
98
100
|
"\n"
|
|
99
|
-
when
|
|
101
|
+
when 114
|
|
100
102
|
"\r"
|
|
101
|
-
when
|
|
103
|
+
when 116
|
|
102
104
|
"\t"
|
|
103
105
|
else
|
|
104
|
-
|
|
106
|
+
byte
|
|
105
107
|
end
|
|
106
108
|
end
|
|
107
109
|
|
|
108
|
-
def valid_simple_escape?(
|
|
109
|
-
|
|
110
|
+
def valid_simple_escape?(byte)
|
|
111
|
+
case byte
|
|
112
|
+
when 34, 92, 47, 98, 102, 110, 114, 116
|
|
113
|
+
true
|
|
114
|
+
else
|
|
115
|
+
false
|
|
116
|
+
end
|
|
110
117
|
end
|
|
111
118
|
|
|
112
119
|
def start_unicode_escape!
|
|
113
120
|
self.unicode_digits = String.new
|
|
114
121
|
end
|
|
115
122
|
|
|
116
|
-
def append_unicode_digit(
|
|
117
|
-
unicode_digits <<
|
|
123
|
+
def append_unicode_digit(byte)
|
|
124
|
+
unicode_digits << byte
|
|
118
125
|
end
|
|
119
126
|
|
|
120
127
|
def finish_unicode_escape!
|
|
@@ -160,94 +167,97 @@ class JsonCompleter
|
|
|
160
167
|
self.raw ||= String.new
|
|
161
168
|
end
|
|
162
169
|
|
|
163
|
-
|
|
170
|
+
# append_byte consumes ASCII bytes, not 1-character strings:
|
|
171
|
+
# 43 = +, 45 = -, 46 = ., 48..57 = 0..9, 69/101 = E/e.
|
|
172
|
+
def append_byte(byte)
|
|
164
173
|
case phase
|
|
165
174
|
when nil
|
|
166
|
-
case
|
|
167
|
-
when
|
|
168
|
-
raw <<
|
|
175
|
+
case byte
|
|
176
|
+
when 45
|
|
177
|
+
raw << byte
|
|
169
178
|
self.phase = :sign
|
|
170
|
-
when
|
|
171
|
-
raw <<
|
|
179
|
+
when 48
|
|
180
|
+
raw << byte
|
|
172
181
|
self.phase = :zero
|
|
173
|
-
when
|
|
174
|
-
raw <<
|
|
182
|
+
when 49..57
|
|
183
|
+
raw << byte
|
|
175
184
|
self.phase = :int
|
|
176
185
|
else
|
|
177
186
|
return false
|
|
178
187
|
end
|
|
179
188
|
when :sign
|
|
180
|
-
case
|
|
181
|
-
when
|
|
182
|
-
raw <<
|
|
189
|
+
case byte
|
|
190
|
+
when 48
|
|
191
|
+
raw << byte
|
|
183
192
|
self.phase = :zero
|
|
184
|
-
when
|
|
185
|
-
raw <<
|
|
193
|
+
when 49..57
|
|
194
|
+
raw << byte
|
|
186
195
|
self.phase = :int
|
|
187
|
-
when
|
|
188
|
-
raw <<
|
|
196
|
+
when 46
|
|
197
|
+
raw << byte
|
|
189
198
|
self.phase = :frac_start
|
|
190
199
|
else
|
|
191
200
|
return false
|
|
192
201
|
end
|
|
193
202
|
when :zero
|
|
194
|
-
if
|
|
203
|
+
if Scanners.digit_byte?(byte)
|
|
195
204
|
self.invalid = true
|
|
196
205
|
return false
|
|
197
|
-
elsif
|
|
198
|
-
raw <<
|
|
206
|
+
elsif byte == 46
|
|
207
|
+
raw << byte
|
|
199
208
|
self.phase = :frac_start
|
|
200
|
-
elsif
|
|
201
|
-
raw <<
|
|
209
|
+
elsif Scanners.exponent_byte?(byte)
|
|
210
|
+
raw << byte
|
|
202
211
|
self.phase = :exp_start
|
|
203
212
|
else
|
|
204
213
|
return false
|
|
205
214
|
end
|
|
206
215
|
when :int
|
|
207
|
-
if
|
|
208
|
-
raw <<
|
|
209
|
-
elsif
|
|
210
|
-
raw <<
|
|
216
|
+
if Scanners.digit_byte?(byte)
|
|
217
|
+
raw << byte
|
|
218
|
+
elsif byte == 46
|
|
219
|
+
raw << byte
|
|
211
220
|
self.phase = :frac_start
|
|
212
|
-
elsif
|
|
213
|
-
raw <<
|
|
221
|
+
elsif Scanners.exponent_byte?(byte)
|
|
222
|
+
raw << byte
|
|
214
223
|
self.phase = :exp_start
|
|
215
224
|
else
|
|
216
225
|
return false
|
|
217
226
|
end
|
|
218
227
|
when :frac_start
|
|
219
|
-
return false unless
|
|
228
|
+
return false unless Scanners.digit_byte?(byte)
|
|
220
229
|
|
|
221
|
-
raw <<
|
|
230
|
+
raw << byte
|
|
222
231
|
self.phase = :frac
|
|
223
232
|
when :frac
|
|
224
|
-
if
|
|
225
|
-
raw <<
|
|
226
|
-
elsif
|
|
227
|
-
raw <<
|
|
233
|
+
if Scanners.digit_byte?(byte)
|
|
234
|
+
raw << byte
|
|
235
|
+
elsif Scanners.exponent_byte?(byte)
|
|
236
|
+
raw << byte
|
|
228
237
|
self.phase = :exp_start
|
|
229
238
|
else
|
|
230
239
|
return false
|
|
231
240
|
end
|
|
232
241
|
when :exp_start
|
|
233
|
-
|
|
234
|
-
|
|
242
|
+
case byte
|
|
243
|
+
when 43, 45
|
|
244
|
+
raw << byte
|
|
235
245
|
self.phase = :exp_sign
|
|
236
|
-
|
|
237
|
-
raw <<
|
|
246
|
+
when 48..57
|
|
247
|
+
raw << byte
|
|
238
248
|
self.phase = :exp
|
|
239
249
|
else
|
|
240
250
|
return false
|
|
241
251
|
end
|
|
242
252
|
when :exp_sign
|
|
243
|
-
return false unless
|
|
253
|
+
return false unless Scanners.digit_byte?(byte)
|
|
244
254
|
|
|
245
|
-
raw <<
|
|
255
|
+
raw << byte
|
|
246
256
|
self.phase = :exp
|
|
247
257
|
when :exp
|
|
248
|
-
return false unless
|
|
258
|
+
return false unless Scanners.digit_byte?(byte)
|
|
249
259
|
|
|
250
|
-
raw <<
|
|
260
|
+
raw << byte
|
|
251
261
|
end
|
|
252
262
|
|
|
253
263
|
true
|
|
@@ -285,9 +295,9 @@ class JsonCompleter
|
|
|
285
295
|
super
|
|
286
296
|
end
|
|
287
297
|
|
|
288
|
-
def
|
|
298
|
+
def append_byte(byte)
|
|
289
299
|
return false if matched >= target.length
|
|
290
|
-
return false unless
|
|
300
|
+
return false unless (byte | 0x20) == target.getbyte(matched)
|
|
291
301
|
|
|
292
302
|
self.matched += 1
|
|
293
303
|
true
|
|
@@ -307,13 +317,17 @@ class JsonCompleter
|
|
|
307
317
|
|
|
308
318
|
def scan_string(input, index, token)
|
|
309
319
|
strict = token.is_a?(ParsedStringToken)
|
|
320
|
+
# JSON string syntax is ASCII, so scanning bytes is safe here: multibyte UTF-8 content is
|
|
321
|
+
# treated as opaque payload and copied via byteslice until we hit an ASCII delimiter/escape.
|
|
322
|
+
length = input.bytesize
|
|
323
|
+
segment_start = index
|
|
310
324
|
|
|
311
|
-
while index <
|
|
312
|
-
|
|
325
|
+
while index < length
|
|
326
|
+
byte = input.getbyte(index)
|
|
313
327
|
|
|
314
328
|
if token.unicode_digits
|
|
315
|
-
if
|
|
316
|
-
token.append_unicode_digit(
|
|
329
|
+
if hex_digit_byte?(byte)
|
|
330
|
+
token.append_unicode_digit(byte)
|
|
317
331
|
index += 1
|
|
318
332
|
|
|
319
333
|
if token.unicode_digits.length == 4
|
|
@@ -323,6 +337,7 @@ class JsonCompleter
|
|
|
323
337
|
return [index, :invalid_unicode] if status == :invalid_unicode
|
|
324
338
|
end
|
|
325
339
|
|
|
340
|
+
segment_start = index
|
|
326
341
|
next
|
|
327
342
|
end
|
|
328
343
|
|
|
@@ -332,54 +347,67 @@ class JsonCompleter
|
|
|
332
347
|
end
|
|
333
348
|
|
|
334
349
|
if token.escape_state == :backslash
|
|
335
|
-
if strict && token.pending_high_surrogate &&
|
|
350
|
+
if strict && token.pending_high_surrogate && byte != 117
|
|
336
351
|
return [index, :invalid_unicode]
|
|
337
352
|
end
|
|
338
353
|
|
|
339
|
-
if
|
|
354
|
+
if byte == 117
|
|
340
355
|
token.start_unicode_escape!
|
|
341
356
|
index += 1
|
|
357
|
+
segment_start = index
|
|
342
358
|
next
|
|
343
359
|
end
|
|
344
360
|
|
|
345
|
-
return [index, :invalid_escape] unless token.valid_simple_escape?(
|
|
361
|
+
return [index, :invalid_escape] unless token.valid_simple_escape?(byte)
|
|
346
362
|
|
|
347
|
-
token.append_simple_escape(
|
|
363
|
+
token.append_simple_escape(byte)
|
|
348
364
|
token.escape_state = nil
|
|
349
365
|
index += 1
|
|
366
|
+
segment_start = index
|
|
350
367
|
next
|
|
351
368
|
end
|
|
352
369
|
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
370
|
+
if strict && token.pending_high_surrogate && byte != 92
|
|
371
|
+
return [index, :invalid_unicode]
|
|
372
|
+
end
|
|
373
|
+
|
|
374
|
+
if byte == 34
|
|
375
|
+
token.append_slice(input, segment_start, index - segment_start) if index > segment_start
|
|
376
|
+
|
|
358
377
|
if strict && token.pending_high_surrogate
|
|
359
378
|
return [index, :invalid_unicode]
|
|
360
379
|
end
|
|
361
380
|
|
|
362
381
|
token.terminate!
|
|
363
382
|
return [index + 1, :terminated]
|
|
364
|
-
|
|
365
|
-
if strict
|
|
366
|
-
return [index, :invalid_control_character] if char.ord < 0x20
|
|
367
|
-
return [index, :invalid_unicode] if token.pending_high_surrogate
|
|
368
|
-
end
|
|
383
|
+
end
|
|
369
384
|
|
|
370
|
-
|
|
385
|
+
if byte == 92
|
|
386
|
+
token.append_slice(input, segment_start, index - segment_start) if index > segment_start
|
|
387
|
+
token.start_escape!
|
|
371
388
|
index += 1
|
|
389
|
+
segment_start = index
|
|
390
|
+
next
|
|
391
|
+
end
|
|
392
|
+
|
|
393
|
+
if strict && byte < 0x20
|
|
394
|
+
token.append_slice(input, segment_start, index - segment_start) if index > segment_start
|
|
395
|
+
return [index, :invalid_control_character]
|
|
372
396
|
end
|
|
397
|
+
|
|
398
|
+
index += 1
|
|
373
399
|
end
|
|
374
400
|
|
|
401
|
+
token.append_slice(input, segment_start, index - segment_start) if index > segment_start
|
|
375
402
|
[index, :incomplete]
|
|
376
403
|
end
|
|
377
404
|
|
|
378
405
|
def scan_number_literal(input, index)
|
|
379
406
|
start_index = index
|
|
380
407
|
token = NumberToken.new
|
|
408
|
+
length = input.bytesize
|
|
381
409
|
|
|
382
|
-
while index <
|
|
410
|
+
while index < length && token.append_byte(input.getbyte(index))
|
|
383
411
|
index += 1
|
|
384
412
|
end
|
|
385
413
|
|
|
@@ -389,14 +417,32 @@ class JsonCompleter
|
|
|
389
417
|
def scan_keyword_literal(input, index, target_keyword)
|
|
390
418
|
start_index = index
|
|
391
419
|
token = KeywordToken.new(target: target_keyword)
|
|
420
|
+
length = input.bytesize
|
|
392
421
|
|
|
393
|
-
while index <
|
|
422
|
+
while index < length && token.append_byte(input.getbyte(index))
|
|
394
423
|
index += 1
|
|
395
424
|
end
|
|
396
425
|
|
|
397
|
-
return [input
|
|
426
|
+
return [input.byteslice(start_index, 1), 1] if token.matched.zero?
|
|
398
427
|
|
|
399
428
|
[target_keyword, index - start_index]
|
|
400
429
|
end
|
|
430
|
+
|
|
431
|
+
def digit_byte?(byte)
|
|
432
|
+
byte.between?(48, 57)
|
|
433
|
+
end
|
|
434
|
+
|
|
435
|
+
def exponent_byte?(byte)
|
|
436
|
+
case byte
|
|
437
|
+
when 69, 101
|
|
438
|
+
true
|
|
439
|
+
else
|
|
440
|
+
false
|
|
441
|
+
end
|
|
442
|
+
end
|
|
443
|
+
|
|
444
|
+
def hex_digit_byte?(byte)
|
|
445
|
+
digit_byte?(byte) || byte.between?(65, 70) || byte.between?(97, 102)
|
|
446
|
+
end
|
|
401
447
|
end
|
|
402
448
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: json_completer
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.
|
|
4
|
+
version: 1.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Aha! (www.aha.io)
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-03-
|
|
11
|
+
date: 2026-03-15 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rspec
|