json_completer 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +38 -21
- data/lib/json_completer/completion_engine.rb +241 -0
- data/lib/json_completer/parser_engine.rb +386 -0
- data/lib/json_completer/scanners.rb +448 -0
- data/lib/json_completer.rb +36 -688
- metadata +5 -2
data/lib/json_completer.rb
CHANGED
|
@@ -1,726 +1,74 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'set'
|
|
3
4
|
require 'stringio'
|
|
4
5
|
|
|
5
|
-
# JsonCompleter attempts to turn partial JSON strings into valid JSON.
|
|
6
|
-
# It handles incomplete primitives, missing values, and unclosed structures.
|
|
7
6
|
class JsonCompleter
|
|
7
|
+
ParseError = Class.new(StandardError)
|
|
8
|
+
|
|
8
9
|
STRUCTURE_CHARS = ['[', '{', ',', ':'].to_set.freeze
|
|
9
10
|
KEYWORD_MAP = { 't' => 'true', 'f' => 'false', 'n' => 'null' }.freeze
|
|
10
11
|
VALID_PRIMITIVES = %w[true false null].to_set.freeze
|
|
11
12
|
|
|
12
|
-
# Parsing state for incremental processing
|
|
13
13
|
ParsingState = Struct.new(
|
|
14
|
-
:output_tokens, :context_stack, :last_index, :input_length,
|
|
15
|
-
:
|
|
16
|
-
:incomplete_string_escape_state, keyword_init: true
|
|
14
|
+
:output_tokens, :context_stack, :last_index, :input_length, :incomplete_string_token,
|
|
15
|
+
keyword_init: true
|
|
17
16
|
) do
|
|
18
17
|
def initialize(
|
|
19
|
-
output_tokens: [], context_stack: [], last_index: 0, input_length: 0,
|
|
20
|
-
incomplete_string_start: nil, incomplete_string_buffer: nil,
|
|
21
|
-
incomplete_string_escape_state: nil
|
|
18
|
+
output_tokens: [], context_stack: [], last_index: 0, input_length: 0, incomplete_string_token: nil
|
|
22
19
|
)
|
|
23
20
|
super
|
|
24
21
|
end
|
|
25
22
|
end
|
|
26
23
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
def initialize(state = self.class.new_state)
|
|
37
|
-
@state = state
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
# Incrementally completes JSON using previous parsing state to avoid reprocessing.
|
|
41
|
-
#
|
|
42
|
-
# @param partial_json [String] The current partial JSON string (full accumulated input).
|
|
43
|
-
# @return [String] Completed JSON.
|
|
44
|
-
def complete(partial_json)
|
|
45
|
-
input = partial_json
|
|
46
|
-
|
|
47
|
-
# Initialize or reuse state
|
|
48
|
-
if @state.nil? || @state.input_length > input.length
|
|
49
|
-
# Fresh start or input was truncated - start over
|
|
50
|
-
@state = ParsingState.new
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
return input if input.empty?
|
|
54
|
-
return input if valid_json_primitive_or_document?(input)
|
|
55
|
-
|
|
56
|
-
# If input hasn't grown since last time, just return completed version of existing state
|
|
57
|
-
if @state.input_length == input.length && !@state.output_tokens.empty?
|
|
58
|
-
return finalize_completion(@state.output_tokens.dup, @state.context_stack.dup)
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
# Handle incomplete string from previous state
|
|
62
|
-
output_tokens = @state.output_tokens.dup
|
|
63
|
-
context_stack = @state.context_stack.dup
|
|
64
|
-
index = @state.last_index
|
|
65
|
-
length = input.length
|
|
66
|
-
incomplete_string_start = nil
|
|
67
|
-
incomplete_string_buffer = nil
|
|
68
|
-
incomplete_string_escape_state = nil
|
|
69
|
-
|
|
70
|
-
# If we had an incomplete string, continue from where we left off
|
|
71
|
-
if @state.incomplete_string_start
|
|
72
|
-
incomplete_string_start = @state.incomplete_string_start
|
|
73
|
-
incomplete_string_buffer = @state.incomplete_string_buffer || StringIO.new('"')
|
|
74
|
-
incomplete_string_escape_state = @state.incomplete_string_escape_state
|
|
75
|
-
# Remove the auto-completed string from output_tokens since we'll add the real one
|
|
76
|
-
output_tokens.pop if output_tokens.last&.start_with?('"') && output_tokens.last.end_with?('"')
|
|
77
|
-
end
|
|
78
|
-
|
|
79
|
-
# Process from the current index
|
|
80
|
-
while index < length
|
|
81
|
-
# Special case: continuing an incomplete string
|
|
82
|
-
if incomplete_string_buffer && index == @state.last_index
|
|
83
|
-
str_value, new_index, terminated, new_buffer, new_escape_state = continue_parsing_string(
|
|
84
|
-
input, incomplete_string_buffer, incomplete_string_escape_state
|
|
85
|
-
)
|
|
86
|
-
if terminated
|
|
87
|
-
output_tokens << str_value
|
|
88
|
-
incomplete_string_start = nil
|
|
89
|
-
incomplete_string_buffer = nil
|
|
90
|
-
incomplete_string_escape_state = nil
|
|
91
|
-
# Continue processing from where string ended
|
|
92
|
-
index = new_index
|
|
93
|
-
else
|
|
94
|
-
# String still incomplete, save state
|
|
95
|
-
incomplete_string_buffer = new_buffer
|
|
96
|
-
incomplete_string_escape_state = new_escape_state
|
|
97
|
-
# We've consumed everything
|
|
98
|
-
index = length
|
|
99
|
-
end
|
|
100
|
-
next
|
|
101
|
-
end
|
|
102
|
-
|
|
103
|
-
char = input[index]
|
|
104
|
-
last_significant_char_in_output = get_last_significant_char(output_tokens)
|
|
105
|
-
|
|
106
|
-
case char
|
|
107
|
-
when '{'
|
|
108
|
-
ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
|
|
109
|
-
ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)
|
|
110
|
-
output_tokens << char
|
|
111
|
-
context_stack << '{'
|
|
112
|
-
index += 1
|
|
113
|
-
when '['
|
|
114
|
-
ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
|
|
115
|
-
ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)
|
|
116
|
-
output_tokens << char
|
|
117
|
-
context_stack << '['
|
|
118
|
-
index += 1
|
|
119
|
-
when '}'
|
|
120
|
-
# Do not repair missing object values - preserve invalid JSON
|
|
121
|
-
remove_trailing_comma(output_tokens)
|
|
122
|
-
output_tokens << char
|
|
123
|
-
context_stack.pop if !context_stack.empty? && context_stack.last == '{'
|
|
124
|
-
index += 1
|
|
125
|
-
when ']'
|
|
126
|
-
# Do not repair trailing commas in arrays - preserve invalid JSON
|
|
127
|
-
output_tokens << char
|
|
128
|
-
context_stack.pop if !context_stack.empty? && context_stack.last == '['
|
|
129
|
-
index += 1
|
|
130
|
-
when '"' # Start of a string (key or value)
|
|
131
|
-
# Start of a new string (incomplete strings are handled at the top of the loop)
|
|
132
|
-
ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
|
|
133
|
-
ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)
|
|
134
|
-
|
|
135
|
-
string_start_index = index
|
|
136
|
-
str_value, consumed, terminated, new_buffer, new_escape_state = parse_string_with_state(input, index)
|
|
137
|
-
|
|
138
|
-
if terminated
|
|
139
|
-
output_tokens << str_value
|
|
140
|
-
incomplete_string_start = nil
|
|
141
|
-
incomplete_string_buffer = nil
|
|
142
|
-
incomplete_string_escape_state = nil
|
|
143
|
-
else
|
|
144
|
-
# String incomplete, save state for next call
|
|
145
|
-
# Don't add to output_tokens yet - will be added during finalization
|
|
146
|
-
incomplete_string_start = string_start_index
|
|
147
|
-
incomplete_string_buffer = new_buffer
|
|
148
|
-
incomplete_string_escape_state = new_escape_state
|
|
149
|
-
end
|
|
150
|
-
index += consumed
|
|
151
|
-
when ':'
|
|
152
|
-
# If the char before ':' was a comma, it's likely {"a":1, :"b":2} which is invalid.
|
|
153
|
-
# Or if it was an opening brace/bracket.
|
|
154
|
-
# Standard JSON doesn't allow this, but we aim to fix.
|
|
155
|
-
# A colon should typically follow a string key.
|
|
156
|
-
# If last char in output was a comma, remove it.
|
|
157
|
-
remove_trailing_comma(output_tokens) if last_significant_char_in_output == ','
|
|
158
|
-
output_tokens << char
|
|
159
|
-
index += 1
|
|
160
|
-
when ','
|
|
161
|
-
# Handle cases like `[,` or `{,` or `,,` but do NOT repair `{"key":,` (missing object values)
|
|
162
|
-
# if last_significant_char_in_output && STRUCTURE_CHARS.include?(last_significant_char_in_output) && last_significant_char_in_output != ':'
|
|
163
|
-
# output_tokens << 'null'
|
|
164
|
-
# end
|
|
165
|
-
remove_trailing_comma(output_tokens) # Avoid double commas
|
|
166
|
-
output_tokens << char
|
|
167
|
-
index += 1
|
|
168
|
-
when 't', 'f', 'n' # true, false, null
|
|
169
|
-
ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
|
|
170
|
-
ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)
|
|
171
|
-
|
|
172
|
-
keyword_val, consumed = consume_and_complete_keyword(input, index, KEYWORD_MAP[char.downcase])
|
|
173
|
-
output_tokens << keyword_val
|
|
174
|
-
index += consumed
|
|
175
|
-
when '-', '0'..'9' # Number
|
|
176
|
-
ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
|
|
177
|
-
ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)
|
|
178
|
-
|
|
179
|
-
num_str, consumed = parse_number(input, index)
|
|
180
|
-
output_tokens << num_str
|
|
181
|
-
index += consumed
|
|
182
|
-
when /\s/ # Whitespace
|
|
183
|
-
# Preserve whitespace as-is
|
|
184
|
-
output_tokens << char
|
|
185
|
-
index += 1
|
|
186
|
-
else # Unknown characters
|
|
187
|
-
# For now, skip unknown characters as they are not part of JSON structure.
|
|
188
|
-
# More advanced handling could try to wrap them in strings if contextually appropriate.
|
|
189
|
-
index += 1
|
|
190
|
-
end
|
|
191
|
-
end
|
|
192
|
-
|
|
193
|
-
# Update state
|
|
194
|
-
updated_state = ParsingState.new(
|
|
195
|
-
output_tokens: output_tokens,
|
|
196
|
-
context_stack: context_stack,
|
|
197
|
-
last_index: index,
|
|
198
|
-
input_length: length,
|
|
199
|
-
incomplete_string_start: incomplete_string_start,
|
|
200
|
-
incomplete_string_buffer: incomplete_string_buffer,
|
|
201
|
-
incomplete_string_escape_state: incomplete_string_escape_state
|
|
24
|
+
ParseState = Struct.new(
|
|
25
|
+
:root, :root_assigned, :context_stack, :last_index, :input_length, :token_state, :input_snapshot,
|
|
26
|
+
keyword_init: true
|
|
27
|
+
) do
|
|
28
|
+
def initialize(
|
|
29
|
+
root: nil, root_assigned: false, context_stack: [], last_index: 0, input_length: 0, token_state: nil,
|
|
30
|
+
input_snapshot: nil
|
|
202
31
|
)
|
|
203
|
-
|
|
204
|
-
# Return completed JSON and updated state
|
|
205
|
-
completed_json = finalize_completion(output_tokens.dup, context_stack.dup, incomplete_string_buffer)
|
|
206
|
-
@state = updated_state
|
|
207
|
-
|
|
208
|
-
completed_json
|
|
209
|
-
end
|
|
210
|
-
|
|
211
|
-
private
|
|
212
|
-
|
|
213
|
-
# Finalizes the completion by handling post-processing and cleanup
|
|
214
|
-
def finalize_completion(output_tokens, context_stack, incomplete_string_buffer = nil)
|
|
215
|
-
# If we have an incomplete string buffer, add it with closing quote
|
|
216
|
-
if incomplete_string_buffer
|
|
217
|
-
buffer_str = incomplete_string_buffer.string
|
|
218
|
-
# Remove incomplete escape sequences at the end
|
|
219
|
-
|
|
220
|
-
# Count consecutive trailing backslashes
|
|
221
|
-
trailing_backslashes = 0
|
|
222
|
-
idx = buffer_str.length - 1
|
|
223
|
-
while idx >= 0 && buffer_str[idx] == '\\'
|
|
224
|
-
trailing_backslashes += 1
|
|
225
|
-
idx -= 1
|
|
226
|
-
end
|
|
227
|
-
|
|
228
|
-
# If odd number of trailing backslashes, remove the last one (incomplete escape)
|
|
229
|
-
# If even number, they're all paired as escaped backslashes, don't remove any
|
|
230
|
-
buffer_str = buffer_str[0...-1] if trailing_backslashes.odd?
|
|
231
|
-
|
|
232
|
-
# Check for incomplete unicode escape after handling backslashes
|
|
233
|
-
if buffer_str =~ /\\u[0-9a-fA-F]{0,3}\z/ # Incomplete unicode
|
|
234
|
-
buffer_str = buffer_str.sub(/\\u[0-9a-fA-F]{0,3}\z/, '')
|
|
235
|
-
end
|
|
236
|
-
|
|
237
|
-
# Always add closing quote for incomplete strings
|
|
238
|
-
# (incomplete_string_buffer only exists when string wasn't terminated)
|
|
239
|
-
buffer_str += '"'
|
|
240
|
-
output_tokens << buffer_str
|
|
241
|
-
end
|
|
242
|
-
|
|
243
|
-
# Post-loop cleanup and final completions
|
|
244
|
-
last_sig_char_final = get_last_significant_char(output_tokens)
|
|
245
|
-
|
|
246
|
-
# If the last significant character suggests an incomplete structure:
|
|
247
|
-
unless context_stack.empty?
|
|
248
|
-
current_ctx = context_stack.last
|
|
249
|
-
if current_ctx == '{' # Inside an object
|
|
250
|
-
if last_sig_char_final == '"' # Just a key, e.g., {"key"
|
|
251
|
-
# Check if this is a key (not a value) by looking at the context
|
|
252
|
-
# If the previous significant character before this string was '{' or ',', it's a key
|
|
253
|
-
prev_sig_char = get_previous_significant_char(output_tokens)
|
|
254
|
-
output_tokens << ':' << 'null' if ['{', ','].include?(prev_sig_char)
|
|
255
|
-
elsif last_sig_char_final == ':' # Key with colon, e.g., {"key":
|
|
256
|
-
output_tokens << 'null'
|
|
257
|
-
end
|
|
258
|
-
elsif current_ctx == '[' # Inside an array
|
|
259
|
-
output_tokens << 'null' if last_sig_char_final == ',' # Value then comma, e.g., [1,
|
|
260
|
-
end
|
|
261
|
-
end
|
|
262
|
-
|
|
263
|
-
# Close any remaining open structures
|
|
264
|
-
until context_stack.empty?
|
|
265
|
-
opener = context_stack.pop
|
|
266
|
-
remove_trailing_comma(output_tokens) # Clean up before closing
|
|
267
|
-
output_tokens << (opener == '{' ? '}' : ']')
|
|
268
|
-
end
|
|
269
|
-
|
|
270
|
-
# Join tokens. A simple join might not be ideal for formatting.
|
|
271
|
-
# A more sophisticated join would handle spaces around colons/commas.
|
|
272
|
-
# For basic validity, this should be okay.
|
|
273
|
-
reassembled_json = output_tokens.join
|
|
274
|
-
|
|
275
|
-
# Final check: if the reassembled JSON is just a standalone comma or colon, it's invalid.
|
|
276
|
-
# Return something more sensible like "null" or empty string.
|
|
277
|
-
return 'null' if reassembled_json.match?(/\A\s*[,:]\s*\z/)
|
|
278
|
-
|
|
279
|
-
reassembled_json
|
|
280
|
-
end
|
|
281
|
-
|
|
282
|
-
# Parses a new JSON string and returns parsing state for incremental processing
|
|
283
|
-
# Returns [string_value, consumed_characters, was_terminated, buffer, escape_state]
|
|
284
|
-
def parse_string_with_state(input, index)
|
|
285
|
-
start_index = index
|
|
286
|
-
output_str = StringIO.new
|
|
287
|
-
# Initial quote
|
|
288
|
-
output_str << input[index]
|
|
289
|
-
index += 1
|
|
290
|
-
terminated = false
|
|
291
|
-
escape_state = nil
|
|
292
|
-
|
|
293
|
-
while index < input.length
|
|
294
|
-
char = input[index]
|
|
295
|
-
|
|
296
|
-
if escape_state == :backslash
|
|
297
|
-
# We're in an escape sequence
|
|
298
|
-
if char == 'u'
|
|
299
|
-
escape_state = { type: :unicode, hex: String.new }
|
|
300
|
-
output_str << 'u' # Don't double the backslash
|
|
301
|
-
index += 1
|
|
302
|
-
else
|
|
303
|
-
# Regular escape sequence
|
|
304
|
-
output_str << char
|
|
305
|
-
index += 1
|
|
306
|
-
escape_state = nil
|
|
307
|
-
end
|
|
308
|
-
elsif escape_state.is_a?(Hash) && escape_state[:type] == :unicode
|
|
309
|
-
# Collecting unicode hex digits
|
|
310
|
-
if char.match?(/[0-9a-fA-F]/)
|
|
311
|
-
escape_state[:hex] << char
|
|
312
|
-
output_str << char
|
|
313
|
-
index += 1
|
|
314
|
-
if escape_state[:hex].length == 4
|
|
315
|
-
# Unicode escape complete
|
|
316
|
-
escape_state = nil
|
|
317
|
-
end
|
|
318
|
-
else
|
|
319
|
-
# Invalid unicode escape - don't include it and close string
|
|
320
|
-
# Remove the incomplete unicode escape
|
|
321
|
-
str_so_far = output_str.string
|
|
322
|
-
if str_so_far =~ /\\u[0-9a-fA-F]*\z/
|
|
323
|
-
str_so_far = str_so_far.sub(/\\u[0-9a-fA-F]*\z/, '')
|
|
324
|
-
output_str = StringIO.new(str_so_far)
|
|
325
|
-
end
|
|
326
|
-
output_str << '"'
|
|
327
|
-
return [output_str.string, index - start_index, false, nil, nil]
|
|
328
|
-
end
|
|
329
|
-
elsif char == '\\'
|
|
330
|
-
output_str << char
|
|
331
|
-
escape_state = :backslash
|
|
332
|
-
index += 1
|
|
333
|
-
elsif char == '"'
|
|
334
|
-
output_str << char
|
|
335
|
-
terminated = true
|
|
336
|
-
index += 1
|
|
337
|
-
break
|
|
338
|
-
else
|
|
339
|
-
output_str << char
|
|
340
|
-
index += 1
|
|
341
|
-
end
|
|
342
|
-
end
|
|
343
|
-
|
|
344
|
-
if terminated
|
|
345
|
-
[output_str.string, index - start_index, true, nil, nil]
|
|
346
|
-
else
|
|
347
|
-
# String incomplete - DON'T add closing quote here, it will be added during finalization
|
|
348
|
-
[output_str.string, index - start_index, false, output_str, escape_state]
|
|
349
|
-
end
|
|
350
|
-
end
|
|
351
|
-
|
|
352
|
-
# Continues parsing an incomplete string from saved state
|
|
353
|
-
# Returns [string_value, new_index, was_terminated, buffer, escape_state]
|
|
354
|
-
def continue_parsing_string(input, buffer, escape_state)
|
|
355
|
-
# Buffer should not have closing quote - we removed it from parse_string_with_state
|
|
356
|
-
|
|
357
|
-
index = @state.last_index
|
|
358
|
-
terminated = false
|
|
359
|
-
|
|
360
|
-
while index < input.length
|
|
361
|
-
char = input[index]
|
|
362
|
-
|
|
363
|
-
if escape_state == :backslash
|
|
364
|
-
# We're in an escape sequence
|
|
365
|
-
if char == 'u'
|
|
366
|
-
escape_state = { type: :unicode, hex: String.new }
|
|
367
|
-
buffer << 'u' # Don't double the backslash
|
|
368
|
-
index += 1
|
|
369
|
-
else
|
|
370
|
-
# Regular escape sequence
|
|
371
|
-
buffer << char
|
|
372
|
-
index += 1
|
|
373
|
-
escape_state = nil
|
|
374
|
-
end
|
|
375
|
-
elsif escape_state.is_a?(Hash) && escape_state[:type] == :unicode
|
|
376
|
-
# Collecting unicode hex digits
|
|
377
|
-
if char.match?(/[0-9a-fA-F]/)
|
|
378
|
-
escape_state[:hex] << char
|
|
379
|
-
buffer << char
|
|
380
|
-
index += 1
|
|
381
|
-
if escape_state[:hex].length == 4
|
|
382
|
-
# Unicode escape complete
|
|
383
|
-
escape_state = nil
|
|
384
|
-
end
|
|
385
|
-
else
|
|
386
|
-
# Invalid unicode escape - don't include it and close string
|
|
387
|
-
# Remove the incomplete unicode escape
|
|
388
|
-
str_so_far = buffer.string
|
|
389
|
-
if str_so_far =~ /\\u[0-9a-fA-F]*\z/
|
|
390
|
-
str_so_far = str_so_far.sub(/\\u[0-9a-fA-F]*\z/, '')
|
|
391
|
-
buffer = StringIO.new(str_so_far)
|
|
392
|
-
end
|
|
393
|
-
buffer << '"'
|
|
394
|
-
return [buffer.string, index, false, nil, nil]
|
|
395
|
-
end
|
|
396
|
-
elsif char == '\\'
|
|
397
|
-
buffer << char
|
|
398
|
-
escape_state = :backslash
|
|
399
|
-
index += 1
|
|
400
|
-
elsif char == '"'
|
|
401
|
-
buffer << char
|
|
402
|
-
terminated = true
|
|
403
|
-
index += 1
|
|
404
|
-
break
|
|
405
|
-
else
|
|
406
|
-
buffer << char
|
|
407
|
-
index += 1
|
|
408
|
-
end
|
|
409
|
-
end
|
|
410
|
-
|
|
411
|
-
if terminated
|
|
412
|
-
[buffer.string, index, true, nil, nil]
|
|
413
|
-
else
|
|
414
|
-
# String still incomplete - DON'T add quote here
|
|
415
|
-
[buffer.string, index, false, buffer, escape_state]
|
|
416
|
-
end
|
|
417
|
-
end
|
|
418
|
-
|
|
419
|
-
# Parses a JSON string starting at the given index.
|
|
420
|
-
# Handles unterminated strings by closing them.
|
|
421
|
-
# Returns [string_value, consumed_characters, was_terminated]
|
|
422
|
-
def parse_string_with_termination_info(input, index)
|
|
423
|
-
start_index = index
|
|
424
|
-
output_str = StringIO.new
|
|
425
|
-
output_str << input[index] # Initial quote
|
|
426
|
-
index += 1
|
|
427
|
-
terminated = false
|
|
428
|
-
|
|
429
|
-
while index < input.length
|
|
430
|
-
char = input[index]
|
|
431
|
-
|
|
432
|
-
if char == '\\' && index + 1 < input.length
|
|
433
|
-
next_char = input[index + 1]
|
|
434
|
-
if next_char == 'u'
|
|
435
|
-
# Handle unicode escape sequence
|
|
436
|
-
index += 2 # Skip '\u'
|
|
437
|
-
hex_digits = String.new
|
|
438
|
-
|
|
439
|
-
# Collect up to 4 hex digits
|
|
440
|
-
while hex_digits.length < 4 && index < input.length && input[index].match?(/[0-9a-fA-F]/)
|
|
441
|
-
hex_digits << input[index]
|
|
442
|
-
index += 1
|
|
443
|
-
end
|
|
444
|
-
|
|
445
|
-
if hex_digits.length == 4
|
|
446
|
-
# Complete unicode escape
|
|
447
|
-
output_str << '\\u' << hex_digits
|
|
448
|
-
else
|
|
449
|
-
# Incomplete unicode escape - remove it entirely and close string
|
|
450
|
-
output_str << '"'
|
|
451
|
-
return [output_str.string, index - start_index, false]
|
|
452
|
-
end
|
|
453
|
-
else
|
|
454
|
-
# Regular escape sequence
|
|
455
|
-
output_str << char << next_char
|
|
456
|
-
index += 2
|
|
457
|
-
end
|
|
458
|
-
elsif char == '"'
|
|
459
|
-
output_str << char
|
|
460
|
-
terminated = true
|
|
461
|
-
index += 1
|
|
462
|
-
break
|
|
463
|
-
else
|
|
464
|
-
output_str << char
|
|
465
|
-
index += 1
|
|
466
|
-
end
|
|
467
|
-
end
|
|
468
|
-
|
|
469
|
-
output_str << '"' unless terminated # Close if unterminated
|
|
470
|
-
[output_str.string, index - start_index, terminated]
|
|
471
|
-
end
|
|
472
|
-
|
|
473
|
-
# Parses a JSON string starting at the given index.
|
|
474
|
-
# Handles unterminated strings by closing them.
|
|
475
|
-
def parse_string(input, index)
|
|
476
|
-
start_index = index
|
|
477
|
-
output_str = StringIO.new
|
|
478
|
-
output_str << input[index] # Initial quote
|
|
479
|
-
index += 1
|
|
480
|
-
terminated = false
|
|
481
|
-
|
|
482
|
-
while index < input.length
|
|
483
|
-
char = input[index]
|
|
484
|
-
|
|
485
|
-
if char == '\\' && index + 1 < input.length
|
|
486
|
-
next_char = input[index + 1]
|
|
487
|
-
if next_char == 'u'
|
|
488
|
-
# Handle unicode escape sequence
|
|
489
|
-
index += 2 # Skip '\u'
|
|
490
|
-
hex_digits = String.new
|
|
491
|
-
|
|
492
|
-
# Collect up to 4 hex digits
|
|
493
|
-
while hex_digits.length < 4 && index < input.length && input[index].match?(/[0-9a-fA-F]/)
|
|
494
|
-
hex_digits << input[index]
|
|
495
|
-
index += 1
|
|
496
|
-
end
|
|
497
|
-
|
|
498
|
-
if hex_digits.length == 4
|
|
499
|
-
# Complete unicode escape
|
|
500
|
-
output_str << '\\u' << hex_digits
|
|
501
|
-
else
|
|
502
|
-
# Incomplete unicode escape - remove it entirely and close string
|
|
503
|
-
output_str << '"'
|
|
504
|
-
return [output_str.string, index - start_index]
|
|
505
|
-
end
|
|
506
|
-
else
|
|
507
|
-
# Regular escape sequence
|
|
508
|
-
output_str << char << next_char
|
|
509
|
-
index += 2
|
|
510
|
-
end
|
|
511
|
-
elsif char == '"'
|
|
512
|
-
output_str << char
|
|
513
|
-
terminated = true
|
|
514
|
-
index += 1
|
|
515
|
-
break
|
|
516
|
-
else
|
|
517
|
-
output_str << char
|
|
518
|
-
index += 1
|
|
519
|
-
end
|
|
520
|
-
end
|
|
521
|
-
|
|
522
|
-
output_str << '"' unless terminated # Close if unterminated
|
|
523
|
-
[output_str.string, index - start_index]
|
|
524
|
-
end
|
|
525
|
-
|
|
526
|
-
# Parses a JSON number starting at the given index.
|
|
527
|
-
# Completes numbers like "1." to "1.0".
|
|
528
|
-
def parse_number(input, index)
|
|
529
|
-
start_index = index
|
|
530
|
-
num_str = StringIO.new
|
|
531
|
-
|
|
532
|
-
# Optional leading minus
|
|
533
|
-
if input[index] == '-'
|
|
534
|
-
num_str << input[index]
|
|
535
|
-
index += 1
|
|
536
|
-
end
|
|
537
|
-
|
|
538
|
-
# Integer part
|
|
539
|
-
digits_before_dot = false
|
|
540
|
-
while index < input.length && input[index] >= '0' && input[index] <= '9'
|
|
541
|
-
num_str << input[index]
|
|
542
|
-
index += 1
|
|
543
|
-
digits_before_dot = true
|
|
544
|
-
end
|
|
545
|
-
|
|
546
|
-
# Decimal part
|
|
547
|
-
has_dot = false
|
|
548
|
-
if index < input.length && input[index] == '.'
|
|
549
|
-
has_dot = true
|
|
550
|
-
num_str << input[index]
|
|
551
|
-
index += 1
|
|
552
|
-
digits_after_dot = false
|
|
553
|
-
while index < input.length && input[index] >= '0' && input[index] <= '9'
|
|
554
|
-
num_str << input[index]
|
|
555
|
-
index += 1
|
|
556
|
-
digits_after_dot = true
|
|
557
|
-
end
|
|
558
|
-
num_str << '0' unless digits_after_dot # Append '0' if it's just "X." or "."
|
|
559
|
-
end
|
|
560
|
-
|
|
561
|
-
# If it was just "." or "-."
|
|
562
|
-
current_val = num_str.string
|
|
563
|
-
if current_val == '.'
|
|
564
|
-
num_str = StringIO.new # Reset
|
|
565
|
-
num_str << '0.0'
|
|
566
|
-
elsif current_val == '-.'
|
|
567
|
-
num_str = StringIO.new # Reset
|
|
568
|
-
num_str << '-0.0'
|
|
569
|
-
elsif current_val == '-' # Only a minus sign
|
|
570
|
-
num_str = StringIO.new # Reset
|
|
571
|
-
num_str << '0' # Or -0, but JSON standard usually serializes -0 as 0
|
|
572
|
-
elsif !digits_before_dot && has_dot # e.g. ".5" -> "0.5"
|
|
573
|
-
val = num_str.string
|
|
574
|
-
num_str = StringIO.new
|
|
575
|
-
num_str << '0' << val
|
|
576
|
-
end
|
|
577
|
-
|
|
578
|
-
# Exponent part
|
|
579
|
-
if index < input.length && (input[index].downcase == 'e')
|
|
580
|
-
# Check if there was a number before 'e'
|
|
581
|
-
temp_num_val = num_str.string
|
|
582
|
-
if temp_num_val.empty? || temp_num_val == '-' || temp_num_val == '.' || temp_num_val == '-.'
|
|
583
|
-
# Invalid start for exponent, stop number parsing here
|
|
584
|
-
return [
|
|
585
|
-
if temp_num_val == '-'
|
|
586
|
-
'0'
|
|
587
|
-
else
|
|
588
|
-
(temp_num_val.include?('.') ? temp_num_val + '0' : temp_num_val)
|
|
589
|
-
end,
|
|
590
|
-
index - start_index
|
|
591
|
-
]
|
|
592
|
-
end
|
|
593
|
-
|
|
594
|
-
num_str << input[index] # 'e' or 'E'
|
|
595
|
-
index += 1
|
|
596
|
-
if index < input.length && ['+', '-'].include?(input[index])
|
|
597
|
-
num_str << input[index]
|
|
598
|
-
index += 1
|
|
599
|
-
end
|
|
600
|
-
exponent_digits = false
|
|
601
|
-
while index < input.length && input[index] >= '0' && input[index] <= '9'
|
|
602
|
-
num_str << input[index]
|
|
603
|
-
index += 1
|
|
604
|
-
exponent_digits = true
|
|
605
|
-
end
|
|
606
|
-
# If 'e' was added but no digits followed, it's incomplete.
|
|
607
|
-
# JSON requires digits after 'e'. We might strip 'e' or add '0'.
|
|
608
|
-
# For robustness, let's add '0' if exponent is present but lacks digits.
|
|
609
|
-
num_str << '0' unless exponent_digits
|
|
32
|
+
super
|
|
610
33
|
end
|
|
611
|
-
|
|
612
|
-
final_num_str = num_str.string
|
|
613
|
-
# If the number is empty (e.g. bad start) or just "-", default to "0"
|
|
614
|
-
return ['0', index - start_index] if final_num_str.empty? || final_num_str == '-'
|
|
615
|
-
|
|
616
|
-
[final_num_str, index - start_index]
|
|
617
34
|
end
|
|
618
35
|
|
|
619
|
-
|
|
620
|
-
# and returns the completed keyword and number of characters consumed.
|
|
621
|
-
def consume_and_complete_keyword(input, index, target_keyword)
|
|
622
|
-
consumed_count = 0
|
|
623
|
-
(0...target_keyword.length).each do |k_idx|
|
|
624
|
-
break if index + k_idx >= input.length
|
|
625
|
-
|
|
626
|
-
break unless input[index + k_idx].downcase == target_keyword[k_idx]
|
|
627
|
-
|
|
628
|
-
consumed_count += 1
|
|
36
|
+
ParseSlot = Struct.new(:container, :key, :root, keyword_init: true)
|
|
629
37
|
|
|
630
|
-
|
|
38
|
+
ObjectContext = Struct.new(:container, :mode, :current_key, keyword_init: true) do
|
|
39
|
+
def initialize(container:, mode: :key_or_end, current_key: nil)
|
|
40
|
+
super
|
|
631
41
|
end
|
|
632
|
-
# If at least the first char matched, we complete to the target_keyword
|
|
633
|
-
return [target_keyword, consumed_count] if consumed_count.positive?
|
|
634
|
-
|
|
635
|
-
# Fallback (should not be reached if called correctly, i.e., input[index] is t,f, or n)
|
|
636
|
-
# This indicates the char was not the start of the expected keyword.
|
|
637
|
-
# This case should be handled by the main loop's "else" (skip unknown char).
|
|
638
|
-
# For safety, if it's called, treat the single char as a token to be skipped later.
|
|
639
|
-
[input[index], 1]
|
|
640
42
|
end
|
|
641
43
|
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
token = output_tokens[i]
|
|
646
|
-
stripped_token = token.strip
|
|
647
|
-
return stripped_token[-1] unless stripped_token.empty?
|
|
44
|
+
ArrayContext = Struct.new(:container, :mode, :provisional_index, keyword_init: true) do
|
|
45
|
+
def initialize(container:, mode: :value_or_end, provisional_index: nil)
|
|
46
|
+
super
|
|
648
47
|
end
|
|
649
|
-
nil
|
|
650
48
|
end
|
|
651
49
|
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
significant_chars = []
|
|
655
|
-
(output_tokens.length - 1).downto(0) do |i|
|
|
656
|
-
token = output_tokens[i]
|
|
657
|
-
stripped_token = token.strip
|
|
658
|
-
unless stripped_token.empty?
|
|
659
|
-
significant_chars << stripped_token[-1]
|
|
660
|
-
return significant_chars[1] if significant_chars.length >= 2
|
|
661
|
-
end
|
|
662
|
-
end
|
|
663
|
-
nil
|
|
50
|
+
def self.complete(partial_json)
|
|
51
|
+
new.complete(partial_json)
|
|
664
52
|
end
|
|
665
53
|
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
return if output_tokens.empty? || context_stack.empty? || last_sig_char.nil?
|
|
669
|
-
|
|
670
|
-
# No comma needed right after an opener, a colon, or another comma.
|
|
671
|
-
return if STRUCTURE_CHARS.include?(last_sig_char)
|
|
672
|
-
|
|
673
|
-
# If last_sig_char indicates a completed value/key:
|
|
674
|
-
# (e.g., string quote, true/false/null end, number, or closing bracket/brace)
|
|
675
|
-
# Add a comma if we are in an array or object.
|
|
676
|
-
return unless context_stack.last == '[' || (context_stack.last == '{' && last_sig_char != ':')
|
|
677
|
-
|
|
678
|
-
output_tokens << ','
|
|
54
|
+
def self.parse(partial_json)
|
|
55
|
+
new.parse(partial_json)
|
|
679
56
|
end
|
|
680
57
|
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
return if output_tokens.empty? || context_stack.empty? || last_sig_char.nil?
|
|
684
|
-
|
|
685
|
-
return unless context_stack.last == '{' && last_sig_char == '"' # In object, and last thing was a key (string)
|
|
686
|
-
|
|
687
|
-
output_tokens << ':'
|
|
58
|
+
def self.new_state
|
|
59
|
+
ParsingState.new
|
|
688
60
|
end
|
|
689
61
|
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
last_token_idx = -1
|
|
693
|
-
(output_tokens.length - 1).downto(0) do |i|
|
|
694
|
-
unless output_tokens[i].strip.empty?
|
|
695
|
-
last_token_idx = i
|
|
696
|
-
break
|
|
697
|
-
end
|
|
698
|
-
end
|
|
699
|
-
|
|
700
|
-
return unless last_token_idx != -1 && output_tokens[last_token_idx].strip == ','
|
|
701
|
-
|
|
702
|
-
output_tokens.slice!(last_token_idx)
|
|
703
|
-
# Also remove any whitespace tokens that were before this comma and are now effectively trailing
|
|
704
|
-
while last_token_idx.positive? && output_tokens[last_token_idx - 1].strip.empty?
|
|
705
|
-
output_tokens.slice!(last_token_idx - 1)
|
|
706
|
-
last_token_idx -= 1
|
|
707
|
-
end
|
|
62
|
+
def self.new_parse_state
|
|
63
|
+
ParseState.new
|
|
708
64
|
end
|
|
709
65
|
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
# Check for simple primitives first
|
|
714
|
-
return true if VALID_PRIMITIVES.include?(str)
|
|
715
|
-
# Check for valid number (simplified regex, full JSON number is complex)
|
|
716
|
-
# Allows integers, floats, but not ending with '.' or 'e'/'E' without digits
|
|
717
|
-
if str.match?(/\A-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?\z/) &&
|
|
718
|
-
!str.end_with?('.') && !str.match?(/[eE][+-]?$/)
|
|
719
|
-
return true
|
|
720
|
-
end
|
|
721
|
-
# Check for valid string literal
|
|
722
|
-
return true if str.match?(/\A"(?:[^"\\]|\\.)*"\z/)
|
|
723
|
-
|
|
724
|
-
false
|
|
66
|
+
def initialize(state = self.class.new_state, parse_state = self.class.new_parse_state)
|
|
67
|
+
@state = state
|
|
68
|
+
@parse_state = parse_state
|
|
725
69
|
end
|
|
726
70
|
end
|
|
71
|
+
|
|
72
|
+
require_relative 'json_completer/scanners'
|
|
73
|
+
require_relative 'json_completer/completion_engine'
|
|
74
|
+
require_relative 'json_completer/parser_engine'
|