json-repair 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +7 -7
- data/CHANGELOG.md +5 -1
- data/README.md +10 -3
- data/lib/json/repair/version.rb +1 -1
- data/lib/json/repair.rb +4 -6
- data/lib/json/repairer.rb +645 -0
- data/sig/json/repair.rbs +2 -2
- metadata +3 -3
- data/lib/json/repair/repairer.rb +0 -647
data/lib/json/repair/repairer.rb
DELETED
@@ -1,647 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'json'

require_relative 'string_utils'
|
4
|
-
|
5
|
-
module JSON
|
6
|
-
module Repair
|
7
|
-
class Repairer
|
8
|
-
include StringUtils
|
9
|
-
|
10
|
-
# Maps a raw control character to the two-character escape sequence
# (a backslash followed by a letter) that parse_string writes in its place
# when the character appears unescaped inside a string.
CONTROL_CHARACTERS = {
  "\b" => '\b', # backspace
  "\f" => '\f', # form feed
  "\n" => '\n', # line feed
  "\r" => '\r', # carriage return
  "\t" => '\t'  # horizontal tab
}.freeze
|
17
|
-
|
18
|
-
# Characters that may legally follow a backslash inside a JSON string, mapped
# to the character the escape stands for. Within this file parse_string only
# tests whether the looked-up value is truthy; it does not use the value itself.
ESCAPE_CHARACTERS = {
  '"' => '"',
  '\\' => '\\',
  '/' => '/',
  'b' => "\b",
  'f' => "\f",
  'n' => "\n",
  'r' => "\r",
  't' => "\t"
}.freeze
|
28
|
-
|
29
|
-
# Set up the state for a single repair run.
#
# json - the (possibly malformed) JSON text to repair.
#
# @output accumulates the repaired text and @index is the read cursor
# into @json; both start empty / at zero.
def initialize(json)
  @output = ''
  @index = 0
  @json = json
end
|
34
|
-
|
35
|
-
# Repair the JSON text given to the constructor and return the repaired text.
#
# Parses one root value, then handles what may follow it: a trailing comma,
# further root-level values (treated as newline delimited JSON and wrapped
# in an array), and redundant closing braces/brackets.
#
# Raises JSONRepairError (through the throw_* helpers) when no value can be
# parsed at all, or when unexpected text remains after the root value.
def repair
  throw_unexpected_end unless parse_value

  saw_comma = parse_character(COMMA)
  parse_whitespace_and_skip_comments if saw_comma

  if start_of_value?(@json[@index]) && ends_with_comma_or_newline?(@output)
    # A new value starts after the end of the root level value: this looks
    # like newline delimited JSON -> turn it into a root level array.
    # repair missing comma
    @output = insert_before_last_whitespace(@output, ',') unless saw_comma
    parse_newline_delimited_json
  elsif saw_comma
    # repair: remove trailing comma
    @output = strip_last_occurrence(@output, ',')
  end

  # repair: skip redundant closing braces and brackets after the root value
  loop do
    char = @json[@index]
    break unless char == CLOSING_BRACE || char == CLOSING_BRACKET

    @index += 1
    parse_whitespace_and_skip_comments
  end

  # anything still left over at this point cannot be repaired
  throw_unexpected_character if @index < @json.length

  # reached the end of the document properly
  @output
end
|
70
|
-
|
71
|
-
private
|
72
|
-
|
73
|
-
# Parse a single JSON value of any kind, skipping whitespace and comments
# on both sides of it.
#
# The individual parsers are tried in a fixed order and the first truthy
# result wins; later parsers are not called once one succeeds.
#
# NOTE(review): parse_keywords is not defined in the visible part of this
# class; presumably it comes from StringUtils - confirm.
#
# Returns the truthy result of the parser that matched, or the falsy result
# of the last parser tried when none matched.
def parse_value
  parse_whitespace_and_skip_comments

  result = parse_object
  result ||= parse_array
  result ||= parse_string
  result ||= parse_number
  result ||= parse_keywords
  result ||= parse_unquoted_string

  parse_whitespace_and_skip_comments
  result
end
|
80
|
-
|
81
|
-
# Consume a run of whitespace at the cursor and copy it to the output.
#
# Characters accepted by whitespace? are copied unchanged; characters only
# accepted by special_whitespace? are replaced by a single regular space.
#
# Returns true when at least one character was consumed, false otherwise.
def parse_whitespace
  collected = ''

  loop do
    char = @json[@index]
    break if char.nil?

    if whitespace?(char)
      collected += char
    elsif special_whitespace?(char)
      # repair: normalise special whitespace into a plain space
      collected += ' '
    else
      break
    end

    @index += 1
  end

  return false if collected.empty?

  @output += collected
  true
end
|
96
|
-
|
97
|
-
# Skip one comment starting at the cursor, if there is one.
#
# Block comments run up to and including the closing "*/"; line comments
# run up to, but not including, the next newline. Nothing is written to the
# output. For an unterminated block comment the cursor ends up two
# positions past the end of the text.
#
# Returns true when a comment was skipped, false otherwise.
def parse_comment
  return false unless @json[@index] == '/'

  case @json[@index + 1]
  when '*'
    # Block comment
    @index += 2
    @index += 1 until @json[@index].nil? || @json[@index, 2] == '*/'
    @index += 2
    true
  when '/'
    # Line comment
    @index += 2
    @index += 1 until @json[@index].nil? || @json[@index] == "\n"
    true
  else
    false
  end
end
|
113
|
-
|
114
|
-
# Parse an object like '{"key": "value"}' and write it to the output.
#
# Repairs applied along the way: a leading comma is skipped, missing commas
# between entries and missing colons are inserted, a trailing comma is
# stripped, a missing value becomes null, and a missing closing brace is
# added.
#
# Returns false (without consuming anything) when the cursor is not at an
# opening brace, true otherwise. Raises through throw_object_key_expected /
# throw_colon_expected when an entry cannot be repaired.
def parse_object
  return false unless @json[@index] == OPENING_BRACE

  @output += '{'
  @index += 1
  parse_whitespace_and_skip_comments

  # repair: skip leading comma like in {, message: "hi"}
  parse_whitespace_and_skip_comments if skip_character(COMMA)

  first_entry = true
  while @index < @json.length && @json[@index] != CLOSING_BRACE
    if first_entry
      first_entry = false
    else
      # repair missing comma
      @output = insert_before_last_whitespace(@output, ',') unless parse_character(COMMA)
      parse_whitespace_and_skip_comments
    end

    skip_ellipsis

    unless parse_string || parse_unquoted_string
      char = @json[@index]
      at_boundary = char == CLOSING_BRACE || char == OPENING_BRACE ||
                    char == CLOSING_BRACKET || char == OPENING_BRACKET ||
                    char.nil?
      throw_object_key_expected unless at_boundary

      # repair trailing comma
      @output = strip_last_occurrence(@output, ',')
      break
    end

    parse_whitespace_and_skip_comments
    has_colon = parse_character(COLON)
    truncated = @index >= @json.length
    unless has_colon
      throw_colon_expected unless start_of_value?(@json[@index]) || truncated

      # repair missing colon
      @output = insert_before_last_whitespace(@output, ':')
    end

    next if parse_value

    throw_colon_expected unless has_colon || truncated

    # repair missing object value
    @output += 'null'
  end

  if @json[@index] == CLOSING_BRACE
    @output += '}'
    @index += 1
  else
    # repair missing end bracket
    @output = insert_before_last_whitespace(@output, '}')
  end

  true
end
|
187
|
-
|
188
|
-
# Advance the cursor past `char` when it is the character at the cursor,
# without writing anything to the output.
#
# Returns true when the character was skipped, false otherwise.
def skip_character(char)
  return false unless @json[@index] == char

  @index += 1
  true
end
|
196
|
-
|
197
|
-
# Skip an ellipsis like "[1,2,3,...]" or "[1,2,3,...,9]" or "[...,7,8,9]",
# or a similar construct in objects.
#
# Leading whitespace and comments are always consumed. When three dots
# follow, they are dropped together with an optional comma after them.
#
# Returns nil when there is no ellipsis, otherwise the result of skipping
# the optional comma.
def skip_ellipsis
  parse_whitespace_and_skip_comments
  return unless (0..2).all? { |offset| @json[@index + offset] == DOT }

  # repair: remove the ellipsis (three dots) and optionally a comma
  @index += 3
  parse_whitespace_and_skip_comments
  skip_character(COMMA)
end
|
211
|
-
|
212
|
-
# Parse a string enclosed by double quotes "...". Can contain escaped quotes.
# Repairs strings enclosed in single quotes or special quotes, strings with
# a missing end quote, unescaped quotes and control characters inside the
# string, invalid escape sequences, and an escaped string (leading backslash).
#
# The function can run in two stages:
# - First, it assumes the string has a valid end quote.
# - If it turns out that the string does not have a valid end quote followed
#   by a delimiter (which should be the case), the function runs again in a
#   more conservative way, stopping the string at the first next delimiter
#   and fixing the string by inserting a quote there.
#
# stop_at_delimiter - true for the second, conservative stage.
#
# Returns true when a string was parsed and written to the output, false
# when the cursor is not at a quote. Note that a leading backslash is
# consumed even when false is returned.
# Raises through throw_invalid_unicode_character / throw_invalid_character
# for content that cannot be repaired.
def parse_string(stop_at_delimiter: false)
  skip_escape_chars = false
  if @json[@index] == BACKSLASH
    # repair: remove the first escape character
    @index += 1
    skip_escape_chars = true
  end

  return false unless quote?(@json[@index])

  # double quotes are correct JSON,
  # single quotes come from JavaScript for example, we assume it will have a correct single end quote too
  # otherwise, we will match any double-quote-like start with a double-quote-like end,
  # or any single-quote-like start with a single-quote-like end
  is_end_quote = if double_quote?(@json[@index])
                   method(:double_quote?)
                 elsif single_quote?(@json[@index])
                   method(:single_quote?)
                 elsif single_quote_like?(@json[@index])
                   method(:single_quote_like?)
                 else
                   method(:double_quote_like?)
                 end

  # remember where we started so the conservative retry can rewind
  i_before = @index
  o_before = @output.length

  str = '"'
  @index += 1

  loop do
    if @index >= @json.length
      # end of text, we are missing an end quote

      i_prev = prev_non_whitespace_index(@index - 1)
      if !stop_at_delimiter && delimiter?(@json[i_prev])
        # if the text ends with a delimiter, like ["hello],
        # so the missing end quote should be inserted before this delimiter
        # retry parsing the string, stopping at the first next delimiter
        @index = i_before
        @output = @output[0...o_before]

        return parse_string(stop_at_delimiter: true)
      end

      # repair missing quote
      str = insert_before_last_whitespace(str, '"')
      @output += str

      return true
    elsif is_end_quote.call(@json[@index])
      # end quote
      i_quote = @index
      o_quote = str.length
      str += '"'
      @index += 1
      @output += str

      parse_whitespace_and_skip_comments

      if stop_at_delimiter ||
         @index >= @json.length ||
         delimiter?(@json[@index]) ||
         quote?(@json[@index]) ||
         digit?(@json[@index])
        # The quote is followed by the end of the text, a delimiter, or a next value
        parse_concatenated_string

        return true
      end

      if delimiter?(@json[prev_non_whitespace_index(i_quote - 1)])
        # This is not the right end quote: it is preceded by a delimiter,
        # and NOT followed by a delimiter. So, there is an end quote missing
        # parse the string again and then stop at the first next delimiter
        @index = i_before
        @output = @output[...o_before]

        return parse_string(stop_at_delimiter: true)
      end

      # revert to right after the quote but before any whitespace, and continue parsing the string
      @output = @output[...o_before]
      @index = i_quote + 1

      # repair unescaped quote
      str = "#{str[...o_quote]}\\#{str[o_quote..]}"
    elsif stop_at_delimiter && delimiter?(@json[@index])
      # we're in the mode to stop the string at the first delimiter
      # because there is an end quote missing

      # repair missing quote
      str = insert_before_last_whitespace(str, '"')
      @output += str

      parse_concatenated_string

      return true
    elsif @json[@index] == BACKSLASH
      # handle escaped content like \n or \u2605
      char = @json[@index + 1]
      if char.nil?
        # repair: a backslash as the very last character of the text has
        # nothing to escape; drop it and end the string here, so the
        # end-of-text branch above adds the missing end quote
        @index = @json.length
      elsif ESCAPE_CHARACTERS[char]
        str += @json[@index, 2]
        @index += 2
      elsif char == 'u'
        # count up to four hex digits following "\u"
        j = 2
        j += 1 while j < 6 && @json[@index + j] && hex?(@json[@index + j])
        if j == 6
          str += @json[@index, 6]
          @index += 6
        elsif @index + j >= @json.length
          # repair invalid or truncated unicode char at the end of the text
          # by removing the unicode char and ending the string here
          @index = @json.length
        else
          throw_invalid_unicode_character
        end
      else
        # repair invalid escape character: remove it
        str += char
        @index += 2
      end
    else
      # handle regular characters
      char = @json[@index]

      if char == DOUBLE_QUOTE && @json[@index - 1] != BACKSLASH
        # repair unescaped double quote
        str += "\\#{char}"
      elsif control_character?(char)
        # unescaped control character
        # NOTE(review): assumes every character accepted by control_character?
        # has an entry in CONTROL_CHARACTERS - confirm against StringUtils
        str += CONTROL_CHARACTERS[char]
      else
        throw_invalid_character(char) unless valid_string_character?(char)
        str += char
      end

      @index += 1
    end

    # repair: the whole string was escaped, so skip the escape character
    # that precedes the next character as well
    skip_escape_character if skip_escape_chars
  end
end
|
370
|
-
|
371
|
-
# Repair an unquoted string by adding quotes around it.
# Repair a MongoDB function call like NumberLong("2").
# Repair a JSONP function call like callback({...});
# Also repairs the bare word undefined into null.
#
# The quoted text is produced with JSON.generate rather than String#inspect:
# inspect emits Ruby literal syntax (for example "\#$x" or "\e"), which is
# not valid JSON.
#
# Returns true when something was consumed, false when the cursor is
# already at a delimiter or quote.
def parse_unquoted_string
  start = @index
  @index += 1 while @index < @json.length && !delimiter_except_slash?(@json[@index]) && !quote?(@json[@index])
  return false if @index <= start

  if @json[@index] == '(' && function_name?(@json[start...@index].strip)
    # Repair a MongoDB function call like NumberLong("2")
    # Repair a JSONP function call like callback({...});
    @index += 1

    # only the argument is written to the output, the wrapper is dropped
    parse_value

    if @json[@index] == ')'
      # Repair: skip close bracket of function call
      @index += 1
      # Repair: skip semicolon after JSONP call
      @index += 1 if @json[@index] == ';'
    end
  else
    # Repair unquoted string
    # Also, repair undefined into null

    # First, go back to prevent getting trailing whitespaces in the string.
    # The bound is checked first so index -1 (the last character) is never read.
    @index -= 1 while @index.positive? && whitespace?(@json[@index - 1])

    symbol = @json[start...@index]
    @output += symbol == 'undefined' ? 'null' : JSON.generate(symbol)

    # We had a missing start quote, but now we encountered the end quote, so we can skip that one
    @index += 1 if @json[@index] == '"'
  end

  true
end
|
410
|
-
|
411
|
-
# Consume `char` when it is the character at the cursor, copying it to the
# output.
#
# Returns true when the character was consumed, false otherwise.
def parse_character(char)
  return false unless @json[@index] == char

  @output += @json[@index]
  @index += 1
  true
end
|
420
|
-
|
421
|
-
# Consume whitespace and comments at the cursor.
#
# Whitespace is parsed first; after that, the scan keeps going only while
# a comment is found AND that comment is followed by whitespace.
#
# Returns true when the cursor moved, false otherwise.
def parse_whitespace_and_skip_comments
  origin = @index

  parse_whitespace
  loop { break unless parse_comment && parse_whitespace }

  @index > origin
end
|
433
|
-
|
434
|
-
# Parse a number like 2.4 or 2.4e6 and write it to the output.
#
# Repairs: a number cut off right after '-', '.', or the exponent marker
# gets a 0 appended; a number with leading zeros like "00789" is written as
# a quoted string, because the zeros may carry meaning.
#
# Returns true when a number was written. Returns false, with the cursor
# back at its starting position, when the text at the cursor is not a
# number, so that it can be parsed as another type.
def parse_number
  start = @index

  # Classify what follows a '-', '.', or exponent marker:
  #   :repaired - the number is cut off here; it was completed and written
  #   :digits   - a digit follows, keep parsing
  #   :rejected - not a number after all; the cursor was rewound
  after_symbol = lambda do
    if at_end_of_number?
      repair_number_ending_with_numeric_symbol(start)
      :repaired
    elsif digit?(@json[@index])
      :digits
    else
      @index = start
      :rejected
    end
  end

  if @json[@index] == '-'
    @index += 1
    outcome = after_symbol.call
    return outcome == :repaired unless outcome == :digits
  end

  # Note that in JSON leading zeros like "00789" are not allowed.
  # All leading zeros are accepted here; the check at the end of this
  # method detects them and repairs the number if needed.
  @index += 1 while digit?(@json[@index])

  if @json[@index] == '.'
    @index += 1
    outcome = after_symbol.call
    return outcome == :repaired unless outcome == :digits

    @index += 1 while digit?(@json[@index])
  end

  if @json[@index]&.downcase == 'e'
    @index += 1
    @index += 1 if ['-', '+'].include?(@json[@index])
    outcome = after_symbol.call
    return outcome == :repaired unless outcome == :digits

    @index += 1 while digit?(@json[@index])
  end

  # if we're not at the end of the number by this point, allow this to be parsed as another type
  unless at_end_of_number?
    @index = start
    return false
  end

  return false unless @index > start

  # repair a number with leading zeros like "00789"
  num = @json[start...@index]
  @output += num.match?(/^0\d/) ? "\"#{num}\"" : num
  true
end
|
499
|
-
|
500
|
-
# Whether a number may end at the cursor: at the end of the text, or at a
# delimiter or whitespace character.
def at_end_of_number?
  return true if @index >= @json.length

  char = @json[@index]
  delimiter?(char) || whitespace?(char)
end
|
503
|
-
|
504
|
-
# Parse an array like '["item1", "item2", ...]' and write it to the output.
#
# Repairs: a leading comma is skipped, missing commas between items are
# inserted, a trailing comma is stripped, and a missing closing bracket is
# added.
#
# Returns false (without consuming anything) when the cursor is not at an
# opening bracket, true otherwise.
def parse_array
  return false unless @json[@index] == OPENING_BRACKET

  @output += '['
  @index += 1
  parse_whitespace_and_skip_comments

  # repair: skip leading comma like in [,1,2,3]
  parse_whitespace_and_skip_comments if skip_character(COMMA)

  first_item = true
  while @index < @json.length && @json[@index] != CLOSING_BRACKET
    if first_item
      first_item = false
    elsif !parse_character(COMMA)
      # repair missing comma
      @output = insert_before_last_whitespace(@output, ',')
    end

    skip_ellipsis

    unless parse_value
      # repair trailing comma
      @output = strip_last_occurrence(@output, ',')
      break
    end
  end

  if @json[@index] == CLOSING_BRACKET
    @output += ']'
    @index += 1
  else
    # repair missing closing array bracket
    @output = insert_before_last_whitespace(@output, ']')
  end

  true
end
|
547
|
-
|
548
|
-
# Walk backwards from `start` over whitespace in the input text.
#
# Returns the index of the nearest non-whitespace character at or before
# `start`. Index 0 is never inspected: the walk stops there regardless of
# what it holds, and a non-positive `start` is returned unchanged.
def prev_non_whitespace_index(start)
  return start unless start.positive?

  start.downto(1) { |position| return position unless whitespace?(@json[position]) }
  0
end
|
553
|
-
|
554
|
-
# Repair concatenated strings like "hello" + "world", change this into
# "helloworld".
#
# Called right after a string has been written to the output. For every
# '+' that follows, the end quote of the previous string is removed, the
# next string is parsed, and its start quote is removed so the two merge.
#
# Returns true when at least one '+' was processed, false otherwise.
def parse_concatenated_string
  found_plus = false

  parse_whitespace_and_skip_comments
  while @json[@index] == PLUS
    found_plus = true
    @index += 1
    parse_whitespace_and_skip_comments

    # repair: remove the end quote of the first string
    @output = strip_last_occurrence(@output, '"', strip_remaining_text: true)
    seam = @output.length

    if parse_string
      # repair: remove the start quote of the second string
      @output = remove_at_index(@output, seam, 1)
    else
      # repair: remove the '+' because it is not followed by a string
      @output = insert_before_last_whitespace(@output, '"')
    end
  end

  found_plus
end
|
579
|
-
|
580
|
-
# Repair a number that is cut off at the end.
#
# This is only called from parse_number when the text stops right after a
# '-', '.', or the exponent marker. The text from `start` up to the cursor
# is written to the output followed by a single 0, which changes the number
# no more than needed to make it valid JSON.
def repair_number_ending_with_numeric_symbol(start)
  cut_off = @json[start...@index]
  @output += "#{cut_off}0"
end
|
586
|
-
|
587
|
-
# Parse and repair Newline Delimited JSON (NDJSON): multiple JSON values
# separated by a newline character.
#
# Keeps parsing values until one fails to parse, inserting a comma between
# values where it is missing, then wraps the whole output in array brackets.
def parse_newline_delimited_json
  first_value = true

  loop do
    if first_value
      first_value = false
    elsif !parse_character(COMMA)
      # repair: add missing comma
      @output = insert_before_last_whitespace(@output, ',')
    end

    break unless parse_value
  end

  # The loop only ends after a value failed to parse, so the comma written
  # before that attempt is a trailing one.
  # repair: remove trailing comma
  @output = strip_last_occurrence(@output, ',')

  # repair: wrap the output inside array brackets
  @output = "[\n#{@output}\n]"
end
|
616
|
-
|
617
|
-
# Advance the cursor past a backslash when one is at the cursor, without
# writing anything to the output.
#
# Returns true when a backslash was skipped, false otherwise.
def skip_escape_character
  return false unless @json[@index] == BACKSLASH

  @index += 1
  true
end
|
620
|
-
|
621
|
-
# Raise a JSONRepairError reporting `char` as an invalid character at the
# current cursor position.
def throw_invalid_character(char)
  message = "Invalid character #{char.inspect} at index #{@index}"
  raise JSONRepairError, message
end
|
624
|
-
|
625
|
-
# Raise a JSONRepairError reporting the character at the cursor, and the
# cursor position, as unexpected.
def throw_unexpected_character
  message = "Unexpected character #{@json[@index].inspect} at index #{@index}"
  raise JSONRepairError, message
end
|
628
|
-
|
629
|
-
# Raise a JSONRepairError reporting that the text ended before any value
# could be parsed.
def throw_unexpected_end
  message = 'Unexpected end of json string'
  raise JSONRepairError, message
end
|
632
|
-
|
633
|
-
# Raise a JSONRepairError reporting that an object key was expected at the
# cursor.
def throw_object_key_expected
  message = 'Object key expected'
  raise JSONRepairError, message
end
|
636
|
-
|
637
|
-
# Raise a JSONRepairError reporting that a colon was expected between an
# object key and its value.
def throw_colon_expected
  message = 'Colon expected'
  raise JSONRepairError, message
end
|
640
|
-
|
641
|
-
# Raise a JSONRepairError reporting an invalid unicode escape. The message
# quotes up to six characters of input starting at the cursor, together
# with the cursor position.
def throw_invalid_unicode_character
  snippet = @json[@index, 6]
  message = "Invalid unicode character #{snippet.inspect} at index #{@index}"
  raise JSONRepairError, message
end
|
645
|
-
end
|
646
|
-
end
|
647
|
-
end
|