json-repair 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +0 -0
- data/README.md +1 -1
- data/Steepfile +6 -0
- data/lib/json/repair/string_utils.rb +45 -20
- data/lib/json/repair/version.rb +1 -1
- data/lib/json/repairer.rb +295 -174
- data/sig/json/repair/string_utils.rbs +165 -0
- data/sig/json/repair.rbs +5 -2
- data/sig/json/repairer.rbs +103 -0
- metadata +6 -6
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: db2b6fb7849a2e75329405c1f85fa7de836b0fa2f079623032571f42d359514d
|
|
4
|
+
data.tar.gz: 1c845714c4c443bad3c9277a2ceae6cef8ff346125f52f89473aaa50b9ff2132
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 53929154af31033e2f380ed89979430f4339c97c94c088b6f85da27ac251d658b98840e44085c4ba9b4972bab75c1bb0f8ad750beddd4bb79e439efb135e0386
|
|
7
|
+
data.tar.gz: b4b5150aee81c518eaee8847bb2f5d8d8131a15719bb93badce465a2d447ddc361888155b2d33125fdd69d2568424c08440772c61a7f7f5b35922a4d1270adf8
|
data/CHANGELOG.md
CHANGED
|
Binary file
|
data/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# JSON::Repair [](https://badge.fury.io/rb/json-repair) [](https://github.com/sashazykov/json-repair-rb/actions)
|
|
1
|
+
# JSON::Repair [](https://badge.fury.io/rb/json-repair) [](https://github.com/sashazykov/json-repair-rb/actions) [](https://stand-with-ukraine.pp.ua)
|
|
2
2
|
|
|
3
3
|
This is a Ruby gem designed to repair broken JSON strings. It is inspired by and based on the [jsonrepair js library](https://github.com/josdejong/jsonrepair/). It efficiently handles and corrects malformed JSON data, making it especially useful in scenarios where JSON output from LLMs might not strictly adhere to JSON standards. Whether the problem is missing quotes, misplaced commas, or unexpected characters, it ensures that the JSON data is valid and can be parsed correctly.
|
|
4
4
|
|
data/Steepfile
ADDED
|
@@ -35,21 +35,28 @@ module JSON
|
|
|
35
35
|
LOWERCASE_E = 'e' # 0x65
|
|
36
36
|
UPPERCASE_F = 'F' # 0x46
|
|
37
37
|
LOWERCASE_F = 'f' # 0x66
|
|
38
|
-
NON_BREAKING_SPACE =
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
38
|
+
NON_BREAKING_SPACE = ' ' # 0xa0
|
|
39
|
+
MONGOLIAN_VOWEL_SEPARATOR = '' # 0x180e
|
|
40
|
+
EN_QUAD = ' ' # 0x2000
|
|
41
|
+
ZERO_WIDTH_SPACE = '' # 0x200b
|
|
42
|
+
NARROW_NO_BREAK_SPACE = ' ' # 0x202f
|
|
43
|
+
MEDIUM_MATHEMATICAL_SPACE = ' ' # 0x205f
|
|
44
|
+
IDEOGRAPHIC_SPACE = ' ' # 0x3000
|
|
45
|
+
ZERO_WIDTH_NO_BREAK_SPACE = '' # 0xfeff
|
|
46
|
+
DOUBLE_QUOTE_LEFT = '“' # 0x201c
|
|
47
|
+
DOUBLE_QUOTE_RIGHT = '”' # 0x201d
|
|
48
|
+
QUOTE_LEFT = '‘' # 0x2018
|
|
49
|
+
QUOTE_RIGHT = '’' # 0x2019
|
|
48
50
|
GRAVE_ACCENT = '`' # 0x0060
|
|
49
|
-
ACUTE_ACCENT =
|
|
51
|
+
ACUTE_ACCENT = '´' # 0x00b4
|
|
50
52
|
|
|
51
53
|
REGEX_DELIMITER = %r{^[,:\[\]/{}()\n+]+$}
|
|
54
|
+
REGEX_UNQUOTED_STRING_DELIMITER = %r{^[,\[\]/{}\n+]+$}
|
|
52
55
|
REGEX_START_OF_VALUE = /^[\[{\w-]$/
|
|
56
|
+
# matches "https://" and other URL schemes
|
|
57
|
+
REGEX_URL_START = %r{^(http|https|ftp|mailto|file|data|irc)://$}
|
|
58
|
+
# matches all valid URL characters EXCEPT "[", "]", and "," (important JSON delimiters)
|
|
59
|
+
REGEX_URL_CHAR = %r{^[A-Za-z0-9\-._~:/?#@!$&'()*+;=]$}
|
|
53
60
|
|
|
54
61
|
# Functions to check character chars
|
|
55
62
|
def hex?(char)
|
|
@@ -70,8 +77,19 @@ module JSON
|
|
|
70
77
|
REGEX_DELIMITER.match?(char)
|
|
71
78
|
end
|
|
72
79
|
|
|
73
|
-
def
|
|
74
|
-
|
|
80
|
+
def unquoted_string_delimiter?(char)
|
|
81
|
+
REGEX_UNQUOTED_STRING_DELIMITER.match?(char)
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
REGEX_FUNCTION_NAME_CHAR_START = /\A[a-zA-Z_$]\z/
|
|
85
|
+
REGEX_FUNCTION_NAME_CHAR = /\A[a-zA-Z0-9_$]\z/
|
|
86
|
+
|
|
87
|
+
def function_name_char_start?(char)
|
|
88
|
+
!char.nil? && REGEX_FUNCTION_NAME_CHAR_START.match?(char)
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
def function_name_char?(char)
|
|
92
|
+
!char.nil? && REGEX_FUNCTION_NAME_CHAR.match?(char)
|
|
75
93
|
end
|
|
76
94
|
|
|
77
95
|
def start_of_value?(char)
|
|
@@ -86,11 +104,22 @@ module JSON
|
|
|
86
104
|
[SPACE, NEWLINE, TAB, RETURN].include?(char)
|
|
87
105
|
end
|
|
88
106
|
|
|
107
|
+
def whitespace_except_newline?(char)
|
|
108
|
+
[SPACE, TAB, RETURN].include?(char)
|
|
109
|
+
end
|
|
110
|
+
|
|
89
111
|
def special_whitespace?(char)
|
|
112
|
+
return false unless char
|
|
113
|
+
|
|
90
114
|
[
|
|
91
|
-
NON_BREAKING_SPACE,
|
|
115
|
+
NON_BREAKING_SPACE,
|
|
116
|
+
MONGOLIAN_VOWEL_SEPARATOR,
|
|
117
|
+
NARROW_NO_BREAK_SPACE,
|
|
118
|
+
MEDIUM_MATHEMATICAL_SPACE,
|
|
119
|
+
IDEOGRAPHIC_SPACE,
|
|
120
|
+
ZERO_WIDTH_NO_BREAK_SPACE
|
|
92
121
|
].include?(char) ||
|
|
93
|
-
(char >= EN_QUAD && char <=
|
|
122
|
+
(char >= EN_QUAD && char <= ZERO_WIDTH_SPACE)
|
|
94
123
|
end
|
|
95
124
|
|
|
96
125
|
def quote?(char)
|
|
@@ -149,7 +178,7 @@ module JSON
|
|
|
149
178
|
|
|
150
179
|
def parse_keyword(name, value)
|
|
151
180
|
if @json[@index, name.length] == name
|
|
152
|
-
@output
|
|
181
|
+
@output << value
|
|
153
182
|
@index += name.length
|
|
154
183
|
true
|
|
155
184
|
else
|
|
@@ -161,10 +190,6 @@ module JSON
|
|
|
161
190
|
text[0...start] + text[start + count..]
|
|
162
191
|
end
|
|
163
192
|
|
|
164
|
-
def function_name?(text)
|
|
165
|
-
/^\w+$/.match?(text)
|
|
166
|
-
end
|
|
167
|
-
|
|
168
193
|
def ends_with_comma_or_newline?(text)
|
|
169
194
|
/[,\n][ \t\r]*$/.match?(text)
|
|
170
195
|
end
|
data/lib/json/repair/version.rb
CHANGED
data/lib/json/repairer.rb
CHANGED
|
@@ -25,17 +25,24 @@ module JSON
|
|
|
25
25
|
't' => "\t"
|
|
26
26
|
}.freeze
|
|
27
27
|
|
|
28
|
+
MARKDOWN_OPEN_BLOCKS = ['```', '[```', '{```'].freeze
|
|
29
|
+
MARKDOWN_CLOSE_BLOCKS = ['```', '```]', '```}'].freeze
|
|
30
|
+
|
|
28
31
|
def initialize(json)
|
|
29
32
|
@json = json
|
|
30
33
|
@index = 0
|
|
31
|
-
@output = ''
|
|
34
|
+
@output = +''
|
|
32
35
|
end
|
|
33
36
|
|
|
34
37
|
def repair
|
|
38
|
+
parse_markdown_code_block(MARKDOWN_OPEN_BLOCKS)
|
|
39
|
+
|
|
35
40
|
processed = parse_value
|
|
36
41
|
|
|
37
42
|
throw_unexpected_end unless processed
|
|
38
43
|
|
|
44
|
+
parse_markdown_code_block(MARKDOWN_CLOSE_BLOCKS)
|
|
45
|
+
|
|
39
46
|
processed_comma = parse_character(COMMA)
|
|
40
47
|
parse_whitespace_and_skip_comments if processed_comma
|
|
41
48
|
|
|
@@ -71,22 +78,45 @@ module JSON
|
|
|
71
78
|
|
|
72
79
|
def parse_value
|
|
73
80
|
parse_whitespace_and_skip_comments
|
|
74
|
-
process = parse_object ||
|
|
81
|
+
process = parse_object ||
|
|
82
|
+
parse_array ||
|
|
83
|
+
parse_string ||
|
|
84
|
+
parse_number ||
|
|
85
|
+
parse_keywords ||
|
|
86
|
+
parse_unquoted_string(false) ||
|
|
87
|
+
parse_regex
|
|
75
88
|
parse_whitespace_and_skip_comments
|
|
76
89
|
|
|
77
90
|
process
|
|
78
91
|
end
|
|
79
92
|
|
|
80
|
-
def
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
93
|
+
def parse_whitespace_and_skip_comments(skip_newline: true)
|
|
94
|
+
start = @index
|
|
95
|
+
|
|
96
|
+
changed = parse_whitespace(skip_newline: skip_newline)
|
|
97
|
+
loop do
|
|
98
|
+
changed = parse_comment
|
|
99
|
+
changed = parse_whitespace(skip_newline: skip_newline) if changed
|
|
100
|
+
break unless changed
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
@index > start
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
def parse_whitespace(skip_newline: true)
|
|
107
|
+
whitespace = +''
|
|
108
|
+
while @json[@index] && (
|
|
109
|
+
(skip_newline ? whitespace?(@json[@index]) : whitespace_except_newline?(@json[@index])) ||
|
|
110
|
+
special_whitespace?(@json[@index])
|
|
111
|
+
)
|
|
112
|
+
ws = skip_newline ? whitespace?(@json[@index]) : whitespace_except_newline?(@json[@index])
|
|
113
|
+
whitespace << (ws ? @json[@index] : ' ')
|
|
84
114
|
|
|
85
115
|
@index += 1
|
|
86
116
|
end
|
|
87
117
|
|
|
88
118
|
unless whitespace.empty?
|
|
89
|
-
@output
|
|
119
|
+
@output << whitespace
|
|
90
120
|
return true
|
|
91
121
|
end
|
|
92
122
|
|
|
@@ -110,11 +140,41 @@ module JSON
|
|
|
110
140
|
end
|
|
111
141
|
end
|
|
112
142
|
|
|
143
|
+
# Find and skip over a Markdown fenced code block:
|
|
144
|
+
# ``` ... ```
|
|
145
|
+
# or
|
|
146
|
+
# ```json ... ```
|
|
147
|
+
def parse_markdown_code_block(blocks)
|
|
148
|
+
return false unless skip_markdown_code_block(blocks)
|
|
149
|
+
|
|
150
|
+
if function_name_char_start?(@json[@index])
|
|
151
|
+
# strip the optional language specifier like "json"
|
|
152
|
+
@index += 1 while @index < @json.length && function_name_char?(@json[@index])
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
parse_whitespace_and_skip_comments
|
|
156
|
+
|
|
157
|
+
true
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
def skip_markdown_code_block(blocks)
|
|
161
|
+
parse_whitespace(skip_newline: true)
|
|
162
|
+
|
|
163
|
+
blocks.each do |block|
|
|
164
|
+
if @json[@index, block.length] == block
|
|
165
|
+
@index += block.length
|
|
166
|
+
return true
|
|
167
|
+
end
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
false
|
|
171
|
+
end
|
|
172
|
+
|
|
113
173
|
# Parse an object like '{"key": "value"}'
|
|
114
174
|
def parse_object
|
|
115
175
|
return false unless @json[@index] == OPENING_BRACE
|
|
116
176
|
|
|
117
|
-
@output
|
|
177
|
+
@output << '{'
|
|
118
178
|
@index += 1
|
|
119
179
|
parse_whitespace_and_skip_comments
|
|
120
180
|
|
|
@@ -137,7 +197,7 @@ module JSON
|
|
|
137
197
|
|
|
138
198
|
skip_ellipsis
|
|
139
199
|
|
|
140
|
-
processed_key = parse_string || parse_unquoted_string
|
|
200
|
+
processed_key = parse_string || parse_unquoted_string(true)
|
|
141
201
|
unless processed_key
|
|
142
202
|
if @json[@index] == CLOSING_BRACE || @json[@index] == OPENING_BRACE ||
|
|
143
203
|
@json[@index] == CLOSING_BRACKET || @json[@index] == OPENING_BRACKET ||
|
|
@@ -166,7 +226,7 @@ module JSON
|
|
|
166
226
|
unless processed_value
|
|
167
227
|
if processed_colon || truncated_text
|
|
168
228
|
# repair missing object value
|
|
169
|
-
@output
|
|
229
|
+
@output << 'null'
|
|
170
230
|
else
|
|
171
231
|
throw_colon_expected
|
|
172
232
|
end
|
|
@@ -174,7 +234,7 @@ module JSON
|
|
|
174
234
|
end
|
|
175
235
|
|
|
176
236
|
if @json[@index] == CLOSING_BRACE
|
|
177
|
-
@output
|
|
237
|
+
@output << '}'
|
|
178
238
|
@index += 1
|
|
179
239
|
else
|
|
180
240
|
# repair missing end bracket
|
|
@@ -217,199 +277,273 @@ module JSON
|
|
|
217
277
|
# - If it turns out that the string does not have a valid end quote followed
|
|
218
278
|
# by a delimiter (which should be the case), the function runs again in a
|
|
219
279
|
# more conservative way, stopping the string at the first next delimiter
|
|
220
|
-
# and fixing the string by inserting a quote there
|
|
221
|
-
|
|
222
|
-
|
|
280
|
+
# and fixing the string by inserting a quote there, or stopping at a
|
|
281
|
+
# stop index detected in the first iteration.
|
|
282
|
+
def parse_string(stop_at_delimiter: false, stop_at_index: -1)
|
|
283
|
+
skip_escape_chars = @json[@index] == BACKSLASH
|
|
284
|
+
if skip_escape_chars
|
|
223
285
|
# repair: remove the first escape character
|
|
224
286
|
@index += 1
|
|
225
|
-
skip_escape_chars = true
|
|
226
287
|
end
|
|
227
288
|
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
289
|
+
return false unless quote?(@json[@index])
|
|
290
|
+
|
|
291
|
+
# double quotes are correct JSON,
|
|
292
|
+
# single quotes come from JavaScript for example, we assume it will have a correct single end quote too
|
|
293
|
+
# otherwise, we will match any double-quote-like start with a double-quote-like end,
|
|
294
|
+
# or any single-quote-like start with a single-quote-like end
|
|
295
|
+
is_end_quote = if double_quote?(@json[@index])
|
|
296
|
+
method(:double_quote?)
|
|
297
|
+
elsif single_quote?(@json[@index])
|
|
298
|
+
method(:single_quote?)
|
|
299
|
+
elsif single_quote_like?(@json[@index])
|
|
300
|
+
method(:single_quote_like?)
|
|
301
|
+
else
|
|
302
|
+
method(:double_quote_like?)
|
|
303
|
+
end
|
|
304
|
+
|
|
305
|
+
i_before = @index
|
|
306
|
+
o_before = @output.length
|
|
307
|
+
|
|
308
|
+
str = +'"'
|
|
309
|
+
@index += 1
|
|
248
310
|
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
311
|
+
loop do
|
|
312
|
+
if @index >= @json.length
|
|
313
|
+
# end of text, we are missing an end quote
|
|
314
|
+
|
|
315
|
+
i_prev = prev_non_whitespace_index(@index - 1)
|
|
316
|
+
if !stop_at_delimiter && delimiter?(@json[i_prev])
|
|
317
|
+
# if the text ends with a delimiter, like ["hello],
|
|
318
|
+
# so the missing end quote should be inserted before this delimiter
|
|
319
|
+
# retry parsing the string, stopping at the first next delimiter
|
|
320
|
+
@index = i_before
|
|
321
|
+
@output = @output[0...o_before]
|
|
322
|
+
|
|
323
|
+
return parse_string(stop_at_delimiter: true)
|
|
324
|
+
end
|
|
252
325
|
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
# so the missing end quote should be inserted before this delimiter
|
|
257
|
-
# retry parsing the string, stopping at the first next delimiter
|
|
258
|
-
@index = i_before
|
|
259
|
-
@output = @output[0...o_before]
|
|
326
|
+
# repair missing quote
|
|
327
|
+
str = insert_before_last_whitespace(str, '"')
|
|
328
|
+
@output << str
|
|
260
329
|
|
|
261
|
-
|
|
262
|
-
|
|
330
|
+
return true
|
|
331
|
+
end
|
|
263
332
|
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
333
|
+
if @index == stop_at_index
|
|
334
|
+
# use the stop index detected in the first iteration, and repair end quote
|
|
335
|
+
str = insert_before_last_whitespace(str, '"')
|
|
336
|
+
@output << str
|
|
267
337
|
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
# end quote
|
|
271
|
-
i_quote = @index
|
|
272
|
-
o_quote = str.length
|
|
273
|
-
str += '"'
|
|
274
|
-
@index += 1
|
|
275
|
-
@output += str
|
|
338
|
+
return true
|
|
339
|
+
end
|
|
276
340
|
|
|
277
|
-
|
|
341
|
+
if is_end_quote.call(@json[@index])
|
|
342
|
+
# end quote
|
|
343
|
+
# let us check what is before and after the quote to verify whether this is a legit end quote
|
|
344
|
+
i_quote = @index
|
|
345
|
+
o_quote = str.length
|
|
346
|
+
str << '"'
|
|
347
|
+
@index += 1
|
|
348
|
+
@output << str
|
|
278
349
|
|
|
279
|
-
|
|
280
|
-
@index >= @json.length ||
|
|
281
|
-
delimiter?(@json[@index]) ||
|
|
282
|
-
quote?(@json[@index]) ||
|
|
283
|
-
digit?(@json[@index])
|
|
284
|
-
# The quote is followed by the end of the text, a delimiter, or a next value
|
|
285
|
-
parse_concatenated_string
|
|
350
|
+
parse_whitespace_and_skip_comments(skip_newline: false)
|
|
286
351
|
|
|
287
|
-
|
|
288
|
-
|
|
352
|
+
if stop_at_delimiter ||
|
|
353
|
+
@index >= @json.length ||
|
|
354
|
+
delimiter?(@json[@index]) ||
|
|
355
|
+
quote?(@json[@index]) ||
|
|
356
|
+
digit?(@json[@index])
|
|
357
|
+
# The quote is followed by the end of the text, a delimiter, or a next value
|
|
358
|
+
parse_concatenated_string
|
|
289
359
|
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
# and NOT followed by a delimiter. So, there is an end quote missing
|
|
293
|
-
# parse the string again and then stop at the first next delimiter
|
|
294
|
-
@index = i_before
|
|
295
|
-
@output = @output[...o_before]
|
|
360
|
+
return true
|
|
361
|
+
end
|
|
296
362
|
|
|
297
|
-
|
|
298
|
-
|
|
363
|
+
i_prev_char = prev_non_whitespace_index(i_quote - 1)
|
|
364
|
+
prev_char = @json[i_prev_char]
|
|
365
|
+
|
|
366
|
+
if prev_char == ','
|
|
367
|
+
# A comma followed by a quote, like '{"a":"b,c,"d":"e"}'.
|
|
368
|
+
# We assume that the quote is a start quote, and that the end quote
|
|
369
|
+
# should have been located right before the comma but is missing.
|
|
370
|
+
@index = i_before
|
|
371
|
+
@output = @output[0...o_before]
|
|
299
372
|
|
|
300
|
-
|
|
373
|
+
return parse_string(stop_at_delimiter: false, stop_at_index: i_prev_char)
|
|
374
|
+
end
|
|
375
|
+
|
|
376
|
+
if delimiter?(prev_char)
|
|
377
|
+
# This is not the right end quote: it is preceded by a delimiter,
|
|
378
|
+
# and NOT followed by a delimiter. So, there is an end quote missing
|
|
379
|
+
# parse the string again and then stop at the first next delimiter
|
|
380
|
+
@index = i_before
|
|
301
381
|
@output = @output[...o_before]
|
|
302
|
-
@index = i_quote + 1
|
|
303
382
|
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
elsif stop_at_delimiter && delimiter?(@json[@index])
|
|
307
|
-
# we're in the mode to stop the string at the first delimiter
|
|
308
|
-
# because there is an end quote missing
|
|
383
|
+
return parse_string(stop_at_delimiter: true)
|
|
384
|
+
end
|
|
309
385
|
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
386
|
+
# revert to right after the quote but before any whitespace, and continue parsing the string
|
|
387
|
+
@output = @output[...o_before]
|
|
388
|
+
@index = i_quote + 1
|
|
389
|
+
|
|
390
|
+
# repair unescaped quote
|
|
391
|
+
str = "#{str[...o_quote]}\\#{str[o_quote..]}"
|
|
392
|
+
elsif stop_at_delimiter && unquoted_string_delimiter?(@json[@index])
|
|
393
|
+
# we're in the mode to stop the string at the first delimiter
|
|
394
|
+
# because there is an end quote missing
|
|
395
|
+
|
|
396
|
+
# test start of an url like "https://..." (this would be parsed as a comment)
|
|
397
|
+
if @json[@index - 1] == ':' &&
|
|
398
|
+
REGEX_URL_START.match?(@json[(i_before + 1)..(@index + 1)] || '')
|
|
399
|
+
while @index < @json.length && REGEX_URL_CHAR.match?(@json[@index])
|
|
400
|
+
str << @json[@index]
|
|
401
|
+
@index += 1
|
|
402
|
+
end
|
|
403
|
+
end
|
|
313
404
|
|
|
314
|
-
|
|
405
|
+
# repair missing quote
|
|
406
|
+
str = insert_before_last_whitespace(str, '"')
|
|
407
|
+
@output << str
|
|
315
408
|
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
end
|
|
409
|
+
parse_concatenated_string
|
|
410
|
+
|
|
411
|
+
return true
|
|
412
|
+
elsif @json[@index] == BACKSLASH
|
|
413
|
+
# handle escaped content like \n or ★
|
|
414
|
+
char = @json[@index + 1]
|
|
415
|
+
escape_char = ESCAPE_CHARACTERS[char]
|
|
416
|
+
if escape_char
|
|
417
|
+
str << @json[@index, 2]
|
|
418
|
+
@index += 2
|
|
419
|
+
elsif char == 'u'
|
|
420
|
+
j = 2
|
|
421
|
+
j += 1 while j < 6 && @json[@index + j] && hex?(@json[@index + j])
|
|
422
|
+
if j == 6
|
|
423
|
+
str << @json[@index, 6]
|
|
424
|
+
@index += 6
|
|
425
|
+
elsif @index + j >= @json.length
|
|
426
|
+
# repair invalid or truncated unicode char at the end of the text
|
|
427
|
+
# by removing the unicode char and ending the string here
|
|
428
|
+
@index = @json.length
|
|
337
429
|
else
|
|
338
|
-
|
|
339
|
-
str += char
|
|
340
|
-
@index += 2
|
|
430
|
+
throw_invalid_unicode_character
|
|
341
431
|
end
|
|
432
|
+
elsif char == "\n"
|
|
433
|
+
# repair a backslash escaped newline (like in Bash scripts)
|
|
434
|
+
str << '\n'
|
|
435
|
+
@index += 2
|
|
342
436
|
else
|
|
343
|
-
#
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
if char == DOUBLE_QUOTE && @json[@index - 1] != BACKSLASH
|
|
347
|
-
# repair unescaped double quote
|
|
348
|
-
str += "\\#{char}"
|
|
349
|
-
elsif control_character?(char)
|
|
350
|
-
# unescaped control character
|
|
351
|
-
str += CONTROL_CHARACTERS[char]
|
|
352
|
-
else
|
|
353
|
-
throw_invalid_character(char) unless valid_string_character?(char)
|
|
354
|
-
str += char
|
|
355
|
-
end
|
|
356
|
-
|
|
357
|
-
@index += 1
|
|
437
|
+
# repair invalid escape character: remove it
|
|
438
|
+
str << char
|
|
439
|
+
@index += 2
|
|
358
440
|
end
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
441
|
+
else
|
|
442
|
+
# handle regular characters
|
|
443
|
+
char = @json[@index]
|
|
444
|
+
|
|
445
|
+
if char == DOUBLE_QUOTE && @json[@index - 1] != BACKSLASH
|
|
446
|
+
# repair unescaped double quote
|
|
447
|
+
str << "\\#{char}"
|
|
448
|
+
elsif control_character?(char)
|
|
449
|
+
# unescaped control character
|
|
450
|
+
str << CONTROL_CHARACTERS[char]
|
|
451
|
+
else
|
|
452
|
+
throw_invalid_character(char) unless valid_string_character?(char)
|
|
453
|
+
str << char
|
|
363
454
|
end
|
|
455
|
+
@index += 1
|
|
364
456
|
end
|
|
365
|
-
end
|
|
366
457
|
|
|
367
|
-
|
|
458
|
+
if skip_escape_chars
|
|
459
|
+
# repair: skipped escape character (nothing to do)
|
|
460
|
+
skip_escape_character
|
|
461
|
+
end
|
|
462
|
+
end
|
|
368
463
|
end
|
|
369
464
|
|
|
370
465
|
# Repair an unquoted string by adding quotes around it
|
|
371
466
|
# Repair a MongoDB function call like NumberLong("2")
|
|
372
467
|
# Repair a JSONP function call like callback({...});
|
|
373
|
-
def parse_unquoted_string
|
|
468
|
+
def parse_unquoted_string(is_key)
|
|
469
|
+
# NOTE: that the symbol can end with whitespaces: we stop at the next delimiter
|
|
470
|
+
# also, note that we allow strings to contain a slash / in order to support repairing regular expressions
|
|
374
471
|
start = @index
|
|
375
|
-
@index += 1 while @index < @json.length && !delimiter_except_slash?(@json[@index]) && !quote?(@json[@index])
|
|
376
|
-
return if @index <= start
|
|
377
472
|
|
|
378
|
-
if
|
|
379
|
-
|
|
380
|
-
# Repair a JSONP function call like callback({...});
|
|
381
|
-
@index += 1
|
|
473
|
+
if function_name_char_start?(@json[@index])
|
|
474
|
+
@index += 1 while @index < @json.length && function_name_char?(@json[@index])
|
|
382
475
|
|
|
383
|
-
|
|
476
|
+
j = @index
|
|
477
|
+
j += 1 while whitespace?(@json[j])
|
|
384
478
|
|
|
385
|
-
if @json[
|
|
386
|
-
#
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
@index += 1 if @json[@index] == ';'
|
|
390
|
-
end
|
|
391
|
-
else
|
|
392
|
-
# Repair unquoted string
|
|
393
|
-
# Also, repair undefined into null
|
|
479
|
+
if @json[j] == '('
|
|
480
|
+
# repair a MongoDB function call like NumberLong("2")
|
|
481
|
+
# repair a JSONP function call like callback({...});
|
|
482
|
+
@index = j + 1
|
|
394
483
|
|
|
395
|
-
|
|
396
|
-
@index -= 1 while whitespace?(@json[@index - 1]) && @index.positive?
|
|
484
|
+
parse_value
|
|
397
485
|
|
|
398
|
-
|
|
399
|
-
|
|
486
|
+
if @json[@index] == ')'
|
|
487
|
+
# Repair: skip close bracket of function call
|
|
488
|
+
@index += 1
|
|
489
|
+
# Repair: skip semicolon after JSONP call
|
|
490
|
+
@index += 1 if @json[@index] == ';'
|
|
491
|
+
end
|
|
400
492
|
|
|
401
|
-
|
|
402
|
-
# We had a missing start quote, but now we encountered the end quote, so we can skip that one
|
|
403
|
-
@index += 1
|
|
493
|
+
return true
|
|
404
494
|
end
|
|
405
495
|
end
|
|
406
496
|
|
|
497
|
+
while @index < @json.length &&
|
|
498
|
+
!unquoted_string_delimiter?(@json[@index]) &&
|
|
499
|
+
!quote?(@json[@index]) &&
|
|
500
|
+
(!is_key || @json[@index] != ':')
|
|
501
|
+
@index += 1
|
|
502
|
+
end
|
|
503
|
+
|
|
504
|
+
# test start of an url like "https://..." (this would be parsed as a comment)
|
|
505
|
+
if @json[@index - 1] == ':' &&
|
|
506
|
+
REGEX_URL_START.match?(@json[start...(@index + 2)] || '')
|
|
507
|
+
@index += 1 while @index < @json.length && REGEX_URL_CHAR.match?(@json[@index])
|
|
508
|
+
end
|
|
509
|
+
|
|
510
|
+
return false if @index <= start
|
|
511
|
+
|
|
512
|
+
# Repair unquoted string
|
|
513
|
+
# Also, repair undefined into null
|
|
514
|
+
|
|
515
|
+
# First, go back to prevent getting trailing whitespaces in the string
|
|
516
|
+
@index -= 1 while @index.positive? && whitespace?(@json[@index - 1])
|
|
517
|
+
|
|
518
|
+
symbol = @json[start...@index]
|
|
519
|
+
@output << (symbol == 'undefined' ? 'null' : symbol.inspect)
|
|
520
|
+
|
|
521
|
+
if @json[@index] == '"'
|
|
522
|
+
# We had a missing start quote, but now we encountered the end quote, so we can skip that one
|
|
523
|
+
@index += 1
|
|
524
|
+
end
|
|
525
|
+
|
|
526
|
+
true
|
|
527
|
+
end
|
|
528
|
+
|
|
529
|
+
# Parse a regular expression literal like /foo/ or /foo\/bar/
|
|
530
|
+
def parse_regex
|
|
531
|
+
return false unless @json[@index] == '/'
|
|
532
|
+
|
|
533
|
+
start = @index
|
|
534
|
+
@index += 1
|
|
535
|
+
|
|
536
|
+
@index += 1 while @index < @json.length && (@json[@index] != '/' || @json[@index - 1] == BACKSLASH)
|
|
537
|
+
@index += 1
|
|
538
|
+
|
|
539
|
+
@output << @json[start...@index].inspect
|
|
540
|
+
|
|
407
541
|
true
|
|
408
542
|
end
|
|
409
543
|
|
|
410
544
|
def parse_character(char)
|
|
411
545
|
if @json[@index] == char
|
|
412
|
-
@output
|
|
546
|
+
@output << @json[@index]
|
|
413
547
|
@index += 1
|
|
414
548
|
true
|
|
415
549
|
else
|
|
@@ -417,19 +551,6 @@ module JSON
|
|
|
417
551
|
end
|
|
418
552
|
end
|
|
419
553
|
|
|
420
|
-
def parse_whitespace_and_skip_comments
|
|
421
|
-
start = @index
|
|
422
|
-
|
|
423
|
-
changed = parse_whitespace
|
|
424
|
-
loop do
|
|
425
|
-
changed = parse_comment
|
|
426
|
-
changed = parse_whitespace if changed
|
|
427
|
-
break unless changed
|
|
428
|
-
end
|
|
429
|
-
|
|
430
|
-
@index > start
|
|
431
|
-
end
|
|
432
|
-
|
|
433
554
|
# Parse a number like 2.4 or 2.4e6
|
|
434
555
|
def parse_number
|
|
435
556
|
start = @index
|
|
@@ -489,7 +610,7 @@ module JSON
|
|
|
489
610
|
num = @json[start...@index]
|
|
490
611
|
has_invalid_leading_zero = num.match?(/^0\d/)
|
|
491
612
|
|
|
492
|
-
@output
|
|
613
|
+
@output << (has_invalid_leading_zero ? "\"#{num}\"" : num)
|
|
493
614
|
return true
|
|
494
615
|
end
|
|
495
616
|
|
|
@@ -503,7 +624,7 @@ module JSON
|
|
|
503
624
|
# Parse an array like '["item1", "item2", ...]'
|
|
504
625
|
def parse_array
|
|
505
626
|
if @json[@index] == OPENING_BRACKET
|
|
506
|
-
@output
|
|
627
|
+
@output << '['
|
|
507
628
|
@index += 1
|
|
508
629
|
parse_whitespace_and_skip_comments
|
|
509
630
|
|
|
@@ -531,7 +652,7 @@ module JSON
|
|
|
531
652
|
end
|
|
532
653
|
|
|
533
654
|
if @json[@index] == CLOSING_BRACKET
|
|
534
|
-
@output
|
|
655
|
+
@output << ']'
|
|
535
656
|
@index += 1
|
|
536
657
|
else
|
|
537
658
|
# repair missing closing array bracket
|
|
@@ -580,7 +701,7 @@ module JSON
|
|
|
580
701
|
# repair numbers cut off at the end
|
|
581
702
|
# this will only be called when we end after a '.', '-', or 'e' and does not
|
|
582
703
|
# change the number more than it needs to make it valid JSON
|
|
583
|
-
@output
|
|
704
|
+
@output << "#{@json[start...@index]}0"
|
|
584
705
|
end
|
|
585
706
|
|
|
586
707
|
# Parse and repair Newline Delimited JSON (NDJSON):
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
module JSON
|
|
2
|
+
module Repair
|
|
3
|
+
module StringUtils
|
|
4
|
+
@output: untyped
|
|
5
|
+
|
|
6
|
+
@index: untyped
|
|
7
|
+
|
|
8
|
+
# Constants for character chars
|
|
9
|
+
BACKSLASH: "\\"
|
|
10
|
+
|
|
11
|
+
SLASH: "/"
|
|
12
|
+
|
|
13
|
+
ASTERISK: "*"
|
|
14
|
+
|
|
15
|
+
OPENING_BRACE: "{"
|
|
16
|
+
|
|
17
|
+
CLOSING_BRACE: "}"
|
|
18
|
+
|
|
19
|
+
OPENING_BRACKET: "["
|
|
20
|
+
|
|
21
|
+
CLOSING_BRACKET: "]"
|
|
22
|
+
|
|
23
|
+
OPEN_PARENTHESIS: "("
|
|
24
|
+
|
|
25
|
+
CLOSE_PARENTHESIS: ")"
|
|
26
|
+
|
|
27
|
+
SPACE: " "
|
|
28
|
+
|
|
29
|
+
NEWLINE: "\n"
|
|
30
|
+
|
|
31
|
+
TAB: "\t"
|
|
32
|
+
|
|
33
|
+
RETURN: "\r"
|
|
34
|
+
|
|
35
|
+
BACKSPACE: "\b"
|
|
36
|
+
|
|
37
|
+
FORM_FEED: "\f"
|
|
38
|
+
|
|
39
|
+
DOUBLE_QUOTE: "\""
|
|
40
|
+
|
|
41
|
+
PLUS: "+"
|
|
42
|
+
|
|
43
|
+
MINUS: "-"
|
|
44
|
+
|
|
45
|
+
QUOTE: "'"
|
|
46
|
+
|
|
47
|
+
ZERO: "0"
|
|
48
|
+
|
|
49
|
+
NINE: "9"
|
|
50
|
+
|
|
51
|
+
COMMA: ","
|
|
52
|
+
|
|
53
|
+
DOT: "."
|
|
54
|
+
|
|
55
|
+
COLON: ":"
|
|
56
|
+
|
|
57
|
+
SEMICOLON: ";"
|
|
58
|
+
|
|
59
|
+
UPPERCASE_A: "A"
|
|
60
|
+
|
|
61
|
+
LOWERCASE_A: "a"
|
|
62
|
+
|
|
63
|
+
UPPERCASE_E: "E"
|
|
64
|
+
|
|
65
|
+
LOWERCASE_E: "e"
|
|
66
|
+
|
|
67
|
+
UPPERCASE_F: "F"
|
|
68
|
+
|
|
69
|
+
LOWERCASE_F: "f"
|
|
70
|
+
|
|
71
|
+
NON_BREAKING_SPACE: ::String
|
|
72
|
+
|
|
73
|
+
MONGOLIAN_VOWEL_SEPARATOR: ::String
|
|
74
|
+
|
|
75
|
+
EN_QUAD: ::String
|
|
76
|
+
|
|
77
|
+
ZERO_WIDTH_SPACE: ::String
|
|
78
|
+
|
|
79
|
+
NARROW_NO_BREAK_SPACE: ::String
|
|
80
|
+
|
|
81
|
+
MEDIUM_MATHEMATICAL_SPACE: ::String
|
|
82
|
+
|
|
83
|
+
IDEOGRAPHIC_SPACE: ::String
|
|
84
|
+
|
|
85
|
+
ZERO_WIDTH_NO_BREAK_SPACE: ::String
|
|
86
|
+
|
|
87
|
+
DOUBLE_QUOTE_LEFT: ::String
|
|
88
|
+
|
|
89
|
+
DOUBLE_QUOTE_RIGHT: ::String
|
|
90
|
+
|
|
91
|
+
QUOTE_LEFT: ::String
|
|
92
|
+
|
|
93
|
+
QUOTE_RIGHT: ::String
|
|
94
|
+
|
|
95
|
+
GRAVE_ACCENT: "`"
|
|
96
|
+
|
|
97
|
+
ACUTE_ACCENT: ::String
|
|
98
|
+
|
|
99
|
+
REGEX_DELIMITER: ::Regexp
|
|
100
|
+
|
|
101
|
+
REGEX_UNQUOTED_STRING_DELIMITER: ::Regexp
|
|
102
|
+
|
|
103
|
+
REGEX_START_OF_VALUE: ::Regexp
|
|
104
|
+
|
|
105
|
+
REGEX_URL_START: ::Regexp
|
|
106
|
+
|
|
107
|
+
REGEX_URL_CHAR: ::Regexp
|
|
108
|
+
|
|
109
|
+
REGEX_FUNCTION_NAME_CHAR_START: ::Regexp
|
|
110
|
+
|
|
111
|
+
REGEX_FUNCTION_NAME_CHAR: ::Regexp
|
|
112
|
+
|
|
113
|
+
# Functions to check character chars
|
|
114
|
+
def hex?: (untyped char) -> untyped
|
|
115
|
+
|
|
116
|
+
def digit?: (untyped char) -> untyped
|
|
117
|
+
|
|
118
|
+
def valid_string_character?: (untyped char) -> untyped
|
|
119
|
+
|
|
120
|
+
def delimiter?: (untyped char) -> untyped
|
|
121
|
+
|
|
122
|
+
def unquoted_string_delimiter?: (untyped char) -> untyped
|
|
123
|
+
|
|
124
|
+
def function_name_char_start?: (untyped char) -> untyped
|
|
125
|
+
|
|
126
|
+
def function_name_char?: (untyped char) -> untyped
|
|
127
|
+
|
|
128
|
+
def start_of_value?: (untyped char) -> untyped
|
|
129
|
+
|
|
130
|
+
def control_character?: (untyped char) -> untyped
|
|
131
|
+
|
|
132
|
+
def whitespace?: (untyped char) -> untyped
|
|
133
|
+
|
|
134
|
+
def whitespace_except_newline?: (untyped char) -> untyped
|
|
135
|
+
|
|
136
|
+
def special_whitespace?: (untyped char) -> untyped
|
|
137
|
+
|
|
138
|
+
def quote?: (untyped char) -> untyped
|
|
139
|
+
|
|
140
|
+
def double_quote?: (untyped char) -> untyped
|
|
141
|
+
|
|
142
|
+
def single_quote?: (untyped char) -> untyped
|
|
143
|
+
|
|
144
|
+
def double_quote_like?: (untyped char) -> untyped
|
|
145
|
+
|
|
146
|
+
def single_quote_like?: (untyped char) -> untyped
|
|
147
|
+
|
|
148
|
+
# Strip last occurrence of text_to_strip from text
|
|
149
|
+
def strip_last_occurrence: (untyped text, untyped text_to_strip, ?strip_remaining_text: bool) -> untyped
|
|
150
|
+
|
|
151
|
+
def insert_before_last_whitespace: (untyped text, untyped text_to_insert) -> untyped
|
|
152
|
+
|
|
153
|
+
# Parse keywords true, false, null
|
|
154
|
+
# Repair Python keywords True, False, None
|
|
155
|
+
# Repair Ruby keyword nil
|
|
156
|
+
def parse_keywords: () -> untyped
|
|
157
|
+
|
|
158
|
+
def parse_keyword: (untyped name, untyped value) -> (true | false)
|
|
159
|
+
|
|
160
|
+
def remove_at_index: (untyped text, untyped start, untyped count) -> untyped
|
|
161
|
+
|
|
162
|
+
def ends_with_comma_or_newline?: (untyped text) -> untyped
|
|
163
|
+
end
|
|
164
|
+
end
|
|
165
|
+
end
|
data/sig/json/repair.rbs
CHANGED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
module JSON
|
|
2
|
+
class Repairer
|
|
3
|
+
@json: ::String
|
|
4
|
+
|
|
5
|
+
@index: Integer
|
|
6
|
+
|
|
7
|
+
@output: ::String
|
|
8
|
+
|
|
9
|
+
include Repair::StringUtils
|
|
10
|
+
|
|
11
|
+
CONTROL_CHARACTERS: ::Hash[::String, "\\b" | "\\f" | "\\n" | "\\r" | "\\t"]
|
|
12
|
+
|
|
13
|
+
ESCAPE_CHARACTERS: ::Hash[::String, "\"" | "\\" | "/" | "\b" | "\f" | "\n" | "\r" | "\t"]
|
|
14
|
+
|
|
15
|
+
MARKDOWN_OPEN_BLOCKS: ::Array[::String]
|
|
16
|
+
|
|
17
|
+
MARKDOWN_CLOSE_BLOCKS: ::Array[::String]
|
|
18
|
+
|
|
19
|
+
def initialize: (::String json) -> void
|
|
20
|
+
|
|
21
|
+
def repair: () -> ::String
|
|
22
|
+
|
|
23
|
+
private
|
|
24
|
+
|
|
25
|
+
def parse_value: () -> untyped
|
|
26
|
+
|
|
27
|
+
def parse_whitespace: (?skip_newline: bool) -> (true | false)
|
|
28
|
+
|
|
29
|
+
def parse_comment: () -> (true | false)
|
|
30
|
+
|
|
31
|
+
# Find and skip over a Markdown fenced code block
|
|
32
|
+
def parse_markdown_code_block: (::Array[::String] blocks) -> (true | false)
|
|
33
|
+
|
|
34
|
+
def skip_markdown_code_block: (::Array[::String] blocks) -> (true | false)
|
|
35
|
+
|
|
36
|
+
# Parse an object like '{"key": "value"}'
|
|
37
|
+
def parse_object: () -> (false | true)
|
|
38
|
+
|
|
39
|
+
def skip_character: (untyped char) -> (true | false)
|
|
40
|
+
|
|
41
|
+
# Skip ellipsis like "[1,2,3,...]" or "[1,2,3,...,9]" or "[...,7,8,9]"
|
|
42
|
+
# or a similar construct in objects.
|
|
43
|
+
def skip_ellipsis: () -> untyped
|
|
44
|
+
|
|
45
|
+
# Parse a string enclosed by double quotes "...". Can contain escaped quotes
|
|
46
|
+
# Repair strings enclosed in single quotes or special quotes
|
|
47
|
+
# Repair an escaped string
|
|
48
|
+
#
|
|
49
|
+
# The function can run in two stages:
|
|
50
|
+
# - First, it assumes the string has a valid end quote
|
|
51
|
+
# - If it turns out that the string does not have a valid end quote followed
|
|
52
|
+
# by a delimiter (which should be the case), the function runs again in a
|
|
53
|
+
# more conservative way, stopping the string at the first next delimiter
|
|
54
|
+
# and fixing the string by inserting a quote there, or stopping at a
|
|
55
|
+
# stop index detected in the first iteration.
|
|
56
|
+
def parse_string: (?stop_at_delimiter: bool, ?stop_at_index: ::Integer) -> (untyped | true | false)
|
|
57
|
+
|
|
58
|
+
# Repair an unquoted string by adding quotes around it
|
|
59
|
+
# Repair a MongoDB function call like NumberLong("2")
|
|
60
|
+
# Repair a JSONP function call like callback({...});
|
|
61
|
+
def parse_unquoted_string: (bool is_key) -> (false | true)
|
|
62
|
+
|
|
63
|
+
# Parse a regular expression literal like /foo/ or /foo\/bar/
|
|
64
|
+
def parse_regex: () -> (false | true)
|
|
65
|
+
|
|
66
|
+
def parse_character: (untyped char) -> (true | false)
|
|
67
|
+
|
|
68
|
+
def parse_whitespace_and_skip_comments: (?skip_newline: bool) -> untyped
|
|
69
|
+
|
|
70
|
+
# Parse a number like 2.4 or 2.4e6
|
|
71
|
+
def parse_number: () -> (true | false)
|
|
72
|
+
|
|
73
|
+
def at_end_of_number?: () -> untyped
|
|
74
|
+
|
|
75
|
+
# Parse an array like '["item1", "item2", ...]'
|
|
76
|
+
def parse_array: () -> (true | false)
|
|
77
|
+
|
|
78
|
+
def prev_non_whitespace_index: (untyped start) -> untyped
|
|
79
|
+
|
|
80
|
+
# Repair concatenated strings like "hello" + "world", change this into "helloworld"
|
|
81
|
+
def parse_concatenated_string: () -> untyped
|
|
82
|
+
|
|
83
|
+
def repair_number_ending_with_numeric_symbol: (untyped start) -> untyped
|
|
84
|
+
|
|
85
|
+
# Parse and repair Newline Delimited JSON (NDJSON):
|
|
86
|
+
# multiple JSON objects separated by a newline character
|
|
87
|
+
def parse_newline_delimited_json: () -> untyped
|
|
88
|
+
|
|
89
|
+
def skip_escape_character: () -> untyped
|
|
90
|
+
|
|
91
|
+
def throw_invalid_character: (untyped char) -> untyped
|
|
92
|
+
|
|
93
|
+
def throw_unexpected_character: () -> untyped
|
|
94
|
+
|
|
95
|
+
def throw_unexpected_end: () -> untyped
|
|
96
|
+
|
|
97
|
+
def throw_object_key_expected: () -> untyped
|
|
98
|
+
|
|
99
|
+
def throw_colon_expected: () -> untyped
|
|
100
|
+
|
|
101
|
+
def throw_invalid_unicode_character: () -> untyped
|
|
102
|
+
end
|
|
103
|
+
end
|
metadata
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: json-repair
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.0
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Aleksandr Zykov
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: exe
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies: []
|
|
13
12
|
description: This is a simple gem that repairs broken JSON strings.
|
|
14
13
|
email:
|
|
@@ -24,11 +23,14 @@ files:
|
|
|
24
23
|
- LICENSE.txt
|
|
25
24
|
- README.md
|
|
26
25
|
- Rakefile
|
|
26
|
+
- Steepfile
|
|
27
27
|
- lib/json/repair.rb
|
|
28
28
|
- lib/json/repair/string_utils.rb
|
|
29
29
|
- lib/json/repair/version.rb
|
|
30
30
|
- lib/json/repairer.rb
|
|
31
31
|
- sig/json/repair.rbs
|
|
32
|
+
- sig/json/repair/string_utils.rbs
|
|
33
|
+
- sig/json/repairer.rbs
|
|
32
34
|
homepage: https://github.com/sashazykov/json-repair-rb
|
|
33
35
|
licenses:
|
|
34
36
|
- ISC
|
|
@@ -37,7 +39,6 @@ metadata:
|
|
|
37
39
|
homepage_uri: https://github.com/sashazykov/json-repair-rb
|
|
38
40
|
source_code_uri: https://github.com/sashazykov/json-repair-rb
|
|
39
41
|
changelog_uri: https://github.com/sashazykov/json-repair-rb/blob/main/CHANGELOG.md
|
|
40
|
-
post_install_message:
|
|
41
42
|
rdoc_options: []
|
|
42
43
|
require_paths:
|
|
43
44
|
- lib
|
|
@@ -52,8 +53,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
52
53
|
- !ruby/object:Gem::Version
|
|
53
54
|
version: '0'
|
|
54
55
|
requirements: []
|
|
55
|
-
rubygems_version: 3.
|
|
56
|
-
signing_key:
|
|
56
|
+
rubygems_version: 3.6.9
|
|
57
57
|
specification_version: 4
|
|
58
58
|
summary: Repairs broken JSON strings.
|
|
59
59
|
test_files: []
|