json-repair 0.15.4__py3-none-any.whl → 0.15.6__py3-none-any.whl
This diff shows the changes between two publicly released versions of a package, as those versions appear in the supported public registry they were published to. It is provided for informational purposes only.
- json_repair/json_repair.py +26 -18
- {json_repair-0.15.4.dist-info → json_repair-0.15.6.dist-info}/METADATA +1 -1
- json_repair-0.15.6.dist-info/RECORD +7 -0
- json_repair-0.15.4.dist-info/RECORD +0 -7
- {json_repair-0.15.4.dist-info → json_repair-0.15.6.dist-info}/LICENSE +0 -0
- {json_repair-0.15.4.dist-info → json_repair-0.15.6.dist-info}/WHEEL +0 -0
- {json_repair-0.15.4.dist-info → json_repair-0.15.6.dist-info}/top_level.txt +0 -0
json_repair/json_repair.py
CHANGED
@@ -174,6 +174,7 @@ class JSONParser:
|
|
174
174
|
arr = []
|
175
175
|
# Stop when you either find the closing parentheses or you have iterated over the entire string
|
176
176
|
while (self.get_char_at() or "]") != "]":
|
177
|
+
self.skip_whitespaces_at()
|
177
178
|
value = self.parse_json()
|
178
179
|
|
179
180
|
# It is possible that parse_json() returns nothing valid, so we stop
|
@@ -218,6 +219,7 @@ class JSONParser:
|
|
218
219
|
|
219
220
|
# Flag to manage corner cases related to missing starting quote
|
220
221
|
fixed_quotes = False
|
222
|
+
doubled_quotes = False
|
221
223
|
lstring_delimiter = rstring_delimiter = '"'
|
222
224
|
if isinstance(string_quotes, list):
|
223
225
|
lstring_delimiter = string_quotes[0]
|
@@ -226,17 +228,20 @@ class JSONParser:
|
|
226
228
|
lstring_delimiter = rstring_delimiter = string_quotes
|
227
229
|
# There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
|
228
230
|
if self.get_char_at(1) == lstring_delimiter:
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
231
|
+
# This is a valid exception only if it's closed by a double delimiter again
|
232
|
+
i = 2
|
233
|
+
next_c = self.get_char_at(i)
|
234
|
+
while next_c and next_c != rstring_delimiter:
|
235
|
+
i += 1
|
236
|
+
next_c = self.get_char_at(i)
|
237
|
+
# Now check that the next character is also a delimiter to ensure that we have "".....""
|
238
|
+
# In that case we ignore this rstring delimiter
|
239
|
+
if next_c and (self.get_char_at(i + 1) or "") == rstring_delimiter:
|
236
240
|
self.log(
|
237
|
-
"While parsing a string, we found a doubled quote, ignoring it",
|
241
|
+
"While parsing a string, we found a valid starting doubled quote, ignoring it",
|
238
242
|
"info",
|
239
243
|
)
|
244
|
+
doubled_quotes = True
|
240
245
|
self.index += 1
|
241
246
|
char = self.get_char_at()
|
242
247
|
if char != lstring_delimiter:
|
@@ -277,13 +282,9 @@ class JSONParser:
|
|
277
282
|
self.remove_char_at(-1)
|
278
283
|
self.index -= 1
|
279
284
|
# ChatGPT sometimes forget to quote stuff in html tags or markdown, so we do this whole thing here
|
280
|
-
if (
|
281
|
-
char == rstring_delimiter
|
282
|
-
# Next character is not a delimiter
|
283
|
-
and self.get_char_at(1) not in [",", ":", "]", "}"]
|
284
|
-
):
|
285
|
+
if char == rstring_delimiter:
|
285
286
|
# Special case here, in case of double quotes one after another
|
286
|
-
if self.get_char_at(1) == rstring_delimiter:
|
287
|
+
if doubled_quotes and self.get_char_at(1) == rstring_delimiter:
|
287
288
|
self.log(
|
288
289
|
"While parsing a string, we found a doubled quote, ignoring it",
|
289
290
|
"info",
|
@@ -292,13 +293,20 @@ class JSONParser:
|
|
292
293
|
self.remove_char_at()
|
293
294
|
else:
|
294
295
|
# Check if eventually there is a rstring delimiter, otherwise we bail
|
295
|
-
i =
|
296
|
+
i = 1
|
297
|
+
context = self.get_context()
|
296
298
|
next_c = self.get_char_at(i)
|
297
299
|
while next_c and next_c != rstring_delimiter:
|
300
|
+
# If we are in an object context, let's check for the right delimiters
|
301
|
+
if (
|
302
|
+
(context == "object_key" and next_c == ":")
|
303
|
+
or (context == "object_value" and next_c in ["}", ","])
|
304
|
+
or (context == "" and next_c in ["]", ","])
|
305
|
+
):
|
306
|
+
break
|
298
307
|
i += 1
|
299
308
|
next_c = self.get_char_at(i)
|
300
|
-
|
301
|
-
if next_c:
|
309
|
+
if next_c == rstring_delimiter:
|
302
310
|
self.log(
|
303
311
|
"While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
|
304
312
|
"info",
|
@@ -414,7 +422,7 @@ class JSONParser:
|
|
414
422
|
|
415
423
|
def get_context(self) -> str:
|
416
424
|
try:
|
417
|
-
return self.context[
|
425
|
+
return self.context[-1]
|
418
426
|
except Exception:
|
419
427
|
return ""
|
420
428
|
|
@@ -0,0 +1,7 @@
|
|
1
|
+
json_repair/__init__.py,sha256=AlNie5y6BZBioGi5fzTAUvum_y0U5aL5aNsuQ_68LQc,175
|
2
|
+
json_repair/json_repair.py,sha256=qQosSMpX3r1adMubgHNZ9gT0lSlZBn3Id_98yS1ITwk,20270
|
3
|
+
json_repair-0.15.6.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
+
json_repair-0.15.6.dist-info/METADATA,sha256=BNtrdGCSn8NC9gBrWdmX2bRQv6VJgEcwgCM5_SWLXUY,7355
|
5
|
+
json_repair-0.15.6.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
+
json_repair-0.15.6.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
+
json_repair-0.15.6.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=AlNie5y6BZBioGi5fzTAUvum_y0U5aL5aNsuQ_68LQc,175
|
2
|
-
json_repair/json_repair.py,sha256=kz8FzeOfSkliMJbAN1reShDfGyGxI2sbozGY6vXKSL0,19576
|
3
|
-
json_repair-0.15.4.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
-
json_repair-0.15.4.dist-info/METADATA,sha256=a4xk22AfJLAACJFvyAZ29KiYIQ3ngo0VmF7CpbK8wEU,7355
|
5
|
-
json_repair-0.15.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
-
json_repair-0.15.4.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
-
json_repair-0.15.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|