json-repair 0.33.0__py3-none-any.whl → 0.35.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- json_repair/json_parser.py +97 -26
- json_repair/string_file_wrapper.py +21 -0
- {json_repair-0.33.0.dist-info → json_repair-0.35.0.dist-info}/METADATA +1 -1
- json_repair-0.35.0.dist-info/RECORD +13 -0
- json_repair-0.33.0.dist-info/RECORD +0 -13
- {json_repair-0.33.0.dist-info → json_repair-0.35.0.dist-info}/LICENSE +0 -0
- {json_repair-0.33.0.dist-info → json_repair-0.35.0.dist-info}/WHEEL +0 -0
- {json_repair-0.33.0.dist-info → json_repair-0.35.0.dist-info}/entry_points.txt +0 -0
- {json_repair-0.33.0.dist-info → json_repair-0.35.0.dist-info}/top_level.txt +0 -0
json_repair/json_parser.py
CHANGED
@@ -7,6 +7,9 @@ JSONReturnType = Union[Dict[str, Any], List[Any], str, float, int, bool, None]
|
|
7
7
|
|
8
8
|
|
9
9
|
class JSONParser:
|
10
|
+
# Constants
|
11
|
+
STRING_DELIMITERS = ['"', "'", "“", "”"]
|
12
|
+
|
10
13
|
def __init__(
|
11
14
|
self,
|
12
15
|
json_str: Union[str, StringFileWrapper],
|
@@ -89,7 +92,9 @@ class JSONParser:
|
|
89
92
|
)
|
90
93
|
return ""
|
91
94
|
# <string> starts with a quote
|
92
|
-
elif not self.context.empty and (
|
95
|
+
elif not self.context.empty and (
|
96
|
+
char in self.STRING_DELIMITERS or char.isalpha()
|
97
|
+
):
|
93
98
|
return self.parse_string()
|
94
99
|
# <number> starts with [0-9] or minus
|
95
100
|
elif not self.context.empty and (
|
@@ -130,6 +135,8 @@ class JSONParser:
|
|
130
135
|
# <member> starts with a <string>
|
131
136
|
key = ""
|
132
137
|
while self.get_char_at():
|
138
|
+
# The rollback index needs to be updated here in case the key is empty
|
139
|
+
rollback_index = self.index
|
133
140
|
key = str(self.parse_string())
|
134
141
|
|
135
142
|
if key != "" or (key == "" and self.get_char_at() == ":"):
|
@@ -140,6 +147,12 @@ class JSONParser:
|
|
140
147
|
"While parsing an object we found a duplicate key, closing the object here and rolling back the index",
|
141
148
|
)
|
142
149
|
self.index = rollback_index - 1
|
150
|
+
# add an opening curly brace to make this work
|
151
|
+
self.json_str = (
|
152
|
+
self.json_str[: self.index + 1]
|
153
|
+
+ "{"
|
154
|
+
+ self.json_str[self.index + 1 :]
|
155
|
+
)
|
143
156
|
break
|
144
157
|
|
145
158
|
# Skip filler whitespaces
|
@@ -227,7 +240,7 @@ class JSONParser:
|
|
227
240
|
|
228
241
|
char = self.get_char_at()
|
229
242
|
# A valid string can only start with a valid quote or, in our case, with a literal
|
230
|
-
while char and char not in
|
243
|
+
while char and char not in self.STRING_DELIMITERS and not char.isalnum():
|
231
244
|
self.index += 1
|
232
245
|
char = self.get_char_at()
|
233
246
|
|
@@ -262,35 +275,61 @@ class JSONParser:
|
|
262
275
|
if not missing_quotes:
|
263
276
|
self.index += 1
|
264
277
|
|
278
|
+
self.skip_whitespaces_at()
|
265
279
|
# There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
|
266
|
-
if self.get_char_at()
|
267
|
-
# If
|
268
|
-
if (
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
self.index += 1
|
285
|
-
else:
|
286
|
-
# Ok this is not a doubled quote, check if this is an empty string or not
|
287
|
-
i = self.skip_whitespaces_at(idx=1, move_main_index=False)
|
280
|
+
if self.get_char_at() in self.STRING_DELIMITERS:
|
281
|
+
# If the next character is the same type of quote, then we manage it as double quotes
|
282
|
+
if self.get_char_at() == lstring_delimiter:
|
283
|
+
# If it's an empty key, this was easy
|
284
|
+
if (
|
285
|
+
self.context.current == ContextValues.OBJECT_KEY
|
286
|
+
and self.get_char_at(1) == ":"
|
287
|
+
):
|
288
|
+
self.index += 1
|
289
|
+
return ""
|
290
|
+
if self.get_char_at(1) == lstring_delimiter:
|
291
|
+
# There's something fishy about this, we found doubled quotes and then again quotes
|
292
|
+
self.log(
|
293
|
+
"While parsing a string, we found a doubled quote and then a quote again, ignoring it",
|
294
|
+
)
|
295
|
+
return ""
|
296
|
+
# Find the next delimiter
|
297
|
+
i = self.skip_to_character(character=rstring_delimiter, idx=1)
|
288
298
|
next_c = self.get_char_at(i)
|
289
|
-
|
299
|
+
# Now check that the next character is also a delimiter to ensure that we have "".....""
|
300
|
+
# In that case we ignore this rstring delimiter
|
301
|
+
if next_c and (self.get_char_at(i + 1) or "") == rstring_delimiter:
|
290
302
|
self.log(
|
291
|
-
"While parsing a string, we found a
|
303
|
+
"While parsing a string, we found a valid starting doubled quote",
|
292
304
|
)
|
305
|
+
doubled_quotes = True
|
293
306
|
self.index += 1
|
307
|
+
else:
|
308
|
+
# Ok this is not a doubled quote, check if this is an empty string or not
|
309
|
+
i = self.skip_whitespaces_at(idx=1, move_main_index=False)
|
310
|
+
next_c = self.get_char_at(i)
|
311
|
+
if next_c in self.STRING_DELIMITERS + ["{", "["]:
|
312
|
+
# something fishy is going on here
|
313
|
+
self.log(
|
314
|
+
"While parsing a string, we found a doubled quote but also another quote afterwards, ignoring it",
|
315
|
+
)
|
316
|
+
self.index += 1
|
317
|
+
return ""
|
318
|
+
elif next_c not in [",", "]", "}"]:
|
319
|
+
self.log(
|
320
|
+
"While parsing a string, we found a doubled quote but it was a mistake, removing one quote",
|
321
|
+
)
|
322
|
+
self.index += 1
|
323
|
+
else:
|
324
|
+
# Otherwise we need to do another check before continuing
|
325
|
+
i = self.skip_to_character(character=rstring_delimiter, idx=1)
|
326
|
+
next_c = self.get_char_at(i)
|
327
|
+
if not next_c:
|
328
|
+
# mmmm that delimiter never appears again, this is a mistake
|
329
|
+
self.log(
|
330
|
+
"While parsing a string, we found a quote but it was a mistake, ignoring it",
|
331
|
+
)
|
332
|
+
return ""
|
294
333
|
|
295
334
|
# Initialize our return value
|
296
335
|
string_acc = ""
|
@@ -404,6 +443,38 @@ class JSONParser:
|
|
404
443
|
string_acc += escape_seqs.get(char, char) or char
|
405
444
|
self.index += 1
|
406
445
|
char = self.get_char_at()
|
446
|
+
# If we are in object key context and we find a colon, it could be a missing right quote
|
447
|
+
if (
|
448
|
+
char == ":"
|
449
|
+
and not missing_quotes
|
450
|
+
and self.context.current == ContextValues.OBJECT_KEY
|
451
|
+
):
|
452
|
+
# Ok now we need to check if this is followed by a value like "..."
|
453
|
+
i = self.skip_to_character(character=lstring_delimiter, idx=1)
|
454
|
+
next_c = self.get_char_at(i)
|
455
|
+
if next_c:
|
456
|
+
i += 1
|
457
|
+
# found the first delimiter
|
458
|
+
i = self.skip_to_character(character=rstring_delimiter, idx=i)
|
459
|
+
next_c = self.get_char_at(i)
|
460
|
+
if next_c:
|
461
|
+
# found a second delimiter
|
462
|
+
i += 1
|
463
|
+
# Skip spaces
|
464
|
+
i = self.skip_whitespaces_at(idx=i, move_main_index=False)
|
465
|
+
next_c = self.get_char_at(i)
|
466
|
+
if next_c and next_c in [",", "}"]:
|
467
|
+
# Ok then this is a missing right quote
|
468
|
+
self.log(
|
469
|
+
"While parsing a string missing the right delimiter in object key context, we found a :, stopping here",
|
470
|
+
)
|
471
|
+
break
|
472
|
+
else:
|
473
|
+
# The string ended without finding a lstring_delimiter, I will assume this is a missing right quote
|
474
|
+
self.log(
|
475
|
+
"While parsing a string missing the right delimiter in object key context, we found a :, stopping here",
|
476
|
+
)
|
477
|
+
break
|
407
478
|
# ChatGPT sometimes forgets to quote stuff in HTML tags or Markdown, so we do this whole thing here
|
408
479
|
if char == rstring_delimiter:
|
409
480
|
# Special case here, in case of double quotes one after another
|
@@ -96,3 +96,24 @@ class StringFileWrapper:
|
|
96
96
|
self.length = self.fd.tell()
|
97
97
|
self.fd.seek(current_position)
|
98
98
|
return self.length
|
99
|
+
|
100
|
+
def __setitem__(self, index: Union[int, slice], value: str) -> None:
|
101
|
+
"""
|
102
|
+
Set a character or a slice of characters in the file.
|
103
|
+
|
104
|
+
Args:
|
105
|
+
index (Union[int, slice]): The position or slice of characters to set.
|
106
|
+
value (str): The value to set at the specified index or slice.
|
107
|
+
"""
|
108
|
+
if isinstance(index, slice):
|
109
|
+
start = index.start or 0
|
110
|
+
else:
|
111
|
+
start = index or 0
|
112
|
+
|
113
|
+
if start < 0:
|
114
|
+
start += len(self)
|
115
|
+
|
116
|
+
current_position = self.fd.tell()
|
117
|
+
self.fd.seek(start)
|
118
|
+
self.fd.write(value)
|
119
|
+
self.fd.seek(current_position)
|
@@ -0,0 +1,13 @@
|
|
1
|
+
json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
|
2
|
+
json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
|
3
|
+
json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
|
4
|
+
json_repair/json_parser.py,sha256=9ZHHQmfHPYQxSy93yjYPYtIHL415HiDo26hSuJCX4MA,35581
|
5
|
+
json_repair/json_repair.py,sha256=LINLSJBs3cJMfs1YRDaIpfWR5PJLs87Oe06G5yQjY18,9729
|
6
|
+
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
+
json_repair/string_file_wrapper.py,sha256=koZmdq2-Z5K7XF1bDqX6dEbNaVMJYcMTjq-aGe6NQvA,4526
|
8
|
+
json_repair-0.35.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
9
|
+
json_repair-0.35.0.dist-info/METADATA,sha256=hUDqZJmhcKN7_8UDQk6FfHjvBvPE-jbzkjiEBqP-26I,11794
|
10
|
+
json_repair-0.35.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
11
|
+
json_repair-0.35.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
|
12
|
+
json_repair-0.35.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
13
|
+
json_repair-0.35.0.dist-info/RECORD,,
|
@@ -1,13 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
|
2
|
-
json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
|
3
|
-
json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
|
4
|
-
json_repair/json_parser.py,sha256=qLMNN6xJm-8CM4446WizbENCBnFKzEoSM4VLZXTGXaQ,31831
|
5
|
-
json_repair/json_repair.py,sha256=LINLSJBs3cJMfs1YRDaIpfWR5PJLs87Oe06G5yQjY18,9729
|
6
|
-
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
-
json_repair/string_file_wrapper.py,sha256=EHLhNBWoyUitzT08thytYJiNZh_klEFwfT8zutPSdb4,3905
|
8
|
-
json_repair-0.33.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
9
|
-
json_repair-0.33.0.dist-info/METADATA,sha256=-cTTyNlIlv3xouXyqxRuHDhSXT-2_QuB5AdeKpDbxs4,11794
|
10
|
-
json_repair-0.33.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
11
|
-
json_repair-0.33.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
|
12
|
-
json_repair-0.33.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
13
|
-
json_repair-0.33.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|