json-repair 0.33.0__py3-none-any.whl → 0.35.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_parser.py +97 -26
- json_repair/string_file_wrapper.py +21 -0
- {json_repair-0.33.0.dist-info → json_repair-0.35.0.dist-info}/METADATA +1 -1
- json_repair-0.35.0.dist-info/RECORD +13 -0
- json_repair-0.33.0.dist-info/RECORD +0 -13
- {json_repair-0.33.0.dist-info → json_repair-0.35.0.dist-info}/LICENSE +0 -0
- {json_repair-0.33.0.dist-info → json_repair-0.35.0.dist-info}/WHEEL +0 -0
- {json_repair-0.33.0.dist-info → json_repair-0.35.0.dist-info}/entry_points.txt +0 -0
- {json_repair-0.33.0.dist-info → json_repair-0.35.0.dist-info}/top_level.txt +0 -0
json_repair/json_parser.py
CHANGED
@@ -7,6 +7,9 @@ JSONReturnType = Union[Dict[str, Any], List[Any], str, float, int, bool, None]
|
|
7
7
|
|
8
8
|
|
9
9
|
class JSONParser:
|
10
|
+
# Constants
|
11
|
+
STRING_DELIMITERS = ['"', "'", "“", "”"]
|
12
|
+
|
10
13
|
def __init__(
|
11
14
|
self,
|
12
15
|
json_str: Union[str, StringFileWrapper],
|
@@ -89,7 +92,9 @@ class JSONParser:
|
|
89
92
|
)
|
90
93
|
return ""
|
91
94
|
# <string> starts with a quote
|
92
|
-
elif not self.context.empty and (
|
95
|
+
elif not self.context.empty and (
|
96
|
+
char in self.STRING_DELIMITERS or char.isalpha()
|
97
|
+
):
|
93
98
|
return self.parse_string()
|
94
99
|
# <number> starts with [0-9] or minus
|
95
100
|
elif not self.context.empty and (
|
@@ -130,6 +135,8 @@ class JSONParser:
|
|
130
135
|
# <member> starts with a <string>
|
131
136
|
key = ""
|
132
137
|
while self.get_char_at():
|
138
|
+
# The rollback index needs to be updated here in case the key is empty
|
139
|
+
rollback_index = self.index
|
133
140
|
key = str(self.parse_string())
|
134
141
|
|
135
142
|
if key != "" or (key == "" and self.get_char_at() == ":"):
|
@@ -140,6 +147,12 @@ class JSONParser:
|
|
140
147
|
"While parsing an object we found a duplicate key, closing the object here and rolling back the index",
|
141
148
|
)
|
142
149
|
self.index = rollback_index - 1
|
150
|
+
# add an opening curly brace to make this work
|
151
|
+
self.json_str = (
|
152
|
+
self.json_str[: self.index + 1]
|
153
|
+
+ "{"
|
154
|
+
+ self.json_str[self.index + 1 :]
|
155
|
+
)
|
143
156
|
break
|
144
157
|
|
145
158
|
# Skip filler whitespaces
|
@@ -227,7 +240,7 @@ class JSONParser:
|
|
227
240
|
|
228
241
|
char = self.get_char_at()
|
229
242
|
# A valid string can only start with a valid quote or, in our case, with a literal
|
230
|
-
while char and char not in
|
243
|
+
while char and char not in self.STRING_DELIMITERS and not char.isalnum():
|
231
244
|
self.index += 1
|
232
245
|
char = self.get_char_at()
|
233
246
|
|
@@ -262,35 +275,61 @@ class JSONParser:
|
|
262
275
|
if not missing_quotes:
|
263
276
|
self.index += 1
|
264
277
|
|
278
|
+
self.skip_whitespaces_at()
|
265
279
|
# There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
|
266
|
-
if self.get_char_at()
|
267
|
-
# If
|
268
|
-
if (
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
self.index += 1
|
285
|
-
else:
|
286
|
-
# Ok this is not a doubled quote, check if this is an empty string or not
|
287
|
-
i = self.skip_whitespaces_at(idx=1, move_main_index=False)
|
280
|
+
if self.get_char_at() in self.STRING_DELIMITERS:
|
281
|
+
# If the next character is the same type of quote, then we manage it as double quotes
|
282
|
+
if self.get_char_at() == lstring_delimiter:
|
283
|
+
# If it's an empty key, this was easy
|
284
|
+
if (
|
285
|
+
self.context.current == ContextValues.OBJECT_KEY
|
286
|
+
and self.get_char_at(1) == ":"
|
287
|
+
):
|
288
|
+
self.index += 1
|
289
|
+
return ""
|
290
|
+
if self.get_char_at(1) == lstring_delimiter:
|
291
|
+
# There's something fishy about this, we found doubled quotes and then again quotes
|
292
|
+
self.log(
|
293
|
+
"While parsing a string, we found a doubled quote and then a quote again, ignoring it",
|
294
|
+
)
|
295
|
+
return ""
|
296
|
+
# Find the next delimiter
|
297
|
+
i = self.skip_to_character(character=rstring_delimiter, idx=1)
|
288
298
|
next_c = self.get_char_at(i)
|
289
|
-
|
299
|
+
# Now check that the next character is also a delimiter to ensure that we have "".....""
|
300
|
+
# In that case we ignore this rstring delimiter
|
301
|
+
if next_c and (self.get_char_at(i + 1) or "") == rstring_delimiter:
|
290
302
|
self.log(
|
291
|
-
"While parsing a string, we found a
|
303
|
+
"While parsing a string, we found a valid starting doubled quote",
|
292
304
|
)
|
305
|
+
doubled_quotes = True
|
293
306
|
self.index += 1
|
307
|
+
else:
|
308
|
+
# Ok this is not a doubled quote, check if this is an empty string or not
|
309
|
+
i = self.skip_whitespaces_at(idx=1, move_main_index=False)
|
310
|
+
next_c = self.get_char_at(i)
|
311
|
+
if next_c in self.STRING_DELIMITERS + ["{", "["]:
|
312
|
+
# something fishy is going on here
|
313
|
+
self.log(
|
314
|
+
"While parsing a string, we found a doubled quote but also another quote afterwards, ignoring it",
|
315
|
+
)
|
316
|
+
self.index += 1
|
317
|
+
return ""
|
318
|
+
elif next_c not in [",", "]", "}"]:
|
319
|
+
self.log(
|
320
|
+
"While parsing a string, we found a doubled quote but it was a mistake, removing one quote",
|
321
|
+
)
|
322
|
+
self.index += 1
|
323
|
+
else:
|
324
|
+
# Otherwise we need to do another check before continuing
|
325
|
+
i = self.skip_to_character(character=rstring_delimiter, idx=1)
|
326
|
+
next_c = self.get_char_at(i)
|
327
|
+
if not next_c:
|
328
|
+
# mmmm that delimiter never appears again, this is a mistake
|
329
|
+
self.log(
|
330
|
+
"While parsing a string, we found a quote but it was a mistake, ignoring it",
|
331
|
+
)
|
332
|
+
return ""
|
294
333
|
|
295
334
|
# Initialize our return value
|
296
335
|
string_acc = ""
|
@@ -404,6 +443,38 @@ class JSONParser:
|
|
404
443
|
string_acc += escape_seqs.get(char, char) or char
|
405
444
|
self.index += 1
|
406
445
|
char = self.get_char_at()
|
446
|
+
# If we are in object key context and we find a colon, it could be a missing right quote
|
447
|
+
if (
|
448
|
+
char == ":"
|
449
|
+
and not missing_quotes
|
450
|
+
and self.context.current == ContextValues.OBJECT_KEY
|
451
|
+
):
|
452
|
+
# Ok now we need to check if this is followed by a value like "..."
|
453
|
+
i = self.skip_to_character(character=lstring_delimiter, idx=1)
|
454
|
+
next_c = self.get_char_at(i)
|
455
|
+
if next_c:
|
456
|
+
i += 1
|
457
|
+
# found the first delimiter
|
458
|
+
i = self.skip_to_character(character=rstring_delimiter, idx=i)
|
459
|
+
next_c = self.get_char_at(i)
|
460
|
+
if next_c:
|
461
|
+
# found a second delimiter
|
462
|
+
i += 1
|
463
|
+
# Skip spaces
|
464
|
+
i = self.skip_whitespaces_at(idx=i, move_main_index=False)
|
465
|
+
next_c = self.get_char_at(i)
|
466
|
+
if next_c and next_c in [",", "}"]:
|
467
|
+
# Ok then this is a missing right quote
|
468
|
+
self.log(
|
469
|
+
"While parsing a string missing the right delimiter in object key context, we found a :, stopping here",
|
470
|
+
)
|
471
|
+
break
|
472
|
+
else:
|
473
|
+
# The string ended without finding a lstring_delimiter, I will assume this is a missing right quote
|
474
|
+
self.log(
|
475
|
+
"While parsing a string missing the right delimiter in object key context, we found a :, stopping here",
|
476
|
+
)
|
477
|
+
break
|
407
478
|
# ChatGPT sometimes forget to quote stuff in html tags or markdown, so we do this whole thing here
|
408
479
|
if char == rstring_delimiter:
|
409
480
|
# Special case here, in case of double quotes one after another
|
@@ -96,3 +96,24 @@ class StringFileWrapper:
|
|
96
96
|
self.length = self.fd.tell()
|
97
97
|
self.fd.seek(current_position)
|
98
98
|
return self.length
|
99
|
+
|
100
|
+
def __setitem__(self, index: Union[int, slice], value: str) -> None:
|
101
|
+
"""
|
102
|
+
Set a character or a slice of characters in the file.
|
103
|
+
|
104
|
+
Args:
|
105
|
+
index (slice): The slice of characters to set.
|
106
|
+
value (str): The value to set at the specified index or slice.
|
107
|
+
"""
|
108
|
+
if isinstance(index, slice):
|
109
|
+
start = index.start or 0
|
110
|
+
else:
|
111
|
+
start = index or 0
|
112
|
+
|
113
|
+
if start < 0:
|
114
|
+
start += len(self)
|
115
|
+
|
116
|
+
current_position = self.fd.tell()
|
117
|
+
self.fd.seek(start)
|
118
|
+
self.fd.write(value)
|
119
|
+
self.fd.seek(current_position)
|
@@ -0,0 +1,13 @@
|
|
1
|
+
json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
|
2
|
+
json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
|
3
|
+
json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
|
4
|
+
json_repair/json_parser.py,sha256=9ZHHQmfHPYQxSy93yjYPYtIHL415HiDo26hSuJCX4MA,35581
|
5
|
+
json_repair/json_repair.py,sha256=LINLSJBs3cJMfs1YRDaIpfWR5PJLs87Oe06G5yQjY18,9729
|
6
|
+
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
+
json_repair/string_file_wrapper.py,sha256=koZmdq2-Z5K7XF1bDqX6dEbNaVMJYcMTjq-aGe6NQvA,4526
|
8
|
+
json_repair-0.35.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
9
|
+
json_repair-0.35.0.dist-info/METADATA,sha256=hUDqZJmhcKN7_8UDQk6FfHjvBvPE-jbzkjiEBqP-26I,11794
|
10
|
+
json_repair-0.35.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
11
|
+
json_repair-0.35.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
|
12
|
+
json_repair-0.35.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
13
|
+
json_repair-0.35.0.dist-info/RECORD,,
|
@@ -1,13 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
|
2
|
-
json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
|
3
|
-
json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
|
4
|
-
json_repair/json_parser.py,sha256=qLMNN6xJm-8CM4446WizbENCBnFKzEoSM4VLZXTGXaQ,31831
|
5
|
-
json_repair/json_repair.py,sha256=LINLSJBs3cJMfs1YRDaIpfWR5PJLs87Oe06G5yQjY18,9729
|
6
|
-
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
-
json_repair/string_file_wrapper.py,sha256=EHLhNBWoyUitzT08thytYJiNZh_klEFwfT8zutPSdb4,3905
|
8
|
-
json_repair-0.33.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
9
|
-
json_repair-0.33.0.dist-info/METADATA,sha256=-cTTyNlIlv3xouXyqxRuHDhSXT-2_QuB5AdeKpDbxs4,11794
|
10
|
-
json_repair-0.33.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
11
|
-
json_repair-0.33.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
|
12
|
-
json_repair-0.33.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
13
|
-
json_repair-0.33.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|