json-repair 0.27.1__py3-none-any.whl → 0.27.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_repair.py +21 -50
- {json_repair-0.27.1.dist-info → json_repair-0.27.2.dist-info}/METADATA +2 -2
- json_repair-0.27.2.dist-info/RECORD +7 -0
- json_repair-0.27.1.dist-info/RECORD +0 -7
- {json_repair-0.27.1.dist-info → json_repair-0.27.2.dist-info}/LICENSE +0 -0
- {json_repair-0.27.1.dist-info → json_repair-0.27.2.dist-info}/WHEEL +0 -0
- {json_repair-0.27.1.dist-info → json_repair-0.27.2.dist-info}/top_level.txt +0 -0
json_repair/json_repair.py
CHANGED
@@ -179,21 +179,12 @@ class JSONParser:
|
|
179
179
|
|
180
180
|
# <member> starts with a <string>
|
181
181
|
key = ""
|
182
|
-
while
|
183
|
-
current_index = self.index
|
182
|
+
while self.get_char_at():
|
184
183
|
key = self.parse_string()
|
185
184
|
|
186
|
-
|
187
|
-
|
188
|
-
key = "empty_placeholder"
|
189
|
-
self.log(
|
190
|
-
"While parsing an object we found an empty key, replacing with empty_placeholder",
|
191
|
-
"info",
|
192
|
-
)
|
185
|
+
if key != "" or (key == "" and self.get_char_at() == ":"):
|
186
|
+
# If the string is empty but there is a object divider, we are done here
|
193
187
|
break
|
194
|
-
elif key == "" and self.index == current_index:
|
195
|
-
# Sometimes the string search might not move the index at all, that might lead us to an infinite loop
|
196
|
-
self.index += 1
|
197
188
|
|
198
189
|
self.skip_whitespaces_at()
|
199
190
|
|
@@ -226,13 +217,6 @@ class JSONParser:
|
|
226
217
|
# Remove trailing spaces
|
227
218
|
self.skip_whitespaces_at()
|
228
219
|
|
229
|
-
# Especially at the end of an LLM generated json you might miss the last "}"
|
230
|
-
if (self.get_char_at() or "}") != "}":
|
231
|
-
self.log(
|
232
|
-
"While parsing an object, we couldn't find the closing }, ignoring",
|
233
|
-
"info",
|
234
|
-
)
|
235
|
-
|
236
220
|
self.index += 1
|
237
221
|
return obj
|
238
222
|
|
@@ -261,13 +245,6 @@ class JSONParser:
|
|
261
245
|
while char and (char.isspace() or char == ","):
|
262
246
|
self.index += 1
|
263
247
|
char = self.get_char_at()
|
264
|
-
# If this is the right value of an object and we are closing the object, it means the array is over
|
265
|
-
if self.get_context() == "object_value" and char == "}":
|
266
|
-
self.log(
|
267
|
-
"While parsing an array inside an object, we got to the end without finding a ]. Stopped parsing",
|
268
|
-
"info",
|
269
|
-
)
|
270
|
-
break
|
271
248
|
|
272
249
|
# Especially at the end of an LLM generated json you might miss the last "]"
|
273
250
|
char = self.get_char_at()
|
@@ -275,14 +252,6 @@ class JSONParser:
|
|
275
252
|
self.log(
|
276
253
|
"While parsing an array we missed the closing ], adding it back", "info"
|
277
254
|
)
|
278
|
-
# Sometimes when you fix a missing "]" you'll have a trailing "," there that makes the JSON invalid
|
279
|
-
if char == ",":
|
280
|
-
# Remove trailing "," before adding the "]"
|
281
|
-
self.log(
|
282
|
-
"While parsing an array, found a trailing , before adding ]",
|
283
|
-
"info",
|
284
|
-
)
|
285
|
-
|
286
255
|
self.index -= 1
|
287
256
|
|
288
257
|
self.index += 1
|
@@ -337,6 +306,11 @@ class JSONParser:
|
|
337
306
|
|
338
307
|
# There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
|
339
308
|
if self.get_char_at() == lstring_delimiter:
|
309
|
+
# If it's an empty key, this was easy
|
310
|
+
if self.get_context() == "object_key" and self.get_char_at(1) == ":":
|
311
|
+
self.index += 1
|
312
|
+
return ""
|
313
|
+
|
340
314
|
# This is a valid exception only if it's closed by a double delimiter again
|
341
315
|
i = 1
|
342
316
|
next_c = self.get_char_at(i)
|
@@ -429,6 +403,7 @@ class JSONParser:
|
|
429
403
|
"While parsing a string, we found a doubled quote, ignoring it",
|
430
404
|
"info",
|
431
405
|
)
|
406
|
+
self.index += 1
|
432
407
|
elif missing_quotes and self.get_context() == "object_value":
|
433
408
|
# In case of missing starting quote I need to check if the delimeter is the end or the beginning of a key
|
434
409
|
i = 1
|
@@ -575,22 +550,18 @@ class JSONParser:
|
|
575
550
|
# The number ends with a non valid character for a number/currency, rolling back one
|
576
551
|
number_str = number_str[:-1]
|
577
552
|
self.index -= 1
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
return number_str
|
591
|
-
else:
|
592
|
-
# If nothing works, let's skip and keep parsing
|
593
|
-
return self.parse_json()
|
553
|
+
try:
|
554
|
+
if "," in number_str:
|
555
|
+
return str(number_str)
|
556
|
+
if "." in number_str or "e" in number_str or "E" in number_str:
|
557
|
+
return float(number_str)
|
558
|
+
elif number_str == "-":
|
559
|
+
# If there is a stray "-" this will throw an exception, throw away this character
|
560
|
+
return self.parse_json()
|
561
|
+
else:
|
562
|
+
return int(number_str)
|
563
|
+
except ValueError:
|
564
|
+
return number_str
|
594
565
|
|
595
566
|
def parse_boolean_or_null(self) -> Union[bool, str, None]:
|
596
567
|
# <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.27.1
|
3
|
+
Version: 0.27.2
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -38,7 +38,7 @@ License-File: LICENSE
|
|
38
38
|
[](https://pypi.org/project/json-repair/)
|
39
39
|

|
40
40
|
[](https://pypi.org/project/json-repair/)
|
41
|
-
|
41
|
+
[](https://github.com/sponsors/mangiucugna)
|
42
42
|
|
43
43
|
This simple package can be used to fix an invalid json string. To know all cases in which this package will work, check out the unit test.
|
44
44
|
|
@@ -0,0 +1,7 @@
|
|
1
|
+
json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
|
2
|
+
json_repair/json_repair.py,sha256=599pWb3Wn7Lltvy8X3eWN9u7ccnSGdAaHt5De_L219s,30337
|
3
|
+
json_repair-0.27.2.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
+
json_repair-0.27.2.dist-info/METADATA,sha256=yTnkoMdKmX0_E48cLHflA8grpL00MQJb91yLfWpgxdA,7976
|
5
|
+
json_repair-0.27.2.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
6
|
+
json_repair-0.27.2.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
+
json_repair-0.27.2.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
|
2
|
-
json_repair/json_repair.py,sha256=54sOesew2z21WAd4UVaEYOLI8oUkSqF8Z4HbkYBdCj8,31792
|
3
|
-
json_repair-0.27.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
-
json_repair-0.27.1.dist-info/METADATA,sha256=z1CIA9xV6Cnd7Ko49bZ1snzdwJHK-ev8vKCfhV62vzo,7863
|
5
|
-
json_repair-0.27.1.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
6
|
-
json_repair-0.27.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
-
json_repair-0.27.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|