json-repair 0.27.1__py3-none-any.whl → 0.27.2__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- json_repair/json_repair.py +21 -50
- {json_repair-0.27.1.dist-info → json_repair-0.27.2.dist-info}/METADATA +2 -2
- json_repair-0.27.2.dist-info/RECORD +7 -0
- json_repair-0.27.1.dist-info/RECORD +0 -7
- {json_repair-0.27.1.dist-info → json_repair-0.27.2.dist-info}/LICENSE +0 -0
- {json_repair-0.27.1.dist-info → json_repair-0.27.2.dist-info}/WHEEL +0 -0
- {json_repair-0.27.1.dist-info → json_repair-0.27.2.dist-info}/top_level.txt +0 -0
json_repair/json_repair.py
CHANGED
@@ -179,21 +179,12 @@ class JSONParser:
|
|
179
179
|
|
180
180
|
# <member> starts with a <string>
|
181
181
|
key = ""
|
182
|
-
while key == "":
|
183
|
-
current_index = self.index
|
182
|
+
while self.get_char_at():
|
184
183
|
key = self.parse_string()
|
185
184
|
|
186
|
-
|
187
|
-
|
188
|
-
key = "empty_placeholder"
|
189
|
-
self.log(
|
190
|
-
"While parsing an object we found an empty key, replacing with empty_placeholder",
|
191
|
-
"info",
|
192
|
-
)
|
185
|
+
if key != "" or (key == "" and self.get_char_at() == ":"):
|
186
|
+
# If the string is empty but there is a object divider, we are done here
|
193
187
|
break
|
194
|
-
elif key == "" and self.index == current_index:
|
195
|
-
# Sometimes the string search might not move the index at all, that might lead us to an infinite loop
|
196
|
-
self.index += 1
|
197
188
|
|
198
189
|
self.skip_whitespaces_at()
|
199
190
|
|
@@ -226,13 +217,6 @@ class JSONParser:
|
|
226
217
|
# Remove trailing spaces
|
227
218
|
self.skip_whitespaces_at()
|
228
219
|
|
229
|
-
# Especially at the end of an LLM generated json you might miss the last "}"
|
230
|
-
if (self.get_char_at() or "}") != "}":
|
231
|
-
self.log(
|
232
|
-
"While parsing an object, we couldn't find the closing }, ignoring",
|
233
|
-
"info",
|
234
|
-
)
|
235
|
-
|
236
220
|
self.index += 1
|
237
221
|
return obj
|
238
222
|
|
@@ -261,13 +245,6 @@ class JSONParser:
|
|
261
245
|
while char and (char.isspace() or char == ","):
|
262
246
|
self.index += 1
|
263
247
|
char = self.get_char_at()
|
264
|
-
# If this is the right value of an object and we are closing the object, it means the array is over
|
265
|
-
if self.get_context() == "object_value" and char == "}":
|
266
|
-
self.log(
|
267
|
-
"While parsing an array inside an object, we got to the end without finding a ]. Stopped parsing",
|
268
|
-
"info",
|
269
|
-
)
|
270
|
-
break
|
271
248
|
|
272
249
|
# Especially at the end of an LLM generated json you might miss the last "]"
|
273
250
|
char = self.get_char_at()
|
@@ -275,14 +252,6 @@ class JSONParser:
|
|
275
252
|
self.log(
|
276
253
|
"While parsing an array we missed the closing ], adding it back", "info"
|
277
254
|
)
|
278
|
-
# Sometimes when you fix a missing "]" you'll have a trailing "," there that makes the JSON invalid
|
279
|
-
if char == ",":
|
280
|
-
# Remove trailing "," before adding the "]"
|
281
|
-
self.log(
|
282
|
-
"While parsing an array, found a trailing , before adding ]",
|
283
|
-
"info",
|
284
|
-
)
|
285
|
-
|
286
255
|
self.index -= 1
|
287
256
|
|
288
257
|
self.index += 1
|
@@ -337,6 +306,11 @@ class JSONParser:
|
|
337
306
|
|
338
307
|
# There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
|
339
308
|
if self.get_char_at() == lstring_delimiter:
|
309
|
+
# If it's an empty key, this was easy
|
310
|
+
if self.get_context() == "object_key" and self.get_char_at(1) == ":":
|
311
|
+
self.index += 1
|
312
|
+
return ""
|
313
|
+
|
340
314
|
# This is a valid exception only if it's closed by a double delimiter again
|
341
315
|
i = 1
|
342
316
|
next_c = self.get_char_at(i)
|
@@ -429,6 +403,7 @@ class JSONParser:
|
|
429
403
|
"While parsing a string, we found a doubled quote, ignoring it",
|
430
404
|
"info",
|
431
405
|
)
|
406
|
+
self.index += 1
|
432
407
|
elif missing_quotes and self.get_context() == "object_value":
|
433
408
|
# In case of missing starting quote I need to check if the delimeter is the end or the beginning of a key
|
434
409
|
i = 1
|
@@ -575,22 +550,18 @@ class JSONParser:
|
|
575
550
|
# The number ends with a non valid character for a number/currency, rolling back one
|
576
551
|
number_str = number_str[:-1]
|
577
552
|
self.index -= 1
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
return number_str
|
591
|
-
else:
|
592
|
-
# If nothing works, let's skip and keep parsing
|
593
|
-
return self.parse_json()
|
553
|
+
try:
|
554
|
+
if "," in number_str:
|
555
|
+
return str(number_str)
|
556
|
+
if "." in number_str or "e" in number_str or "E" in number_str:
|
557
|
+
return float(number_str)
|
558
|
+
elif number_str == "-":
|
559
|
+
# If there is a stray "-" this will throw an exception, throw away this character
|
560
|
+
return self.parse_json()
|
561
|
+
else:
|
562
|
+
return int(number_str)
|
563
|
+
except ValueError:
|
564
|
+
return number_str
|
594
565
|
|
595
566
|
def parse_boolean_or_null(self) -> Union[bool, str, None]:
|
596
567
|
# <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.27.1
|
3
|
+
Version: 0.27.2
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -38,7 +38,7 @@ License-File: LICENSE
|
|
38
38
|
[data:image/s3,"s3://crabby-images/24cc2/24cc27e92d35c47c6c4cb73f8131696d29065dd2" alt="PyPI"](https://pypi.org/project/json-repair/)
|
39
39
|
data:image/s3,"s3://crabby-images/7c195/7c195ec0ce8f4bd0be15fa58d3802cbebdbf1b37" alt="Python version"
|
40
40
|
[data:image/s3,"s3://crabby-images/8b55b/8b55be8502970b06016864be9ce046f1c0a7ec33" alt="PyPI downloads"](https://pypi.org/project/json-repair/)
|
41
|
-
|
41
|
+
[data:image/s3,"s3://crabby-images/bd5f7/bd5f772be6c859b9655a69ddd1f9967aefd77400" alt="Github Sponsors"](https://github.com/sponsors/mangiucugna)
|
42
42
|
|
43
43
|
This simple package can be used to fix an invalid json string. To know all cases in which this package will work, check out the unit test.
|
44
44
|
|
@@ -0,0 +1,7 @@
|
|
1
|
+
json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
|
2
|
+
json_repair/json_repair.py,sha256=599pWb3Wn7Lltvy8X3eWN9u7ccnSGdAaHt5De_L219s,30337
|
3
|
+
json_repair-0.27.2.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
+
json_repair-0.27.2.dist-info/METADATA,sha256=yTnkoMdKmX0_E48cLHflA8grpL00MQJb91yLfWpgxdA,7976
|
5
|
+
json_repair-0.27.2.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
6
|
+
json_repair-0.27.2.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
+
json_repair-0.27.2.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
|
2
|
-
json_repair/json_repair.py,sha256=54sOesew2z21WAd4UVaEYOLI8oUkSqF8Z4HbkYBdCj8,31792
|
3
|
-
json_repair-0.27.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
-
json_repair-0.27.1.dist-info/METADATA,sha256=z1CIA9xV6Cnd7Ko49bZ1snzdwJHK-ev8vKCfhV62vzo,7863
|
5
|
-
json_repair-0.27.1.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
6
|
-
json_repair-0.27.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
-
json_repair-0.27.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|