json-repair 0.27.1__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_repair.py +24 -63
- json_repair/py.typed +0 -0
- {json_repair-0.27.1.dist-info → json_repair-0.28.0.dist-info}/METADATA +3 -2
- json_repair-0.28.0.dist-info/RECORD +8 -0
- {json_repair-0.27.1.dist-info → json_repair-0.28.0.dist-info}/WHEEL +1 -1
- json_repair-0.27.1.dist-info/RECORD +0 -7
- {json_repair-0.27.1.dist-info → json_repair-0.28.0.dist-info}/LICENSE +0 -0
- {json_repair-0.27.1.dist-info → json_repair-0.28.0.dist-info}/top_level.txt +0 -0
json_repair/json_repair.py
CHANGED
@@ -51,9 +51,6 @@ class StringFileWrapper:
|
|
51
51
|
self.fd.seek(current_position)
|
52
52
|
return self.length
|
53
53
|
|
54
|
-
def __setitem__(self) -> None:
|
55
|
-
raise Exception("This is read-only!")
|
56
|
-
|
57
54
|
|
58
55
|
class LoggerConfig:
|
59
56
|
# This is a type class to simplify the declaration
|
@@ -179,21 +176,12 @@ class JSONParser:
|
|
179
176
|
|
180
177
|
# <member> starts with a <string>
|
181
178
|
key = ""
|
182
|
-
while
|
183
|
-
current_index = self.index
|
179
|
+
while self.get_char_at():
|
184
180
|
key = self.parse_string()
|
185
181
|
|
186
|
-
|
187
|
-
|
188
|
-
key = "empty_placeholder"
|
189
|
-
self.log(
|
190
|
-
"While parsing an object we found an empty key, replacing with empty_placeholder",
|
191
|
-
"info",
|
192
|
-
)
|
182
|
+
if key != "" or (key == "" and self.get_char_at() == ":"):
|
183
|
+
# If the string is empty but there is an object divider, we are done here
|
193
184
|
break
|
194
|
-
elif key == "" and self.index == current_index:
|
195
|
-
# Sometimes the string search might not move the index at all, that might lead us to an infinite loop
|
196
|
-
self.index += 1
|
197
185
|
|
198
186
|
self.skip_whitespaces_at()
|
199
187
|
|
@@ -226,13 +214,6 @@ class JSONParser:
|
|
226
214
|
# Remove trailing spaces
|
227
215
|
self.skip_whitespaces_at()
|
228
216
|
|
229
|
-
# Especially at the end of an LLM generated json you might miss the last "}"
|
230
|
-
if (self.get_char_at() or "}") != "}":
|
231
|
-
self.log(
|
232
|
-
"While parsing an object, we couldn't find the closing }, ignoring",
|
233
|
-
"info",
|
234
|
-
)
|
235
|
-
|
236
217
|
self.index += 1
|
237
218
|
return obj
|
238
219
|
|
@@ -261,13 +242,6 @@ class JSONParser:
|
|
261
242
|
while char and (char.isspace() or char == ","):
|
262
243
|
self.index += 1
|
263
244
|
char = self.get_char_at()
|
264
|
-
# If this is the right value of an object and we are closing the object, it means the array is over
|
265
|
-
if self.get_context() == "object_value" and char == "}":
|
266
|
-
self.log(
|
267
|
-
"While parsing an array inside an object, we got to the end without finding a ]. Stopped parsing",
|
268
|
-
"info",
|
269
|
-
)
|
270
|
-
break
|
271
245
|
|
272
246
|
# Especially at the end of an LLM generated json you might miss the last "]"
|
273
247
|
char = self.get_char_at()
|
@@ -275,14 +249,6 @@ class JSONParser:
|
|
275
249
|
self.log(
|
276
250
|
"While parsing an array we missed the closing ], adding it back", "info"
|
277
251
|
)
|
278
|
-
# Sometimes when you fix a missing "]" you'll have a trailing "," there that makes the JSON invalid
|
279
|
-
if char == ",":
|
280
|
-
# Remove trailing "," before adding the "]"
|
281
|
-
self.log(
|
282
|
-
"While parsing an array, found a trailing , before adding ]",
|
283
|
-
"info",
|
284
|
-
)
|
285
|
-
|
286
252
|
self.index -= 1
|
287
253
|
|
288
254
|
self.index += 1
|
@@ -337,7 +303,11 @@ class JSONParser:
|
|
337
303
|
|
338
304
|
# There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
|
339
305
|
if self.get_char_at() == lstring_delimiter:
|
340
|
-
#
|
306
|
+
# If it's an empty key, this was easy
|
307
|
+
if self.get_context() == "object_key" and self.get_char_at(1) == ":":
|
308
|
+
self.index += 1
|
309
|
+
return ""
|
310
|
+
# Find the next delimiter
|
341
311
|
i = 1
|
342
312
|
next_c = self.get_char_at(i)
|
343
313
|
while next_c and next_c != rstring_delimiter:
|
@@ -429,6 +399,7 @@ class JSONParser:
|
|
429
399
|
"While parsing a string, we found a doubled quote, ignoring it",
|
430
400
|
"info",
|
431
401
|
)
|
402
|
+
self.index += 1
|
432
403
|
elif missing_quotes and self.get_context() == "object_value":
|
433
404
|
# In case of missing starting quote I need to check if the delimiter is the end or the beginning of a key
|
434
405
|
i = 1
|
@@ -467,7 +438,7 @@ class JSONParser:
|
|
467
438
|
]:
|
468
439
|
# This is a bit of a weird workaround, essentially in object_value context we don't always break on commas
|
469
440
|
# This is because the routine after will make sure to correct any bad guess and this solves a corner case
|
470
|
-
if next_c.isalpha():
|
441
|
+
if check_comma_in_object_value and next_c.isalpha():
|
471
442
|
check_comma_in_object_value = False
|
472
443
|
# If we are in an object context, let's check for the right delimiters
|
473
444
|
if (
|
@@ -575,22 +546,18 @@ class JSONParser:
|
|
575
546
|
# The number ends with a non valid character for a number/currency, rolling back one
|
576
547
|
number_str = number_str[:-1]
|
577
548
|
self.index -= 1
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
return number_str
|
591
|
-
else:
|
592
|
-
# If nothing works, let's skip and keep parsing
|
593
|
-
return self.parse_json()
|
549
|
+
try:
|
550
|
+
if "," in number_str:
|
551
|
+
return str(number_str)
|
552
|
+
if "." in number_str or "e" in number_str or "E" in number_str:
|
553
|
+
return float(number_str)
|
554
|
+
elif number_str == "-":
|
555
|
+
# If there is a stray "-" this will throw an exception, throw away this character
|
556
|
+
return self.parse_json()
|
557
|
+
else:
|
558
|
+
return int(number_str)
|
559
|
+
except ValueError:
|
560
|
+
return number_str
|
594
561
|
|
595
562
|
def parse_boolean_or_null(self) -> Union[bool, str, None]:
|
596
563
|
# <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
|
@@ -644,16 +611,10 @@ class JSONParser:
|
|
644
611
|
self.context.append(value)
|
645
612
|
|
646
613
|
def reset_context(self) -> None:
|
647
|
-
try:
648
|
-
self.context.pop()
|
649
|
-
except Exception:
|
650
|
-
return
|
614
|
+
self.context.pop()
|
651
615
|
|
652
616
|
def get_context(self) -> str:
|
653
|
-
try:
654
|
-
return self.context[-1]
|
655
|
-
except Exception:
|
656
|
-
return ""
|
617
|
+
return self.context[-1]
|
657
618
|
|
658
619
|
def log(self, text: str, level: str) -> None:
|
659
620
|
if level == self.logger.log_level:
|
json_repair/py.typed
ADDED
File without changes
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.27.1
|
3
|
+
Version: 0.28.0
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -27,6 +27,7 @@ License: MIT License
|
|
27
27
|
|
28
28
|
Project-URL: Homepage, https://github.com/mangiucugna/json_repair/
|
29
29
|
Project-URL: Bug Tracker, https://github.com/mangiucugna/json_repair/issues
|
30
|
+
Project-URL: Live demo, https://mangiucugna.github.io/json_repair/
|
30
31
|
Keywords: JSON,REPAIR,LLM,PARSER
|
31
32
|
Classifier: Programming Language :: Python :: 3
|
32
33
|
Classifier: License :: OSI Approved :: MIT License
|
@@ -38,7 +39,7 @@ License-File: LICENSE
|
|
38
39
|
[](https://pypi.org/project/json-repair/)
|
39
40
|

|
40
41
|
[](https://pypi.org/project/json-repair/)
|
41
|
-
|
42
|
+
[](https://github.com/sponsors/mangiucugna)
|
42
43
|
|
43
44
|
This simple package can be used to fix an invalid json string. To know all cases in which this package will work, check out the unit test.
|
44
45
|
|
@@ -0,0 +1,8 @@
|
|
1
|
+
json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
|
2
|
+
json_repair/json_repair.py,sha256=5WjVoNO7Grdq9wfnbSjggiVlDv2XVcvWbCIQVmREP38,30109
|
3
|
+
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
+
json_repair-0.28.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
5
|
+
json_repair-0.28.0.dist-info/METADATA,sha256=rPrWno-My7ZQt7EJIR0BodmpLQ84fr0vjJwBM4XYmYU,8043
|
6
|
+
json_repair-0.28.0.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
|
7
|
+
json_repair-0.28.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
8
|
+
json_repair-0.28.0.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
|
2
|
-
json_repair/json_repair.py,sha256=54sOesew2z21WAd4UVaEYOLI8oUkSqF8Z4HbkYBdCj8,31792
|
3
|
-
json_repair-0.27.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
-
json_repair-0.27.1.dist-info/METADATA,sha256=z1CIA9xV6Cnd7Ko49bZ1snzdwJHK-ev8vKCfhV62vzo,7863
|
5
|
-
json_repair-0.27.1.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
6
|
-
json_repair-0.27.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
-
json_repair-0.27.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|