json-repair 0.39.0__py3-none-any.whl → 0.39.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- json_repair/json_parser.py +21 -35
- {json_repair-0.39.0.dist-info → json_repair-0.39.1.dist-info}/METADATA +7 -7
- {json_repair-0.39.0.dist-info → json_repair-0.39.1.dist-info}/RECORD +7 -7
- {json_repair-0.39.0.dist-info → json_repair-0.39.1.dist-info}/LICENSE +0 -0
- {json_repair-0.39.0.dist-info → json_repair-0.39.1.dist-info}/WHEEL +0 -0
- {json_repair-0.39.0.dist-info → json_repair-0.39.1.dist-info}/entry_points.txt +0 -0
- {json_repair-0.39.0.dist-info → json_repair-0.39.1.dist-info}/top_level.txt +0 -0
json_repair/json_parser.py
CHANGED
@@ -9,6 +9,7 @@ JSONReturnType = Union[Dict[str, Any], List[Any], str, float, int, bool, None]
|
|
9
9
|
class JSONParser:
|
10
10
|
# Constants
|
11
11
|
STRING_DELIMITERS = ['"', "'", "“", "”"]
|
12
|
+
NUMBER_CHARS = set("0123456789-.eE/,")
|
12
13
|
|
13
14
|
def __init__(
|
14
15
|
self,
|
@@ -129,8 +130,6 @@ class JSONParser:
|
|
129
130
|
# Context is used in the string parser to manage the lack of quotes
|
130
131
|
self.context.set(ContextValues.OBJECT_KEY)
|
131
132
|
|
132
|
-
self.skip_whitespaces_at()
|
133
|
-
|
134
133
|
# Save this index in case we need to find a duplicate key
|
135
134
|
rollback_index = self.index
|
136
135
|
|
@@ -219,18 +218,13 @@ class JSONParser:
|
|
219
218
|
char = self.get_char_at()
|
220
219
|
|
221
220
|
# Especially at the end of an LLM generated json you might miss the last "]"
|
222
|
-
char = self.get_char_at()
|
223
221
|
if char and char != "]":
|
224
222
|
self.log(
|
225
|
-
"While parsing an array we missed the closing ],
|
226
|
-
)
|
227
|
-
self.index -= 1
|
228
|
-
# Add the missing closing bracket
|
229
|
-
self.json_str = (
|
230
|
-
self.json_str[: self.index + 1] + "]" + self.json_str[self.index + 1 :]
|
223
|
+
"While parsing an array we missed the closing ], ignoring it",
|
231
224
|
)
|
232
225
|
|
233
226
|
self.index += 1
|
227
|
+
|
234
228
|
self.context.reset()
|
235
229
|
return arr
|
236
230
|
|
@@ -275,15 +269,11 @@ class JSONParser:
|
|
275
269
|
self.log(
|
276
270
|
"While parsing a string, we found a literal instead of a quote",
|
277
271
|
)
|
278
|
-
self.log(
|
279
|
-
"While parsing a string, we found no starting quote. Will add the quote back",
|
280
|
-
)
|
281
272
|
missing_quotes = True
|
282
273
|
|
283
274
|
if not missing_quotes:
|
284
275
|
self.index += 1
|
285
276
|
|
286
|
-
self.skip_whitespaces_at()
|
287
277
|
# There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
|
288
278
|
if self.get_char_at() in self.STRING_DELIMITERS:
|
289
279
|
# If the next character is the same type of quote, then we manage it as double quotes
|
@@ -583,6 +573,13 @@ class JSONParser:
|
|
583
573
|
elif (
|
584
574
|
next_c == rstring_delimiter and self.get_char_at(i - 1) != "\\"
|
585
575
|
):
|
576
|
+
# Check if self.index:self.index+i is only whitespaces, break if that's the case
|
577
|
+
if all(
|
578
|
+
str(self.get_char_at(j)).isspace()
|
579
|
+
for j in range(1, i)
|
580
|
+
if self.get_char_at(j)
|
581
|
+
):
|
582
|
+
break
|
586
583
|
if self.context.current == ContextValues.OBJECT_VALUE:
|
587
584
|
# But this might not be it! This could be just a missing comma
|
588
585
|
# We found a delimiter and we need to check if this is a key
|
@@ -610,26 +607,16 @@ class JSONParser:
|
|
610
607
|
self.index += 1
|
611
608
|
char = self.get_char_at()
|
612
609
|
elif self.context.current == ContextValues.ARRAY:
|
613
|
-
#
|
614
|
-
#
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
)
|
624
|
-
next_c = self.get_char_at(i)
|
625
|
-
if next_c and next_c in [",", "]"]:
|
626
|
-
self.log(
|
627
|
-
"While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
|
628
|
-
)
|
629
|
-
unmatched_delimiter = not unmatched_delimiter
|
630
|
-
string_acc += str(char)
|
631
|
-
self.index += 1
|
632
|
-
char = self.get_char_at()
|
610
|
+
# If we got up to here it means that this is a situation like this:
|
611
|
+
# ["bla bla bla "puppy" bla bla bla "kitty" bla bla"]
|
612
|
+
# So we need to ignore this quote
|
613
|
+
self.log(
|
614
|
+
"While parsing a string in Array context, we detected a quoted section that would have closed the string but has a different meaning here, ignoring it",
|
615
|
+
)
|
616
|
+
unmatched_delimiter = not unmatched_delimiter
|
617
|
+
string_acc += str(char)
|
618
|
+
self.index += 1
|
619
|
+
char = self.get_char_at()
|
633
620
|
|
634
621
|
if (
|
635
622
|
char
|
@@ -663,10 +650,9 @@ class JSONParser:
|
|
663
650
|
def parse_number(self) -> Union[float, int, str, JSONReturnType]:
|
664
651
|
# <number> is a valid real number expressed in one of a number of given formats
|
665
652
|
number_str = ""
|
666
|
-
number_chars = set("0123456789-.eE/,")
|
667
653
|
char = self.get_char_at()
|
668
654
|
is_array = self.context.current == ContextValues.ARRAY
|
669
|
-
while char and char in number_chars and (not is_array or char != ","):
|
655
|
+
while char and char in self.NUMBER_CHARS and (not is_array or char != ","):
|
670
656
|
number_str += char
|
671
657
|
self.index += 1
|
672
658
|
char = self.get_char_at()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.39.0
|
3
|
+
Version: 0.39.1
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -196,12 +196,12 @@ pipx install json-repair
|
|
196
196
|
to know all options available:
|
197
197
|
```
|
198
198
|
$ json_repair -h
|
199
|
-
usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
|
199
|
+
usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] [filename]
|
200
200
|
|
201
201
|
Repair and parse JSON files.
|
202
202
|
|
203
203
|
positional arguments:
|
204
|
-
filename The JSON file to repair
|
204
|
+
filename The JSON file to repair (if omitted, reads from stdin)
|
205
205
|
|
206
206
|
options:
|
207
207
|
-h, --help show this help message and exit
|
@@ -226,13 +226,13 @@ In this example, any version that starts with `0.` will be acceptable, allowing
|
|
226
226
|
# How to cite
|
227
227
|
If you are using this library in your academic work (as I know many folks are) please find the BibTex here:
|
228
228
|
|
229
|
-
@software{Baccianella_JSON_Repair_-
|
229
|
+
@software{Baccianella_JSON_Repair_-_2025,
|
230
230
|
author = {Baccianella, Stefano},
|
231
|
-
month =
|
231
|
+
month = feb,
|
232
232
|
title = {{JSON Repair - A python module to repair invalid JSON, commonly used to parse the output of LLMs}},
|
233
233
|
url = {https://github.com/mangiucugna/json_repair},
|
234
|
-
version = {0.
|
235
|
-
year = {
|
234
|
+
version = {0.39.0},
|
235
|
+
year = {2025}
|
236
236
|
}
|
237
237
|
|
238
238
|
Thank you for citing my work and please send me a link to the paper if you can!
|
@@ -1,13 +1,13 @@
|
|
1
1
|
json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
|
2
2
|
json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
|
3
3
|
json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
|
4
|
-
json_repair/json_parser.py,sha256=
|
4
|
+
json_repair/json_parser.py,sha256=kt58S7pHxCOfqktzn48iMrvd3vi7HTfK6OD02PWwWcc,38189
|
5
5
|
json_repair/json_repair.py,sha256=k-5HRRlCqrxNmJi0u1KE3IUeL4HXqi1XZ7oAL-NFDLo,10314
|
6
6
|
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
7
|
json_repair/string_file_wrapper.py,sha256=koZmdq2-Z5K7XF1bDqX6dEbNaVMJYcMTjq-aGe6NQvA,4526
|
8
|
-
json_repair-0.39.
|
9
|
-
json_repair-0.39.
|
10
|
-
json_repair-0.39.
|
11
|
-
json_repair-0.39.
|
12
|
-
json_repair-0.39.
|
13
|
-
json_repair-0.39.
|
8
|
+
json_repair-0.39.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
9
|
+
json_repair-0.39.1.dist-info/METADATA,sha256=T1k1afyqqWG-NXYHpgntQsPYpmIdkLg72eSo_iNwZZk,11827
|
10
|
+
json_repair-0.39.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
11
|
+
json_repair-0.39.1.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
|
12
|
+
json_repair-0.39.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
13
|
+
json_repair-0.39.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|