json-repair 0.39.0__py3-none-any.whl → 0.39.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_parser.py +21 -35
- {json_repair-0.39.0.dist-info → json_repair-0.39.1.dist-info}/METADATA +7 -7
- {json_repair-0.39.0.dist-info → json_repair-0.39.1.dist-info}/RECORD +7 -7
- {json_repair-0.39.0.dist-info → json_repair-0.39.1.dist-info}/LICENSE +0 -0
- {json_repair-0.39.0.dist-info → json_repair-0.39.1.dist-info}/WHEEL +0 -0
- {json_repair-0.39.0.dist-info → json_repair-0.39.1.dist-info}/entry_points.txt +0 -0
- {json_repair-0.39.0.dist-info → json_repair-0.39.1.dist-info}/top_level.txt +0 -0
json_repair/json_parser.py
CHANGED
@@ -9,6 +9,7 @@ JSONReturnType = Union[Dict[str, Any], List[Any], str, float, int, bool, None]
|
|
9
9
|
class JSONParser:
|
10
10
|
# Constants
|
11
11
|
STRING_DELIMITERS = ['"', "'", "“", "”"]
|
12
|
+
NUMBER_CHARS = set("0123456789-.eE/,")
|
12
13
|
|
13
14
|
def __init__(
|
14
15
|
self,
|
@@ -129,8 +130,6 @@ class JSONParser:
|
|
129
130
|
# Context is used in the string parser to manage the lack of quotes
|
130
131
|
self.context.set(ContextValues.OBJECT_KEY)
|
131
132
|
|
132
|
-
self.skip_whitespaces_at()
|
133
|
-
|
134
133
|
# Save this index in case we need find a duplicate key
|
135
134
|
rollback_index = self.index
|
136
135
|
|
@@ -219,18 +218,13 @@ class JSONParser:
|
|
219
218
|
char = self.get_char_at()
|
220
219
|
|
221
220
|
# Especially at the end of an LLM generated json you might miss the last "]"
|
222
|
-
char = self.get_char_at()
|
223
221
|
if char and char != "]":
|
224
222
|
self.log(
|
225
|
-
"While parsing an array we missed the closing ], adding it back",
|
226
|
-
)
|
227
|
-
self.index -= 1
|
228
|
-
# Add the missing closing bracket
|
229
|
-
self.json_str = (
|
230
|
-
self.json_str[: self.index + 1] + "]" + self.json_str[self.index + 1 :]
|
223
|
+
"While parsing an array we missed the closing ], ignoring it",
|
231
224
|
)
|
232
225
|
|
233
226
|
self.index += 1
|
227
|
+
|
234
228
|
self.context.reset()
|
235
229
|
return arr
|
236
230
|
|
@@ -275,15 +269,11 @@ class JSONParser:
|
|
275
269
|
self.log(
|
276
270
|
"While parsing a string, we found a literal instead of a quote",
|
277
271
|
)
|
278
|
-
self.log(
|
279
|
-
"While parsing a string, we found no starting quote. Will add the quote back",
|
280
|
-
)
|
281
272
|
missing_quotes = True
|
282
273
|
|
283
274
|
if not missing_quotes:
|
284
275
|
self.index += 1
|
285
276
|
|
286
|
-
self.skip_whitespaces_at()
|
287
277
|
# There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
|
288
278
|
if self.get_char_at() in self.STRING_DELIMITERS:
|
289
279
|
# If the next character is the same type of quote, then we manage it as double quotes
|
@@ -583,6 +573,13 @@ class JSONParser:
|
|
583
573
|
elif (
|
584
574
|
next_c == rstring_delimiter and self.get_char_at(i - 1) != "\\"
|
585
575
|
):
|
576
|
+
# Check if self.index:self.index+i is only whitespaces, break if that's the case
|
577
|
+
if all(
|
578
|
+
str(self.get_char_at(j)).isspace()
|
579
|
+
for j in range(1, i)
|
580
|
+
if self.get_char_at(j)
|
581
|
+
):
|
582
|
+
break
|
586
583
|
if self.context.current == ContextValues.OBJECT_VALUE:
|
587
584
|
# But this might not be it! This could be just a missing comma
|
588
585
|
# We found a delimiter and we need to check if this is a key
|
@@ -610,26 +607,16 @@ class JSONParser:
|
|
610
607
|
self.index += 1
|
611
608
|
char = self.get_char_at()
|
612
609
|
elif self.context.current == ContextValues.ARRAY:
|
613
|
-
#
|
614
|
-
#
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
)
|
624
|
-
next_c = self.get_char_at(i)
|
625
|
-
if next_c and next_c in [",", "]"]:
|
626
|
-
self.log(
|
627
|
-
"While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
|
628
|
-
)
|
629
|
-
unmatched_delimiter = not unmatched_delimiter
|
630
|
-
string_acc += str(char)
|
631
|
-
self.index += 1
|
632
|
-
char = self.get_char_at()
|
610
|
+
# If we got up to here it means that this is a situation like this:
|
611
|
+
# ["bla bla bla "puppy" bla bla bla "kitty" bla bla"]
|
612
|
+
# So we need to ignore this quote
|
613
|
+
self.log(
|
614
|
+
"While parsing a string in Array context, we detected a quoted section that would have closed the string but has a different meaning here, ignoring it",
|
615
|
+
)
|
616
|
+
unmatched_delimiter = not unmatched_delimiter
|
617
|
+
string_acc += str(char)
|
618
|
+
self.index += 1
|
619
|
+
char = self.get_char_at()
|
633
620
|
|
634
621
|
if (
|
635
622
|
char
|
@@ -663,10 +650,9 @@ class JSONParser:
|
|
663
650
|
def parse_number(self) -> Union[float, int, str, JSONReturnType]:
|
664
651
|
# <number> is a valid real number expressed in one of a number of given formats
|
665
652
|
number_str = ""
|
666
|
-
number_chars = set("0123456789-.eE/,")
|
667
653
|
char = self.get_char_at()
|
668
654
|
is_array = self.context.current == ContextValues.ARRAY
|
669
|
-
while char and char in
|
655
|
+
while char and char in self.NUMBER_CHARS and (not is_array or char != ","):
|
670
656
|
number_str += char
|
671
657
|
self.index += 1
|
672
658
|
char = self.get_char_at()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.39.0
|
3
|
+
Version: 0.39.1
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -196,12 +196,12 @@ pipx install json-repair
|
|
196
196
|
to know all options available:
|
197
197
|
```
|
198
198
|
$ json_repair -h
|
199
|
-
usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
|
199
|
+
usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] [filename]
|
200
200
|
|
201
201
|
Repair and parse JSON files.
|
202
202
|
|
203
203
|
positional arguments:
|
204
|
-
filename The JSON file to repair
|
204
|
+
filename The JSON file to repair (if omitted, reads from stdin)
|
205
205
|
|
206
206
|
options:
|
207
207
|
-h, --help show this help message and exit
|
@@ -226,13 +226,13 @@ In this example, any version that starts with `0.` will be acceptable, allowing
|
|
226
226
|
# How to cite
|
227
227
|
If you are using this library in your academic work (as I know many folks are) please find the BibTex here:
|
228
228
|
|
229
|
-
@software{Baccianella_JSON_Repair_-
|
229
|
+
@software{Baccianella_JSON_Repair_-_2025,
|
230
230
|
author = {Baccianella, Stefano},
|
231
|
-
month =
|
231
|
+
month = feb,
|
232
232
|
title = {{JSON Repair - A python module to repair invalid JSON, commonly used to parse the output of LLMs}},
|
233
233
|
url = {https://github.com/mangiucugna/json_repair},
|
234
|
-
version = {0.
|
235
|
-
year = {
|
234
|
+
version = {0.39.0},
|
235
|
+
year = {2025}
|
236
236
|
}
|
237
237
|
|
238
238
|
Thank you for citing my work and please send me a link to the paper if you can!
|
@@ -1,13 +1,13 @@
|
|
1
1
|
json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
|
2
2
|
json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
|
3
3
|
json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
|
4
|
-
json_repair/json_parser.py,sha256=
|
4
|
+
json_repair/json_parser.py,sha256=kt58S7pHxCOfqktzn48iMrvd3vi7HTfK6OD02PWwWcc,38189
|
5
5
|
json_repair/json_repair.py,sha256=k-5HRRlCqrxNmJi0u1KE3IUeL4HXqi1XZ7oAL-NFDLo,10314
|
6
6
|
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
7
|
json_repair/string_file_wrapper.py,sha256=koZmdq2-Z5K7XF1bDqX6dEbNaVMJYcMTjq-aGe6NQvA,4526
|
8
|
-
json_repair-0.39.
|
9
|
-
json_repair-0.39.
|
10
|
-
json_repair-0.39.
|
11
|
-
json_repair-0.39.
|
12
|
-
json_repair-0.39.
|
13
|
-
json_repair-0.39.
|
8
|
+
json_repair-0.39.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
9
|
+
json_repair-0.39.1.dist-info/METADATA,sha256=T1k1afyqqWG-NXYHpgntQsPYpmIdkLg72eSo_iNwZZk,11827
|
10
|
+
json_repair-0.39.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
11
|
+
json_repair-0.39.1.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
|
12
|
+
json_repair-0.39.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
13
|
+
json_repair-0.39.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|