json-repair 0.39.1__py3-none-any.whl → 0.40.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_parser.py +53 -46
- {json_repair-0.39.1.dist-info → json_repair-0.40.0.dist-info}/METADATA +7 -7
- {json_repair-0.39.1.dist-info → json_repair-0.40.0.dist-info}/RECORD +7 -7
- {json_repair-0.39.1.dist-info → json_repair-0.40.0.dist-info}/WHEEL +1 -1
- {json_repair-0.39.1.dist-info → json_repair-0.40.0.dist-info}/LICENSE +0 -0
- {json_repair-0.39.1.dist-info → json_repair-0.40.0.dist-info}/entry_points.txt +0 -0
- {json_repair-0.39.1.dist-info → json_repair-0.40.0.dist-info}/top_level.txt +0 -0
json_repair/json_parser.py
CHANGED
@@ -617,7 +617,14 @@ class JSONParser:
|
|
617
617
|
string_acc += str(char)
|
618
618
|
self.index += 1
|
619
619
|
char = self.get_char_at()
|
620
|
-
|
620
|
+
elif self.context.current == ContextValues.OBJECT_KEY:
|
621
|
+
# In this case we just ignore this and move on
|
622
|
+
self.log(
|
623
|
+
"While parsing a string in Object Key context, we detected a quoted section that would have closed the string but has a different meaning here, ignoring it",
|
624
|
+
)
|
625
|
+
string_acc += str(char)
|
626
|
+
self.index += 1
|
627
|
+
char = self.get_char_at()
|
621
628
|
if (
|
622
629
|
char
|
623
630
|
and missing_quotes
|
@@ -698,51 +705,6 @@ class JSONParser:
|
|
698
705
|
self.index = starting_index
|
699
706
|
return ""
|
700
707
|
|
701
|
-
def get_char_at(self, count: int = 0) -> Union[str, Literal[False]]:
|
702
|
-
# Why not use something simpler? Because try/except in python is a faster alternative to an "if" statement that is often True
|
703
|
-
try:
|
704
|
-
return self.json_str[self.index + count]
|
705
|
-
except IndexError:
|
706
|
-
return False
|
707
|
-
|
708
|
-
def skip_whitespaces_at(self, idx: int = 0, move_main_index=True) -> int:
|
709
|
-
"""
|
710
|
-
This function quickly iterates on whitespaces, syntactic sugar to make the code more concise
|
711
|
-
"""
|
712
|
-
try:
|
713
|
-
char = self.json_str[self.index + idx]
|
714
|
-
except IndexError:
|
715
|
-
return idx
|
716
|
-
while char.isspace():
|
717
|
-
if move_main_index:
|
718
|
-
self.index += 1
|
719
|
-
else:
|
720
|
-
idx += 1
|
721
|
-
try:
|
722
|
-
char = self.json_str[self.index + idx]
|
723
|
-
except IndexError:
|
724
|
-
return idx
|
725
|
-
return idx
|
726
|
-
|
727
|
-
def skip_to_character(self, character: str, idx: int = 0) -> int:
|
728
|
-
"""
|
729
|
-
This function quickly iterates to find a character, syntactic sugar to make the code more concise
|
730
|
-
"""
|
731
|
-
try:
|
732
|
-
char = self.json_str[self.index + idx]
|
733
|
-
except IndexError:
|
734
|
-
return idx
|
735
|
-
while char != character:
|
736
|
-
idx += 1
|
737
|
-
try:
|
738
|
-
char = self.json_str[self.index + idx]
|
739
|
-
except IndexError:
|
740
|
-
return idx
|
741
|
-
if self.index + idx > 0 and self.json_str[self.index + idx - 1] == "\\":
|
742
|
-
# Ah this is an escaped character, try again
|
743
|
-
return self.skip_to_character(character=character, idx=idx + 1)
|
744
|
-
return idx
|
745
|
-
|
746
708
|
def parse_comment(self) -> str:
|
747
709
|
"""
|
748
710
|
Parse code-like comments:
|
@@ -813,6 +775,51 @@ class JSONParser:
|
|
813
775
|
self.index += 1
|
814
776
|
return ""
|
815
777
|
|
778
|
+
def get_char_at(self, count: int = 0) -> Union[str, Literal[False]]:
|
779
|
+
# Why not use something simpler? Because try/except in python is a faster alternative to an "if" statement that is often True
|
780
|
+
try:
|
781
|
+
return self.json_str[self.index + count]
|
782
|
+
except IndexError:
|
783
|
+
return False
|
784
|
+
|
785
|
+
def skip_whitespaces_at(self, idx: int = 0, move_main_index=True) -> int:
|
786
|
+
"""
|
787
|
+
This function quickly iterates on whitespaces, syntactic sugar to make the code more concise
|
788
|
+
"""
|
789
|
+
try:
|
790
|
+
char = self.json_str[self.index + idx]
|
791
|
+
except IndexError:
|
792
|
+
return idx
|
793
|
+
while char.isspace():
|
794
|
+
if move_main_index:
|
795
|
+
self.index += 1
|
796
|
+
else:
|
797
|
+
idx += 1
|
798
|
+
try:
|
799
|
+
char = self.json_str[self.index + idx]
|
800
|
+
except IndexError:
|
801
|
+
return idx
|
802
|
+
return idx
|
803
|
+
|
804
|
+
def skip_to_character(self, character: str, idx: int = 0) -> int:
|
805
|
+
"""
|
806
|
+
This function quickly iterates to find a character, syntactic sugar to make the code more concise
|
807
|
+
"""
|
808
|
+
try:
|
809
|
+
char = self.json_str[self.index + idx]
|
810
|
+
except IndexError:
|
811
|
+
return idx
|
812
|
+
while char != character:
|
813
|
+
idx += 1
|
814
|
+
try:
|
815
|
+
char = self.json_str[self.index + idx]
|
816
|
+
except IndexError:
|
817
|
+
return idx
|
818
|
+
if self.index + idx > 0 and self.json_str[self.index + idx - 1] == "\\":
|
819
|
+
# Ah this is an escaped character, try again
|
820
|
+
return self.skip_to_character(character=character, idx=idx + 1)
|
821
|
+
return idx
|
822
|
+
|
816
823
|
def _log(self, text: str) -> None:
|
817
824
|
window: int = 10
|
818
825
|
start: int = max(self.index - window, 0)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.39.1
|
3
|
+
Version: 0.40.0
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -227,12 +227,12 @@ In this example, any version that starts with `0.` will be acceptable, allowing
|
|
227
227
|
If you are using this library in your academic work (as I know many folks are) please find the BibTex here:
|
228
228
|
|
229
229
|
@software{Baccianella_JSON_Repair_-_2025,
|
230
|
-
author
|
231
|
-
month
|
232
|
-
title
|
233
|
-
url
|
234
|
-
version =
|
235
|
-
year
|
230
|
+
author = "Stefano {Baccianella}",
|
231
|
+
month = "feb",
|
232
|
+
title = "JSON Repair - A python module to repair invalid JSON, commonly used to parse the output of LLMs",
|
233
|
+
url = "https://github.com/mangiucugna/json_repair",
|
234
|
+
version = "0.39.1",
|
235
|
+
year = 2025
|
236
236
|
}
|
237
237
|
|
238
238
|
Thank you for citing my work and please send me a link to the paper if you can!
|
@@ -1,13 +1,13 @@
|
|
1
1
|
json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
|
2
2
|
json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
|
3
3
|
json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
|
4
|
-
json_repair/json_parser.py,sha256=
|
4
|
+
json_repair/json_parser.py,sha256=aw-iCtblc9iL24w5zljHbbblK7Ao6G49MPoj513D2KE,38750
|
5
5
|
json_repair/json_repair.py,sha256=k-5HRRlCqrxNmJi0u1KE3IUeL4HXqi1XZ7oAL-NFDLo,10314
|
6
6
|
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
7
|
json_repair/string_file_wrapper.py,sha256=koZmdq2-Z5K7XF1bDqX6dEbNaVMJYcMTjq-aGe6NQvA,4526
|
8
|
-
json_repair-0.39.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
9
|
-
json_repair-0.
|
10
|
-
json_repair-0.
|
11
|
-
json_repair-0.39.1.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
|
12
|
-
json_repair-0.39.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
13
|
-
json_repair-0.39.1.dist-info/RECORD,,
|
8
|
+
json_repair-0.40.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
9
|
+
json_repair-0.40.0.dist-info/METADATA,sha256=i43pAASjiIvd0XJ3CMO1nqaV14JNE2MjPx0U8lMJVYc,11838
|
10
|
+
json_repair-0.40.0.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
|
11
|
+
json_repair-0.40.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
|
12
|
+
json_repair-0.40.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
13
|
+
json_repair-0.40.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|