json-repair 0.39.1__py3-none-any.whl → 0.41.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_parser.py +57 -46
- json_repair/object_comparer.py +63 -0
- {json_repair-0.39.1.dist-info → json_repair-0.41.0.dist-info}/METADATA +9 -8
- json_repair-0.41.0.dist-info/RECORD +14 -0
- {json_repair-0.39.1.dist-info → json_repair-0.41.0.dist-info}/WHEEL +1 -1
- json_repair-0.39.1.dist-info/RECORD +0 -13
- {json_repair-0.39.1.dist-info → json_repair-0.41.0.dist-info}/entry_points.txt +0 -0
- {json_repair-0.39.1.dist-info → json_repair-0.41.0.dist-info/licenses}/LICENSE +0 -0
- {json_repair-0.39.1.dist-info → json_repair-0.41.0.dist-info}/top_level.txt +0 -0
json_repair/json_parser.py
CHANGED
@@ -2,6 +2,7 @@ from typing import Any, Dict, List, Literal, Optional, TextIO, Tuple, Union
|
|
2
2
|
|
3
3
|
from .json_context import ContextValues, JsonContext
|
4
4
|
from .string_file_wrapper import StringFileWrapper
|
5
|
+
from .object_comparer import ObjectComparer
|
5
6
|
|
6
7
|
JSONReturnType = Union[Dict[str, Any], List[Any], str, float, int, bool, None]
|
7
8
|
|
@@ -54,6 +55,9 @@ class JSONParser:
|
|
54
55
|
while self.index < len(self.json_str):
|
55
56
|
j = self.parse_json()
|
56
57
|
if j != "":
|
58
|
+
if ObjectComparer.is_same_object(json[-1], j):
|
59
|
+
# replace the last entry with the new one since the new one seems an update
|
60
|
+
json.pop()
|
57
61
|
json.append(j)
|
58
62
|
if self.index == last_index:
|
59
63
|
self.index += 1
|
@@ -617,7 +621,14 @@ class JSONParser:
|
|
617
621
|
string_acc += str(char)
|
618
622
|
self.index += 1
|
619
623
|
char = self.get_char_at()
|
620
|
-
|
624
|
+
elif self.context.current == ContextValues.OBJECT_KEY:
|
625
|
+
# In this case we just ignore this and move on
|
626
|
+
self.log(
|
627
|
+
"While parsing a string in Object Key context, we detected a quoted section that would have closed the string but has a different meaning here, ignoring it",
|
628
|
+
)
|
629
|
+
string_acc += str(char)
|
630
|
+
self.index += 1
|
631
|
+
char = self.get_char_at()
|
621
632
|
if (
|
622
633
|
char
|
623
634
|
and missing_quotes
|
@@ -698,51 +709,6 @@ class JSONParser:
|
|
698
709
|
self.index = starting_index
|
699
710
|
return ""
|
700
711
|
|
701
|
-
def get_char_at(self, count: int = 0) -> Union[str, Literal[False]]:
|
702
|
-
# Why not use something simpler? Because try/except in python is a faster alternative to an "if" statement that is often True
|
703
|
-
try:
|
704
|
-
return self.json_str[self.index + count]
|
705
|
-
except IndexError:
|
706
|
-
return False
|
707
|
-
|
708
|
-
def skip_whitespaces_at(self, idx: int = 0, move_main_index=True) -> int:
|
709
|
-
"""
|
710
|
-
This function quickly iterates on whitespaces, syntactic sugar to make the code more concise
|
711
|
-
"""
|
712
|
-
try:
|
713
|
-
char = self.json_str[self.index + idx]
|
714
|
-
except IndexError:
|
715
|
-
return idx
|
716
|
-
while char.isspace():
|
717
|
-
if move_main_index:
|
718
|
-
self.index += 1
|
719
|
-
else:
|
720
|
-
idx += 1
|
721
|
-
try:
|
722
|
-
char = self.json_str[self.index + idx]
|
723
|
-
except IndexError:
|
724
|
-
return idx
|
725
|
-
return idx
|
726
|
-
|
727
|
-
def skip_to_character(self, character: str, idx: int = 0) -> int:
|
728
|
-
"""
|
729
|
-
This function quickly iterates to find a character, syntactic sugar to make the code more concise
|
730
|
-
"""
|
731
|
-
try:
|
732
|
-
char = self.json_str[self.index + idx]
|
733
|
-
except IndexError:
|
734
|
-
return idx
|
735
|
-
while char != character:
|
736
|
-
idx += 1
|
737
|
-
try:
|
738
|
-
char = self.json_str[self.index + idx]
|
739
|
-
except IndexError:
|
740
|
-
return idx
|
741
|
-
if self.index + idx > 0 and self.json_str[self.index + idx - 1] == "\\":
|
742
|
-
# Ah this is an escaped character, try again
|
743
|
-
return self.skip_to_character(character=character, idx=idx + 1)
|
744
|
-
return idx
|
745
|
-
|
746
712
|
def parse_comment(self) -> str:
|
747
713
|
"""
|
748
714
|
Parse code-like comments:
|
@@ -813,6 +779,51 @@ class JSONParser:
|
|
813
779
|
self.index += 1
|
814
780
|
return ""
|
815
781
|
|
782
|
+
def get_char_at(self, count: int = 0) -> Union[str, Literal[False]]:
    """
    Return the character located `count` positions away from the current index.

    Returns False when indexing `self.json_str` at that position raises IndexError.
    """
    # The lookup is attempted directly rather than bounds-checked first; the original
    # author's note is that try/except beats an "if" that is almost always True.
    position = self.index + count
    try:
        found = self.json_str[position]
    except IndexError:
        return False
    return found
|
788
|
+
|
789
|
+
def skip_whitespaces_at(self, idx: int = 0, move_main_index=True) -> int:
    """
    Walk over consecutive whitespace characters starting at `self.index + idx`.

    When `move_main_index` is true the parser's own `self.index` is advanced and
    `idx` is left untouched; otherwise `self.index` is left alone and `idx` is
    advanced instead. The (possibly updated) `idx` is returned in both cases.
    """
    while True:
        try:
            current = self.json_str[self.index + idx]
        except IndexError:
            # Ran past the end of the input: nothing more to skip
            break
        if not current.isspace():
            break
        if move_main_index:
            self.index += 1
        else:
            idx += 1
    return idx
|
807
|
+
|
808
|
+
def skip_to_character(self, character: str, idx: int = 0) -> int:
    """
    Find the offset (relative to `self.index`) of the next unescaped `character`.

    The scan starts at `self.index + idx`. An occurrence that is immediately
    preceded by a backslash is treated as escaped and the scan resumes right
    after it. If the end of the input is reached first, the offset one past
    the last readable position is returned. `self.index` is never modified.

    This is written as a loop rather than recursing once per escaped
    occurrence, so inputs containing a very large number of escaped
    characters cannot trigger a RecursionError.
    """
    while True:
        try:
            char = self.json_str[self.index + idx]
        except IndexError:
            return idx
        while char != character:
            idx += 1
            try:
                char = self.json_str[self.index + idx]
            except IndexError:
                return idx
        if self.index + idx > 0 and self.json_str[self.index + idx - 1] == "\\":
            # Ah this is an escaped character, keep looking after it
            idx += 1
            continue
        return idx
|
826
|
+
|
816
827
|
def _log(self, text: str) -> None:
|
817
828
|
window: int = 10
|
818
829
|
start: int = max(self.index - window, 0)
|
@@ -0,0 +1,63 @@
|
|
1
|
+
from typing import Any
|
2
|
+
|
3
|
+
|
4
|
+
class ObjectComparer:
    """
    Namespace for structural comparison of parsed JSON values.

    The class holds no state; `is_same_object` is a static method and can be
    called without creating an instance.
    """

    @staticmethod
    def is_same_object(obj1: Any, obj2: Any, path: str = "") -> bool:
        """
        Recursively check that two objects have the same shape.

        Two objects are considered "the same" when:
        - Their types match exactly (so `1` and `True`, or `1` and `1.0`, differ)
        - Dictionaries have exactly the same set of keys, and the values under
          each key are themselves the same by this definition
        - Lists have the same length, and items at the same position are
          themselves the same by this definition

        Values of non-container types are never compared: `"a"` and `"b"` are
        the same object for the purpose of this check.

        `path` is accepted for backward compatibility with existing callers and
        does not influence the result. The comparison writes nothing to stdout,
        so it is safe to use while the caller is emitting JSON on stdout.
        """
        if type(obj1) is not type(obj2):
            # Fail immediately if the types don't match
            return False

        # From here on both objects share one type, so checking obj1 is enough
        if isinstance(obj1, dict):
            if obj1.keys() != obj2.keys():
                return False
            return all(
                ObjectComparer.is_same_object(obj1[key], obj2[key], path=f"{path}/{key}")
                for key in obj1
            )

        if isinstance(obj1, list):
            if len(obj1) != len(obj2):
                return False
            return all(
                ObjectComparer.is_same_object(left, right, path=f"{path}[{i}]")
                for i, (left, right) in enumerate(zip(obj1, obj2))
            )

        return True
|
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.39.1
|
3
|
+
Version: 0.41.0
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -35,6 +35,7 @@ Classifier: Operating System :: OS Independent
|
|
35
35
|
Requires-Python: >=3.9
|
36
36
|
Description-Content-Type: text/markdown
|
37
37
|
License-File: LICENSE
|
38
|
+
Dynamic: license-file
|
38
39
|
|
39
40
|
[](https://pypi.org/project/json-repair/)
|
40
41
|

|
@@ -227,12 +228,12 @@ In this example, any version that starts with `0.` will be acceptable, allowing
|
|
227
228
|
If you are using this library in your academic work (as I know many folks are) please find the BibTex here:
|
228
229
|
|
229
230
|
@software{Baccianella_JSON_Repair_-_2025,
|
230
|
-
author
|
231
|
-
month
|
232
|
-
title
|
233
|
-
url
|
234
|
-
version =
|
235
|
-
year
|
231
|
+
author = "Stefano {Baccianella}",
|
232
|
+
month = "feb",
|
233
|
+
title = "JSON Repair - A python module to repair invalid JSON, commonly used to parse the output of LLMs",
|
234
|
+
url = "https://github.com/mangiucugna/json_repair",
|
235
|
+
version = "0.39.1",
|
236
|
+
year = 2025
|
236
237
|
}
|
237
238
|
|
238
239
|
Thank you for citing my work and please send me a link to the paper if you can!
|
@@ -0,0 +1,14 @@
|
|
1
|
+
json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
|
2
|
+
json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
|
3
|
+
json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
|
4
|
+
json_repair/json_parser.py,sha256=ff5LPEHVfaRJ7ujVxUm7dxiwlHPzcJK0cGItm_OilpU,38996
|
5
|
+
json_repair/json_repair.py,sha256=k-5HRRlCqrxNmJi0u1KE3IUeL4HXqi1XZ7oAL-NFDLo,10314
|
6
|
+
json_repair/object_comparer.py,sha256=oMWtBySgrHDH8q2v72HnNQm8SRmudtEsVPkaydSXckE,2210
|
7
|
+
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
|
+
json_repair/string_file_wrapper.py,sha256=koZmdq2-Z5K7XF1bDqX6dEbNaVMJYcMTjq-aGe6NQvA,4526
|
9
|
+
json_repair-0.41.0.dist-info/licenses/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
10
|
+
json_repair-0.41.0.dist-info/METADATA,sha256=9cD1PLAiAi5giNrDwS0TVerOD-wuzEd6wKrE1OeF-Jw,11860
|
11
|
+
json_repair-0.41.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
12
|
+
json_repair-0.41.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
|
13
|
+
json_repair-0.41.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
14
|
+
json_repair-0.41.0.dist-info/RECORD,,
|
@@ -1,13 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
|
2
|
-
json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
|
3
|
-
json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
|
4
|
-
json_repair/json_parser.py,sha256=kt58S7pHxCOfqktzn48iMrvd3vi7HTfK6OD02PWwWcc,38189
|
5
|
-
json_repair/json_repair.py,sha256=k-5HRRlCqrxNmJi0u1KE3IUeL4HXqi1XZ7oAL-NFDLo,10314
|
6
|
-
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
-
json_repair/string_file_wrapper.py,sha256=koZmdq2-Z5K7XF1bDqX6dEbNaVMJYcMTjq-aGe6NQvA,4526
|
8
|
-
json_repair-0.39.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
9
|
-
json_repair-0.39.1.dist-info/METADATA,sha256=T1k1afyqqWG-NXYHpgntQsPYpmIdkLg72eSo_iNwZZk,11827
|
10
|
-
json_repair-0.39.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
11
|
-
json_repair-0.39.1.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
|
12
|
-
json_repair-0.39.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
13
|
-
json_repair-0.39.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|