json-repair 0.39.1__py3-none-any.whl → 0.41.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@ from typing import Any, Dict, List, Literal, Optional, TextIO, Tuple, Union
2
2
 
3
3
  from .json_context import ContextValues, JsonContext
4
4
  from .string_file_wrapper import StringFileWrapper
5
+ from .object_comparer import ObjectComparer
5
6
 
6
7
  JSONReturnType = Union[Dict[str, Any], List[Any], str, float, int, bool, None]
7
8
 
@@ -54,6 +55,9 @@ class JSONParser:
54
55
  while self.index < len(self.json_str):
55
56
  j = self.parse_json()
56
57
  if j != "":
58
+ if ObjectComparer.is_same_object(json[-1], j):
59
+ # replace the last entry with the new one since the new one seems an update
60
+ json.pop()
57
61
  json.append(j)
58
62
  if self.index == last_index:
59
63
  self.index += 1
@@ -617,7 +621,14 @@ class JSONParser:
617
621
  string_acc += str(char)
618
622
  self.index += 1
619
623
  char = self.get_char_at()
620
-
624
+ elif self.context.current == ContextValues.OBJECT_KEY:
625
+ # In this case we just ignore this and move on
626
+ self.log(
627
+ "While parsing a string in Object Key context, we detected a quoted section that would have closed the string but has a different meaning here, ignoring it",
628
+ )
629
+ string_acc += str(char)
630
+ self.index += 1
631
+ char = self.get_char_at()
621
632
  if (
622
633
  char
623
634
  and missing_quotes
@@ -698,51 +709,6 @@ class JSONParser:
698
709
  self.index = starting_index
699
710
  return ""
700
711
 
701
- def get_char_at(self, count: int = 0) -> Union[str, Literal[False]]:
702
- # Why not use something simpler? Because try/except in python is a faster alternative to an "if" statement that is often True
703
- try:
704
- return self.json_str[self.index + count]
705
- except IndexError:
706
- return False
707
-
708
- def skip_whitespaces_at(self, idx: int = 0, move_main_index=True) -> int:
709
- """
710
- This function quickly iterates on whitespaces, syntactic sugar to make the code more concise
711
- """
712
- try:
713
- char = self.json_str[self.index + idx]
714
- except IndexError:
715
- return idx
716
- while char.isspace():
717
- if move_main_index:
718
- self.index += 1
719
- else:
720
- idx += 1
721
- try:
722
- char = self.json_str[self.index + idx]
723
- except IndexError:
724
- return idx
725
- return idx
726
-
727
- def skip_to_character(self, character: str, idx: int = 0) -> int:
728
- """
729
- This function quickly iterates to find a character, syntactic sugar to make the code more concise
730
- """
731
- try:
732
- char = self.json_str[self.index + idx]
733
- except IndexError:
734
- return idx
735
- while char != character:
736
- idx += 1
737
- try:
738
- char = self.json_str[self.index + idx]
739
- except IndexError:
740
- return idx
741
- if self.index + idx > 0 and self.json_str[self.index + idx - 1] == "\\":
742
- # Ah this is an escaped character, try again
743
- return self.skip_to_character(character=character, idx=idx + 1)
744
- return idx
745
-
746
712
  def parse_comment(self) -> str:
747
713
  """
748
714
  Parse code-like comments:
@@ -813,6 +779,51 @@ class JSONParser:
813
779
  self.index += 1
814
780
  return ""
815
781
 
782
def get_char_at(self, count: int = 0) -> Union[str, Literal[False]]:
    """
    Return the character located `count` positions away from the current parsing index.

    Args:
        count: offset added to `self.index` before indexing `self.json_str`.

    Returns:
        The character at `self.index + count`, or `False` when indexing
        `self.json_str` at that position raises `IndexError`.
    """
    # Why not use something simpler? Because try/except in python is a faster alternative to an "if" statement that is often True
    try:
        return self.json_str[self.index + count]
    except IndexError:
        return False
788
+
789
def skip_whitespaces_at(self, idx: int = 0, move_main_index: bool = True) -> int:
    """
    This function quickly iterates on whitespaces, syntactic sugar to make the code more concise

    Starting from position `self.index + idx`, consecutive whitespace characters
    (as reported by `str.isspace`) are skipped in one of two modes:

    - `move_main_index=True`: `self.index` itself is advanced past every
      whitespace character and `idx` is left untouched.
    - `move_main_index=False`: `self.index` is not modified; `idx` is incremented
      instead, so the caller can look ahead without consuming input.

    Returns:
        The final value of `idx`. The scan also stops (returning `idx`) as soon
        as indexing `self.json_str` raises `IndexError`.
    """
    try:
        char = self.json_str[self.index + idx]
    except IndexError:
        # Nothing to read at the starting position
        return idx
    while char.isspace():
        if move_main_index:
            # Consume the whitespace: the next lookup uses the new self.index
            self.index += 1
        else:
            # Look-ahead only: widen the offset and leave self.index alone
            idx += 1
        try:
            char = self.json_str[self.index + idx]
        except IndexError:
            return idx
    return idx
807
+
808
def skip_to_character(self, character: str, idx: int = 0) -> int:
    """
    This function quickly iterates to find a character, syntactic sugar to make the code more concise

    Scans `self.json_str` forward from position `self.index + idx` and returns
    the offset, relative to `self.index`, of the first occurrence of `character`
    that is not immediately preceded by a backslash. `self.index` is never
    modified.

    The search is iterative: an escaped occurrence simply advances the offset,
    so inputs containing a very large number of escaped occurrences cannot
    exhaust the interpreter's recursion limit.

    Args:
        character: the character to look for.
        idx: offset from `self.index` at which the scan starts.

    Returns:
        The offset of the match, or the first offset at which indexing
        `self.json_str` raises `IndexError` when no match is found.
    """
    while True:
        try:
            char = self.json_str[self.index + idx]
        except IndexError:
            # Ran past the end of the input without finding a match
            return idx
        if char == character:
            # Only the single preceding character is inspected to decide
            # whether this occurrence is escaped
            is_escaped = (
                self.index + idx > 0
                and self.json_str[self.index + idx - 1] == "\\"
            )
            if not is_escaped:
                return idx
            # Ah this is an escaped character, try again from the next position
        idx += 1
826
+
816
827
  def _log(self, text: str) -> None:
817
828
  window: int = 10
818
829
  start: int = max(self.index - window, 0)
@@ -0,0 +1,63 @@
1
+ from typing import Any
2
+
3
+
4
class ObjectComparer:
    """
    Structural comparison helper for already-parsed JSON values.

    Two values are considered "the same object" when they have the same shape:
    identical types, identical dictionary keys and identical list lengths at
    every nesting level. Scalar values themselves are NOT compared, only their
    types are.
    """

    def __init__(self) -> None:
        return

    @staticmethod
    def is_same_object(obj1: Any, obj2: Any, path: str = "") -> bool:
        """
        Recursively compares two objects and ensures that:
        - Their types match
        - Their keys/structure match

        The comparison is silent: nothing is written to stdout, so it can be
        used from code whose standard output is consumed by another program.

        Args:
            obj1: first value to compare.
            obj2: second value to compare.
            path: accepted for backward compatibility with existing callers;
                it does not influence the result.

        Returns:
            True when both values have exactly the same type and, for
            dictionaries and lists, the same keys / length with every
            corresponding pair of children also being the same object.
            False otherwise.
        """
        if type(obj1) is not type(obj2):
            # Fail immediately if the types don't match
            return False

        # From here on both values share the exact same type, so checking
        # obj1 alone is enough to pick the branch.
        if isinstance(obj1, dict):
            # Key views compare like sets: any extra key on either side fails
            if obj1.keys() != obj2.keys():
                return False
            return all(
                ObjectComparer.is_same_object(value, obj2[key])
                for key, value in obj1.items()
            )

        if isinstance(obj1, list):
            if len(obj1) != len(obj2):
                return False
            return all(
                ObjectComparer.is_same_object(left, right)
                for left, right in zip(obj1, obj2)
            )

        # Same-typed scalars are structurally identical regardless of value
        return True
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: json_repair
3
- Version: 0.39.1
3
+ Version: 0.41.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -35,6 +35,7 @@ Classifier: Operating System :: OS Independent
35
35
  Requires-Python: >=3.9
36
36
  Description-Content-Type: text/markdown
37
37
  License-File: LICENSE
38
+ Dynamic: license-file
38
39
 
39
40
  [![PyPI](https://img.shields.io/pypi/v/json-repair)](https://pypi.org/project/json-repair/)
40
41
  ![Python version](https://img.shields.io/badge/python-3.9+-important)
@@ -227,12 +228,12 @@ In this example, any version that starts with `0.` will be acceptable, allowing
227
228
  If you are using this library in your academic work (as I know many folks are) please find the BibTex here:
228
229
 
229
230
  @software{Baccianella_JSON_Repair_-_2025,
230
- author = {Baccianella, Stefano},
231
- month = feb,
232
- title = {{JSON Repair - A python module to repair invalid JSON, commonly used to parse the output of LLMs}},
233
- url = {https://github.com/mangiucugna/json_repair},
234
- version = {0.39.0},
235
- year = {2025}
231
+ author = "Stefano {Baccianella}",
232
+ month = "feb",
233
+ title = "JSON Repair - A python module to repair invalid JSON, commonly used to parse the output of LLMs",
234
+ url = "https://github.com/mangiucugna/json_repair",
235
+ version = "0.39.1",
236
+ year = 2025
236
237
  }
237
238
 
238
239
  Thank you for citing my work and please send me a link to the paper if you can!
@@ -0,0 +1,14 @@
1
+ json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
2
+ json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
+ json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
4
+ json_repair/json_parser.py,sha256=ff5LPEHVfaRJ7ujVxUm7dxiwlHPzcJK0cGItm_OilpU,38996
5
+ json_repair/json_repair.py,sha256=k-5HRRlCqrxNmJi0u1KE3IUeL4HXqi1XZ7oAL-NFDLo,10314
6
+ json_repair/object_comparer.py,sha256=oMWtBySgrHDH8q2v72HnNQm8SRmudtEsVPkaydSXckE,2210
7
+ json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ json_repair/string_file_wrapper.py,sha256=koZmdq2-Z5K7XF1bDqX6dEbNaVMJYcMTjq-aGe6NQvA,4526
9
+ json_repair-0.41.0.dist-info/licenses/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
10
+ json_repair-0.41.0.dist-info/METADATA,sha256=9cD1PLAiAi5giNrDwS0TVerOD-wuzEd6wKrE1OeF-Jw,11860
11
+ json_repair-0.41.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
12
+ json_repair-0.41.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
13
+ json_repair-0.41.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
14
+ json_repair-0.41.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (78.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,13 +0,0 @@
1
- json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
2
- json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
- json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
4
- json_repair/json_parser.py,sha256=kt58S7pHxCOfqktzn48iMrvd3vi7HTfK6OD02PWwWcc,38189
5
- json_repair/json_repair.py,sha256=k-5HRRlCqrxNmJi0u1KE3IUeL4HXqi1XZ7oAL-NFDLo,10314
6
- json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- json_repair/string_file_wrapper.py,sha256=koZmdq2-Z5K7XF1bDqX6dEbNaVMJYcMTjq-aGe6NQvA,4526
8
- json_repair-0.39.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
9
- json_repair-0.39.1.dist-info/METADATA,sha256=T1k1afyqqWG-NXYHpgntQsPYpmIdkLg72eSo_iNwZZk,11827
10
- json_repair-0.39.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
11
- json_repair-0.39.1.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
12
- json_repair-0.39.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
13
- json_repair-0.39.1.dist-info/RECORD,,