json-repair 0.27.2__py3-none-any.whl → 0.28.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_repair.py +107 -24
- json_repair/py.typed +0 -0
- {json_repair-0.27.2.dist-info → json_repair-0.28.1.dist-info}/METADATA +2 -1
- json_repair-0.28.1.dist-info/RECORD +8 -0
- {json_repair-0.27.2.dist-info → json_repair-0.28.1.dist-info}/WHEEL +1 -1
- json_repair-0.27.2.dist-info/RECORD +0 -7
- {json_repair-0.27.2.dist-info → json_repair-0.28.1.dist-info}/LICENSE +0 -0
- {json_repair-0.27.2.dist-info → json_repair-0.28.1.dist-info}/top_level.txt +0 -0
json_repair/json_repair.py
CHANGED
@@ -24,7 +24,7 @@ All supported use cases are in the unit tests
|
|
24
24
|
|
25
25
|
import os
|
26
26
|
import json
|
27
|
-
from typing import Any, Dict, List, Optional, Union, TextIO, Tuple
|
27
|
+
from typing import Any, Dict, List, Optional, Union, TextIO, Tuple, overload, Literal
|
28
28
|
|
29
29
|
|
30
30
|
class StringFileWrapper:
|
@@ -51,9 +51,6 @@ class StringFileWrapper:
|
|
51
51
|
self.fd.seek(current_position)
|
52
52
|
return self.length
|
53
53
|
|
54
|
-
def __setitem__(self) -> None:
|
55
|
-
raise Exception("This is read-only!")
|
56
|
-
|
57
54
|
|
58
55
|
class LoggerConfig:
|
59
56
|
# This is a type class to simplify the declaration
|
@@ -180,7 +177,7 @@ class JSONParser:
|
|
180
177
|
# <member> starts with a <string>
|
181
178
|
key = ""
|
182
179
|
while self.get_char_at():
|
183
|
-
key = self.parse_string()
|
180
|
+
key = str(self.parse_string())
|
184
181
|
|
185
182
|
if key != "" or (key == "" and self.get_char_at() == ":"):
|
186
183
|
# If the string is empty but there is a object divider, we are done here
|
@@ -258,7 +255,7 @@ class JSONParser:
|
|
258
255
|
self.reset_context()
|
259
256
|
return arr
|
260
257
|
|
261
|
-
def parse_string(self) -> Union[str,
|
258
|
+
def parse_string(self) -> Union[str, bool, None]:
|
262
259
|
# <string> is a string of valid characters enclosed in quotes
|
263
260
|
# i.e. { name: "John" }
|
264
261
|
# Somehow all weird cases in an invalid JSON happen to be resolved in this function, so be careful here
|
@@ -310,8 +307,7 @@ class JSONParser:
|
|
310
307
|
if self.get_context() == "object_key" and self.get_char_at(1) == ":":
|
311
308
|
self.index += 1
|
312
309
|
return ""
|
313
|
-
|
314
|
-
# This is a valid exception only if it's closed by a double delimiter again
|
310
|
+
# Find the next delimiter
|
315
311
|
i = 1
|
316
312
|
next_c = self.get_char_at(i)
|
317
313
|
while next_c and next_c != rstring_delimiter:
|
@@ -386,7 +382,7 @@ class JSONParser:
|
|
386
382
|
string_acc += char
|
387
383
|
self.index += 1
|
388
384
|
char = self.get_char_at()
|
389
|
-
if len(string_acc) > 0 and string_acc[-1] == "\\":
|
385
|
+
if char and len(string_acc) > 0 and string_acc[-1] == "\\":
|
390
386
|
# This is a special case, if people use real strings this might happen
|
391
387
|
self.log("Found a stray escape sequence, normalizing it", "info")
|
392
388
|
string_acc = string_acc[:-1]
|
@@ -442,7 +438,7 @@ class JSONParser:
|
|
442
438
|
]:
|
443
439
|
# This is a bit of a weird workaround, essentially in object_value context we don't always break on commas
|
444
440
|
# This is because the routine after will make sure to correct any bad guess and this solves a corner case
|
445
|
-
if next_c.isalpha():
|
441
|
+
if check_comma_in_object_value and next_c.isalpha():
|
446
442
|
check_comma_in_object_value = False
|
447
443
|
# If we are in an object context, let's check for the right delimiters
|
448
444
|
if (
|
@@ -477,7 +473,7 @@ class JSONParser:
|
|
477
473
|
"While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here since this is the last element of the object, ignoring it",
|
478
474
|
"info",
|
479
475
|
)
|
480
|
-
string_acc += char
|
476
|
+
string_acc += str(char)
|
481
477
|
self.index += 1
|
482
478
|
char = self.get_char_at()
|
483
479
|
elif next_c == rstring_delimiter:
|
@@ -507,7 +503,7 @@ class JSONParser:
|
|
507
503
|
"While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
|
508
504
|
"info",
|
509
505
|
)
|
510
|
-
string_acc += char
|
506
|
+
string_acc += str(char)
|
511
507
|
self.index += 1
|
512
508
|
char = self.get_char_at()
|
513
509
|
|
@@ -525,7 +521,8 @@ class JSONParser:
|
|
525
521
|
if self.get_char_at() not in [":", ","]:
|
526
522
|
return ""
|
527
523
|
|
528
|
-
# A fallout of the previous special case in the while loop,
|
524
|
+
# A fallout of the previous special case in the while loop,
|
525
|
+
# we need to update the index only if we had a closing quote
|
529
526
|
if char != rstring_delimiter:
|
530
527
|
self.log(
|
531
528
|
"While parsing a string, we missed the closing quote, ignoring",
|
@@ -567,6 +564,7 @@ class JSONParser:
|
|
567
564
|
# <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
|
568
565
|
starting_index = self.index
|
569
566
|
char = (self.get_char_at() or "").lower()
|
567
|
+
value = None
|
570
568
|
if char == "t":
|
571
569
|
value = ("true", True)
|
572
570
|
elif char == "f":
|
@@ -587,7 +585,7 @@ class JSONParser:
|
|
587
585
|
self.index = starting_index
|
588
586
|
return ""
|
589
587
|
|
590
|
-
def get_char_at(self, count: int = 0) -> Union[str,
|
588
|
+
def get_char_at(self, count: int = 0) -> Union[str, Literal[False]]:
|
591
589
|
# Why not use something simpler? Because try/except in python is a faster alternative to an "if" statement that is often True
|
592
590
|
try:
|
593
591
|
return self.json_str[self.index + count]
|
@@ -615,16 +613,10 @@ class JSONParser:
|
|
615
613
|
self.context.append(value)
|
616
614
|
|
617
615
|
def reset_context(self) -> None:
|
618
|
-
try:
|
619
|
-
self.context.pop()
|
620
|
-
except Exception:
|
621
|
-
return
|
616
|
+
self.context.pop()
|
622
617
|
|
623
618
|
def get_context(self) -> str:
|
624
|
-
try:
|
625
|
-
return self.context[-1]
|
626
|
-
except Exception:
|
627
|
-
return ""
|
619
|
+
return self.context[-1]
|
628
620
|
|
629
621
|
def log(self, text: str, level: str) -> None:
|
630
622
|
if level == self.logger.log_level:
|
@@ -640,6 +632,50 @@ class JSONParser:
|
|
640
632
|
)
|
641
633
|
|
642
634
|
|
635
|
+
@overload
|
636
|
+
def repair_json(
|
637
|
+
json_str: str = "",
|
638
|
+
return_objects: Optional[Literal[False]] = False,
|
639
|
+
skip_json_loads: Optional[bool] = False,
|
640
|
+
logging: Optional[Literal[False]] = False, # None is treated as False
|
641
|
+
json_fd: Optional[TextIO] = None,
|
642
|
+
ensure_ascii: Optional[bool] = True,
|
643
|
+
) -> str: ...
|
644
|
+
|
645
|
+
|
646
|
+
@overload
|
647
|
+
def repair_json(
|
648
|
+
json_str: str = "",
|
649
|
+
return_objects: Literal[True] = True,
|
650
|
+
skip_json_loads: Optional[bool] = False,
|
651
|
+
logging: Optional[Literal[False]] = False, # None is treated as False
|
652
|
+
json_fd: Optional[TextIO] = None,
|
653
|
+
ensure_ascii: Optional[bool] = True,
|
654
|
+
) -> JSONReturnType: ...
|
655
|
+
|
656
|
+
|
657
|
+
@overload
|
658
|
+
def repair_json(
|
659
|
+
json_str: str = "",
|
660
|
+
return_objects: Optional[Literal[False]] = False, # None is treated as False
|
661
|
+
skip_json_loads: Optional[bool] = False,
|
662
|
+
logging: Literal[True] = True,
|
663
|
+
json_fd: Optional[TextIO] = None,
|
664
|
+
ensure_ascii: Optional[bool] = True,
|
665
|
+
) -> Tuple[str, List[Dict[str, str]]]: ...
|
666
|
+
|
667
|
+
|
668
|
+
@overload
|
669
|
+
def repair_json(
|
670
|
+
json_str: str = "",
|
671
|
+
return_objects: Literal[True] = True,
|
672
|
+
skip_json_loads: Optional[bool] = False,
|
673
|
+
logging: Literal[True] = True,
|
674
|
+
json_fd: Optional[TextIO] = None,
|
675
|
+
ensure_ascii: Optional[bool] = True,
|
676
|
+
) -> Tuple[JSONReturnType, List[Dict[str, str]]]: ...
|
677
|
+
|
678
|
+
|
643
679
|
def repair_json(
|
644
680
|
json_str: str = "",
|
645
681
|
return_objects: Optional[bool] = False,
|
@@ -653,7 +689,7 @@ def repair_json(
|
|
653
689
|
It will return the fixed string by default.
|
654
690
|
When `return_objects=True` is passed, it will return the decoded data structure instead.
|
655
691
|
When `skip_json_loads=True` is passed, it will not call the built-in json.loads() function
|
656
|
-
When `logging=True` is passed, it will return
|
692
|
+
When `logging=True` is passed, it will return a tuple with the repaired json and a log of all repair actions
|
657
693
|
"""
|
658
694
|
parser = JSONParser(json_str, json_fd, logging)
|
659
695
|
if skip_json_loads:
|
@@ -666,12 +702,29 @@ def repair_json(
|
|
666
702
|
parsed_json = json.loads(json_str)
|
667
703
|
except json.JSONDecodeError:
|
668
704
|
parsed_json = parser.parse()
|
669
|
-
# It's useful to return the actual object instead of the json string,
|
705
|
+
# It's useful to return the actual object instead of the json string,
|
706
|
+
# it allows this lib to be a replacement of the json library
|
670
707
|
if return_objects or logging:
|
671
708
|
return parsed_json
|
672
709
|
return json.dumps(parsed_json, ensure_ascii=ensure_ascii)
|
673
710
|
|
674
711
|
|
712
|
+
@overload
|
713
|
+
def loads(
|
714
|
+
json_str: str,
|
715
|
+
skip_json_loads: Optional[bool] = False,
|
716
|
+
logging: Optional[Literal[False]] = False, # None is treated as False
|
717
|
+
) -> JSONReturnType: ...
|
718
|
+
|
719
|
+
|
720
|
+
@overload
|
721
|
+
def loads(
|
722
|
+
json_str: str,
|
723
|
+
skip_json_loads: Optional[bool] = False,
|
724
|
+
logging: Literal[True] = True,
|
725
|
+
) -> Tuple[JSONReturnType, List[Dict[str, str]]]: ...
|
726
|
+
|
727
|
+
|
675
728
|
def loads(
|
676
729
|
json_str: str,
|
677
730
|
skip_json_loads: Optional[bool] = False,
|
@@ -689,6 +742,20 @@ def loads(
|
|
689
742
|
)
|
690
743
|
|
691
744
|
|
745
|
+
@overload
|
746
|
+
def load(
|
747
|
+
fd: TextIO,
|
748
|
+
skip_json_loads: Optional[bool] = False,
|
749
|
+
logging: Optional[Literal[False]] = False,
|
750
|
+
) -> JSONReturnType: ...
|
751
|
+
|
752
|
+
|
753
|
+
@overload
|
754
|
+
def load(
|
755
|
+
fd: TextIO, skip_json_loads: Optional[bool] = False, logging: Literal[True] = True
|
756
|
+
) -> Tuple[JSONReturnType, List[Dict[str, str]]]: ...
|
757
|
+
|
758
|
+
|
692
759
|
def load(
|
693
760
|
fd: TextIO, skip_json_loads: Optional[bool] = False, logging: Optional[bool] = False
|
694
761
|
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
|
@@ -704,6 +771,22 @@ def load(
|
|
704
771
|
)
|
705
772
|
|
706
773
|
|
774
|
+
@overload
|
775
|
+
def from_file(
|
776
|
+
filename: str,
|
777
|
+
skip_json_loads: Optional[bool] = False,
|
778
|
+
logging: Optional[Literal[False]] = False,
|
779
|
+
) -> JSONReturnType: ...
|
780
|
+
|
781
|
+
|
782
|
+
@overload
|
783
|
+
def from_file(
|
784
|
+
filename: str,
|
785
|
+
skip_json_loads: Optional[bool] = False,
|
786
|
+
logging: Literal[True] = True,
|
787
|
+
) -> Tuple[JSONReturnType, List[Dict[str, str]]]: ...
|
788
|
+
|
789
|
+
|
707
790
|
def from_file(
|
708
791
|
filename: str,
|
709
792
|
skip_json_loads: Optional[bool] = False,
|
json_repair/py.typed
ADDED
File without changes
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.27.2
|
3
|
+
Version: 0.28.1
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -27,6 +27,7 @@ License: MIT License
|
|
27
27
|
|
28
28
|
Project-URL: Homepage, https://github.com/mangiucugna/json_repair/
|
29
29
|
Project-URL: Bug Tracker, https://github.com/mangiucugna/json_repair/issues
|
30
|
+
Project-URL: Live demo, https://mangiucugna.github.io/json_repair/
|
30
31
|
Keywords: JSON,REPAIR,LLM,PARSER
|
31
32
|
Classifier: Programming Language :: Python :: 3
|
32
33
|
Classifier: License :: OSI Approved :: MIT License
|
@@ -0,0 +1,8 @@
|
|
1
|
+
json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
|
2
|
+
json_repair/json_repair.py,sha256=_NBAaY6iqIp1cB1W-lnQ3uS6nc5DjZZyf_HeklYmDyY,32502
|
3
|
+
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
+
json_repair-0.28.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
5
|
+
json_repair-0.28.1.dist-info/METADATA,sha256=i9SaIoWFc7YjuQLLN1Rd8GsmVAy0rWJ9-fNwsVDR_KA,8043
|
6
|
+
json_repair-0.28.1.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
|
7
|
+
json_repair-0.28.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
8
|
+
json_repair-0.28.1.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
|
2
|
-
json_repair/json_repair.py,sha256=599pWb3Wn7Lltvy8X3eWN9u7ccnSGdAaHt5De_L219s,30337
|
3
|
-
json_repair-0.27.2.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
-
json_repair-0.27.2.dist-info/METADATA,sha256=yTnkoMdKmX0_E48cLHflA8grpL00MQJb91yLfWpgxdA,7976
|
5
|
-
json_repair-0.27.2.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
6
|
-
json_repair-0.27.2.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
-
json_repair-0.27.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|