json-repair 0.27.2__py3-none-any.whl → 0.28.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_repair.py +107 -24
- json_repair/py.typed +0 -0
- {json_repair-0.27.2.dist-info → json_repair-0.28.1.dist-info}/METADATA +2 -1
- json_repair-0.28.1.dist-info/RECORD +8 -0
- {json_repair-0.27.2.dist-info → json_repair-0.28.1.dist-info}/WHEEL +1 -1
- json_repair-0.27.2.dist-info/RECORD +0 -7
- {json_repair-0.27.2.dist-info → json_repair-0.28.1.dist-info}/LICENSE +0 -0
- {json_repair-0.27.2.dist-info → json_repair-0.28.1.dist-info}/top_level.txt +0 -0
    
        json_repair/json_repair.py
    CHANGED
    
    | @@ -24,7 +24,7 @@ All supported use cases are in the unit tests | |
| 24 24 |  | 
| 25 25 | 
             
            import os
         | 
| 26 26 | 
             
            import json
         | 
| 27 | 
            -
            from typing import Any, Dict, List, Optional, Union, TextIO, Tuple
         | 
| 27 | 
            +
            from typing import Any, Dict, List, Optional, Union, TextIO, Tuple, overload, Literal
         | 
| 28 28 |  | 
| 29 29 |  | 
| 30 30 | 
             
            class StringFileWrapper:
         | 
| @@ -51,9 +51,6 @@ class StringFileWrapper: | |
| 51 51 | 
             
                        self.fd.seek(current_position)
         | 
| 52 52 | 
             
                    return self.length
         | 
| 53 53 |  | 
| 54 | 
            -
                def __setitem__(self) -> None:
         | 
| 55 | 
            -
                    raise Exception("This is read-only!")
         | 
| 56 | 
            -
             | 
| 57 54 |  | 
| 58 55 | 
             
            class LoggerConfig:
         | 
| 59 56 | 
             
                # This is a type class to simplify the declaration
         | 
| @@ -180,7 +177,7 @@ class JSONParser: | |
| 180 177 | 
             
                        # <member> starts with a <string>
         | 
| 181 178 | 
             
                        key = ""
         | 
| 182 179 | 
             
                        while self.get_char_at():
         | 
| 183 | 
            -
                            key = self.parse_string()
         | 
| 180 | 
            +
                            key = str(self.parse_string())
         | 
| 184 181 |  | 
| 185 182 | 
             
                            if key != "" or (key == "" and self.get_char_at() == ":"):
         | 
| 186 183 | 
             
                                # If the string is empty but there is a object divider, we are done here
         | 
| @@ -258,7 +255,7 @@ class JSONParser: | |
| 258 255 | 
             
                    self.reset_context()
         | 
| 259 256 | 
             
                    return arr
         | 
| 260 257 |  | 
| 261 | 
            -
                def parse_string(self) -> Union[str,  | 
| 258 | 
            +
                def parse_string(self) -> Union[str, bool, None]:
         | 
| 262 259 | 
             
                    # <string> is a string of valid characters enclosed in quotes
         | 
| 263 260 | 
             
                    # i.e. { name: "John" }
         | 
| 264 261 | 
             
                    # Somehow all weird cases in an invalid JSON happen to be resolved in this function, so be careful here
         | 
| @@ -310,8 +307,7 @@ class JSONParser: | |
| 310 307 | 
             
                        if self.get_context() == "object_key" and self.get_char_at(1) == ":":
         | 
| 311 308 | 
             
                            self.index += 1
         | 
| 312 309 | 
             
                            return ""
         | 
| 313 | 
            -
             | 
| 314 | 
            -
                        # This is a valid exception only if it's closed by a double delimiter again
         | 
| 310 | 
            +
                        # Find the next delimiter
         | 
| 315 311 | 
             
                        i = 1
         | 
| 316 312 | 
             
                        next_c = self.get_char_at(i)
         | 
| 317 313 | 
             
                        while next_c and next_c != rstring_delimiter:
         | 
| @@ -386,7 +382,7 @@ class JSONParser: | |
| 386 382 | 
             
                        string_acc += char
         | 
| 387 383 | 
             
                        self.index += 1
         | 
| 388 384 | 
             
                        char = self.get_char_at()
         | 
| 389 | 
            -
                        if len(string_acc) > 0 and string_acc[-1] == "\\":
         | 
| 385 | 
            +
                        if char and len(string_acc) > 0 and string_acc[-1] == "\\":
         | 
| 390 386 | 
             
                            # This is a special case, if people use real strings this might happen
         | 
| 391 387 | 
             
                            self.log("Found a stray escape sequence, normalizing it", "info")
         | 
| 392 388 | 
             
                            string_acc = string_acc[:-1]
         | 
| @@ -442,7 +438,7 @@ class JSONParser: | |
| 442 438 | 
             
                                ]:
         | 
| 443 439 | 
             
                                    # This is a bit of a weird workaround, essentially in object_value context we don't always break on commas
         | 
| 444 440 | 
             
                                    # This is because the routine after will make sure to correct any bad guess and this solves a corner case
         | 
| 445 | 
            -
                                    if next_c.isalpha():
         | 
| 441 | 
            +
                                    if check_comma_in_object_value and next_c.isalpha():
         | 
| 446 442 | 
             
                                        check_comma_in_object_value = False
         | 
| 447 443 | 
             
                                    # If we are in an object context, let's check for the right delimiters
         | 
| 448 444 | 
             
                                    if (
         | 
| @@ -477,7 +473,7 @@ class JSONParser: | |
| 477 473 | 
             
                                            "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here since this is the last element of the object, ignoring it",
         | 
| 478 474 | 
             
                                            "info",
         | 
| 479 475 | 
             
                                        )
         | 
| 480 | 
            -
                                        string_acc += char
         | 
| 476 | 
            +
                                        string_acc += str(char)
         | 
| 481 477 | 
             
                                        self.index += 1
         | 
| 482 478 | 
             
                                        char = self.get_char_at()
         | 
| 483 479 | 
             
                                elif next_c == rstring_delimiter:
         | 
| @@ -507,7 +503,7 @@ class JSONParser: | |
| 507 503 | 
             
                                                "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
         | 
| 508 504 | 
             
                                                "info",
         | 
| 509 505 | 
             
                                            )
         | 
| 510 | 
            -
                                            string_acc += char
         | 
| 506 | 
            +
                                            string_acc += str(char)
         | 
| 511 507 | 
             
                                            self.index += 1
         | 
| 512 508 | 
             
                                            char = self.get_char_at()
         | 
| 513 509 |  | 
| @@ -525,7 +521,8 @@ class JSONParser: | |
| 525 521 | 
             
                        if self.get_char_at() not in [":", ","]:
         | 
| 526 522 | 
             
                            return ""
         | 
| 527 523 |  | 
| 528 | 
            -
                    # A fallout of the previous special case in the while loop, we need to update the index only if we had a closing quote
         | 
| 524 | 
            +
                    # A fallout of the previous special case in the while loop,
         | 
| 525 | 
            +
                    # we need to update the index only if we had a closing quote
         | 
| 529 526 | 
             
                    if char != rstring_delimiter:
         | 
| 530 527 | 
             
                        self.log(
         | 
| 531 528 | 
             
                            "While parsing a string, we missed the closing quote, ignoring",
         | 
| @@ -567,6 +564,7 @@ class JSONParser: | |
| 567 564 | 
             
                    # <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
         | 
| 568 565 | 
             
                    starting_index = self.index
         | 
| 569 566 | 
             
                    char = (self.get_char_at() or "").lower()
         | 
| 567 | 
            +
                    value = None
         | 
| 570 568 | 
             
                    if char == "t":
         | 
| 571 569 | 
             
                        value = ("true", True)
         | 
| 572 570 | 
             
                    elif char == "f":
         | 
| @@ -587,7 +585,7 @@ class JSONParser: | |
| 587 585 | 
             
                    self.index = starting_index
         | 
| 588 586 | 
             
                    return ""
         | 
| 589 587 |  | 
| 590 | 
            -
                def get_char_at(self, count: int = 0) -> Union[str,  | 
| 588 | 
            +
                def get_char_at(self, count: int = 0) -> Union[str, Literal[False]]:
         | 
| 591 589 | 
             
                    # Why not use something simpler? Because try/except in python is a faster alternative to an "if" statement that is often True
         | 
| 592 590 | 
             
                    try:
         | 
| 593 591 | 
             
                        return self.json_str[self.index + count]
         | 
| @@ -615,16 +613,10 @@ class JSONParser: | |
| 615 613 | 
             
                        self.context.append(value)
         | 
| 616 614 |  | 
| 617 615 | 
             
                def reset_context(self) -> None:
         | 
| 618 | 
            -
                    try:
         | 
| 619 | 
            -
                        self.context.pop()
         | 
| 620 | 
            -
                    except Exception:
         | 
| 621 | 
            -
                        return
         | 
| 616 | 
            +
                    self.context.pop()
         | 
| 622 617 |  | 
| 623 618 | 
             
                def get_context(self) -> str:
         | 
| 624 | 
            -
                    try:
         | 
| 625 | 
            -
                        return self.context[-1]
         | 
| 626 | 
            -
                    except Exception:
         | 
| 627 | 
            -
                        return ""
         | 
| 619 | 
            +
                    return self.context[-1]
         | 
| 628 620 |  | 
| 629 621 | 
             
                def log(self, text: str, level: str) -> None:
         | 
| 630 622 | 
             
                    if level == self.logger.log_level:
         | 
| @@ -640,6 +632,50 @@ class JSONParser: | |
| 640 632 | 
             
                        )
         | 
| 641 633 |  | 
| 642 634 |  | 
| 635 | 
            +
            @overload
         | 
| 636 | 
            +
            def repair_json(
         | 
| 637 | 
            +
                json_str: str = "",
         | 
| 638 | 
            +
                return_objects: Optional[Literal[False]] = False,
         | 
| 639 | 
            +
                skip_json_loads: Optional[bool] = False,
         | 
| 640 | 
            +
                logging: Optional[Literal[False]] = False,  # None is treated as False
         | 
| 641 | 
            +
                json_fd: Optional[TextIO] = None,
         | 
| 642 | 
            +
                ensure_ascii: Optional[bool] = True,
         | 
| 643 | 
            +
            ) -> str: ...
         | 
| 644 | 
            +
             | 
| 645 | 
            +
             | 
| 646 | 
            +
            @overload
         | 
| 647 | 
            +
            def repair_json(
         | 
| 648 | 
            +
                json_str: str = "",
         | 
| 649 | 
            +
                return_objects: Literal[True] = True,
         | 
| 650 | 
            +
                skip_json_loads: Optional[bool] = False,
         | 
| 651 | 
            +
                logging: Optional[Literal[False]] = False,  # None is treated as False
         | 
| 652 | 
            +
                json_fd: Optional[TextIO] = None,
         | 
| 653 | 
            +
                ensure_ascii: Optional[bool] = True,
         | 
| 654 | 
            +
            ) -> JSONReturnType: ...
         | 
| 655 | 
            +
             | 
| 656 | 
            +
             | 
| 657 | 
            +
            @overload
         | 
| 658 | 
            +
            def repair_json(
         | 
| 659 | 
            +
                json_str: str = "",
         | 
| 660 | 
            +
                return_objects: Optional[Literal[False]] = False,  # None is treated as False
         | 
| 661 | 
            +
                skip_json_loads: Optional[bool] = False,
         | 
| 662 | 
            +
                logging: Literal[True] = True,
         | 
| 663 | 
            +
                json_fd: Optional[TextIO] = None,
         | 
| 664 | 
            +
                ensure_ascii: Optional[bool] = True,
         | 
| 665 | 
            +
            ) -> Tuple[str, List[Dict[str, str]]]: ...
         | 
| 666 | 
            +
             | 
| 667 | 
            +
             | 
| 668 | 
            +
            @overload
         | 
| 669 | 
            +
            def repair_json(
         | 
| 670 | 
            +
                json_str: str = "",
         | 
| 671 | 
            +
                return_objects: Literal[True] = True,
         | 
| 672 | 
            +
                skip_json_loads: Optional[bool] = False,
         | 
| 673 | 
            +
                logging: Literal[True] = True,
         | 
| 674 | 
            +
                json_fd: Optional[TextIO] = None,
         | 
| 675 | 
            +
                ensure_ascii: Optional[bool] = True,
         | 
| 676 | 
            +
            ) -> Tuple[JSONReturnType, List[Dict[str, str]]]: ...
         | 
| 677 | 
            +
             | 
| 678 | 
            +
             | 
| 643 679 | 
             
            def repair_json(
         | 
| 644 680 | 
             
                json_str: str = "",
         | 
| 645 681 | 
             
                return_objects: Optional[bool] = False,
         | 
| @@ -653,7 +689,7 @@ def repair_json( | |
| 653 689 | 
             
                It will return the fixed string by default.
         | 
| 654 690 | 
             
                When `return_objects=True` is passed, it will return the decoded data structure instead.
         | 
| 655 691 | 
             
                When `skip_json_loads=True` is passed, it will not call the built-in json.loads() function
         | 
| 656 | 
            -
                When `logging=True` is passed, it will return  | 
| 692 | 
            +
                When `logging=True` is passed, it will return a tuple with the repaired json and a log of all repair actions
         | 
| 657 693 | 
             
                """
         | 
| 658 694 | 
             
                parser = JSONParser(json_str, json_fd, logging)
         | 
| 659 695 | 
             
                if skip_json_loads:
         | 
| @@ -666,12 +702,29 @@ def repair_json( | |
| 666 702 | 
             
                            parsed_json = json.loads(json_str)
         | 
| 667 703 | 
             
                    except json.JSONDecodeError:
         | 
| 668 704 | 
             
                        parsed_json = parser.parse()
         | 
| 669 | 
            -
                # It's useful to return the actual object instead of the json string, it allows this lib to be a replacement of the json library
         | 
| 705 | 
            +
                # It's useful to return the actual object instead of the json string,
         | 
| 706 | 
            +
                # it allows this lib to be a replacement of the json library
         | 
| 670 707 | 
             
                if return_objects or logging:
         | 
| 671 708 | 
             
                    return parsed_json
         | 
| 672 709 | 
             
                return json.dumps(parsed_json, ensure_ascii=ensure_ascii)
         | 
| 673 710 |  | 
| 674 711 |  | 
| 712 | 
            +
            @overload
         | 
| 713 | 
            +
            def loads(
         | 
| 714 | 
            +
                json_str: str,
         | 
| 715 | 
            +
                skip_json_loads: Optional[bool] = False,
         | 
| 716 | 
            +
                logging: Optional[Literal[False]] = False,  # None is treated as False
         | 
| 717 | 
            +
            ) -> JSONReturnType: ...
         | 
| 718 | 
            +
             | 
| 719 | 
            +
             | 
| 720 | 
            +
            @overload
         | 
| 721 | 
            +
            def loads(
         | 
| 722 | 
            +
                json_str: str,
         | 
| 723 | 
            +
                skip_json_loads: Optional[bool] = False,
         | 
| 724 | 
            +
                logging: Literal[True] = True,
         | 
| 725 | 
            +
            ) -> Tuple[JSONReturnType, List[Dict[str, str]]]: ...
         | 
| 726 | 
            +
             | 
| 727 | 
            +
             | 
| 675 728 | 
             
            def loads(
         | 
| 676 729 | 
             
                json_str: str,
         | 
| 677 730 | 
             
                skip_json_loads: Optional[bool] = False,
         | 
| @@ -689,6 +742,20 @@ def loads( | |
| 689 742 | 
             
                )
         | 
| 690 743 |  | 
| 691 744 |  | 
| 745 | 
            +
            @overload
         | 
| 746 | 
            +
            def load(
         | 
| 747 | 
            +
                fd: TextIO,
         | 
| 748 | 
            +
                skip_json_loads: Optional[bool] = False,
         | 
| 749 | 
            +
                logging: Optional[Literal[False]] = False,
         | 
| 750 | 
            +
            ) -> JSONReturnType: ...
         | 
| 751 | 
            +
             | 
| 752 | 
            +
             | 
| 753 | 
            +
            @overload
         | 
| 754 | 
            +
            def load(
         | 
| 755 | 
            +
                fd: TextIO, skip_json_loads: Optional[bool] = False, logging: Literal[True] = True
         | 
| 756 | 
            +
            ) -> Tuple[JSONReturnType, List[Dict[str, str]]]: ...
         | 
| 757 | 
            +
             | 
| 758 | 
            +
             | 
| 692 759 | 
             
            def load(
         | 
| 693 760 | 
             
                fd: TextIO, skip_json_loads: Optional[bool] = False, logging: Optional[bool] = False
         | 
| 694 761 | 
             
            ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
         | 
| @@ -704,6 +771,22 @@ def load( | |
| 704 771 | 
             
                )
         | 
| 705 772 |  | 
| 706 773 |  | 
| 774 | 
            +
            @overload
         | 
| 775 | 
            +
            def from_file(
         | 
| 776 | 
            +
                filename: str,
         | 
| 777 | 
            +
                skip_json_loads: Optional[bool] = False,
         | 
| 778 | 
            +
                logging: Optional[Literal[False]] = False,
         | 
| 779 | 
            +
            ) -> JSONReturnType: ...
         | 
| 780 | 
            +
             | 
| 781 | 
            +
             | 
| 782 | 
            +
            @overload
         | 
| 783 | 
            +
            def from_file(
         | 
| 784 | 
            +
                filename: str,
         | 
| 785 | 
            +
                skip_json_loads: Optional[bool] = False,
         | 
| 786 | 
            +
                logging: Literal[True] = True,
         | 
| 787 | 
            +
            ) -> Tuple[JSONReturnType, List[Dict[str, str]]]: ...
         | 
| 788 | 
            +
             | 
| 789 | 
            +
             | 
| 707 790 | 
             
            def from_file(
         | 
| 708 791 | 
             
                filename: str,
         | 
| 709 792 | 
             
                skip_json_loads: Optional[bool] = False,
         | 
    
        json_repair/py.typed
    ADDED
    
    | 
            File without changes
         | 
| @@ -1,6 +1,6 @@ | |
| 1 1 | 
             
            Metadata-Version: 2.1
         | 
| 2 2 | 
             
            Name: json_repair
         | 
| 3 | 
            -
            Version: 0.27.2
         | 
| 3 | 
            +
            Version: 0.28.1
         | 
| 4 4 | 
             
            Summary: A package to repair broken json strings
         | 
| 5 5 | 
             
            Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
         | 
| 6 6 | 
             
            License: MIT License
         | 
| @@ -27,6 +27,7 @@ License: MIT License | |
| 27 27 |  | 
| 28 28 | 
             
            Project-URL: Homepage, https://github.com/mangiucugna/json_repair/
         | 
| 29 29 | 
             
            Project-URL: Bug Tracker, https://github.com/mangiucugna/json_repair/issues
         | 
| 30 | 
            +
            Project-URL: Live demo, https://mangiucugna.github.io/json_repair/
         | 
| 30 31 | 
             
            Keywords: JSON,REPAIR,LLM,PARSER
         | 
| 31 32 | 
             
            Classifier: Programming Language :: Python :: 3
         | 
| 32 33 | 
             
            Classifier: License :: OSI Approved :: MIT License
         | 
| @@ -0,0 +1,8 @@ | |
| 1 | 
            +
            json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
         | 
| 2 | 
            +
            json_repair/json_repair.py,sha256=_NBAaY6iqIp1cB1W-lnQ3uS6nc5DjZZyf_HeklYmDyY,32502
         | 
| 3 | 
            +
            json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
         | 
| 4 | 
            +
            json_repair-0.28.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
         | 
| 5 | 
            +
            json_repair-0.28.1.dist-info/METADATA,sha256=i9SaIoWFc7YjuQLLN1Rd8GsmVAy0rWJ9-fNwsVDR_KA,8043
         | 
| 6 | 
            +
            json_repair-0.28.1.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
         | 
| 7 | 
            +
            json_repair-0.28.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
         | 
| 8 | 
            +
            json_repair-0.28.1.dist-info/RECORD,,
         | 
| @@ -1,7 +0,0 @@ | |
| 1 | 
            -
            json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
         | 
| 2 | 
            -
            json_repair/json_repair.py,sha256=599pWb3Wn7Lltvy8X3eWN9u7ccnSGdAaHt5De_L219s,30337
         | 
| 3 | 
            -
            json_repair-0.27.2.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
         | 
| 4 | 
            -
            json_repair-0.27.2.dist-info/METADATA,sha256=yTnkoMdKmX0_E48cLHflA8grpL00MQJb91yLfWpgxdA,7976
         | 
| 5 | 
            -
            json_repair-0.27.2.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
         | 
| 6 | 
            -
            json_repair-0.27.2.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
         | 
| 7 | 
            -
            json_repair-0.27.2.dist-info/RECORD,,
         | 
| 
            File without changes
         | 
| 
            File without changes
         |