json-repair 0.46.1__py3-none-any.whl → 0.47.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
json_repair/__init__.py CHANGED
@@ -1,4 +1,3 @@
- from .json_repair import from_file as from_file
- from .json_repair import load as load
- from .json_repair import loads as loads
- from .json_repair import repair_json as repair_json
+ from .json_repair import from_file, load, loads, repair_json
+
+ __all__ = ["from_file", "load", "loads", "repair_json"]
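The re-export cleanup above does not change the public surface; the same four helpers are imported in one statement and now declared in __all__. For orientation, a minimal usage sketch of those four functions (the input strings are illustrative, not taken from the package's tests):

    from json_repair import repair_json, loads, load, from_file

    broken = '{"name": "Alice", "age": 30,}'  # note the trailing comma
    print(repair_json(broken))  # returns the repaired JSON document as a string
    print(loads(broken))        # returns the parsed Python object instead
    # load(fd) does the same for an open file object; from_file(path) opens the file itself.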
json_repair/json_parser.py CHANGED
@@ -105,14 +105,10 @@ class JSONParser:
  )
  return ""
  # <string> starts with a quote
- elif not self.context.empty and (
- char in self.STRING_DELIMITERS or char.isalpha()
- ):
+ elif not self.context.empty and (char in self.STRING_DELIMITERS or char.isalpha()):
  return self.parse_string()
  # <number> starts with [0-9] or minus
- elif not self.context.empty and (
- char.isdigit() or char == "-" or char == "."
- ):
+ elif not self.context.empty and (char.isdigit() or char == "-" or char == "."):
  return self.parse_number()
  elif char in ["#", "/"]:
  return self.parse_comment()
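This hunk only reflows two conditions onto single lines, but it is the dispatch that routes unquoted words to parse_string() and bare digits, minus signs, or dots to parse_number(). A small illustrative call (expected output inferred from that dispatch, not a recorded test from this release):

    from json_repair import repair_json

    # "success" is unquoted and ".5" has no leading zero; both are repaired on parse.
    print(repair_json('{"status": success, "score": .5}'))
    # expected output: {"status": "success", "score": 0.5}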
@@ -164,8 +160,7 @@ class JSONParser:
  if isinstance(prev_value, list):
  prev_value.extend(
  new_array[0]
- if len(new_array) == 1
- and isinstance(new_array[0], list)
+ if len(new_array) == 1 and isinstance(new_array[0], list)
  else new_array
  )
  self.skip_whitespaces_at()
@@ -185,11 +180,7 @@ class JSONParser:
  )
  self.index = rollback_index - 1
  # add an opening curly brace to make this work
- self.json_str = (
- self.json_str[: self.index + 1]
- + "{"
- + self.json_str[self.index + 1 :]
- )
+ self.json_str = self.json_str[: self.index + 1] + "{" + self.json_str[self.index + 1 :]
  break

  # Skip filler whitespaces
@@ -242,10 +233,7 @@ class JSONParser:
  i = 1
  i = self.skip_to_character(char, i)
  i = self.skip_whitespaces_at(idx=i + 1, move_main_index=False)
- if self.get_char_at(i) == ":":
- value = self.parse_object()
- else:
- value = self.parse_string()
+ value = self.parse_object() if self.get_char_at(i) == ":" else self.parse_string()
  else:
  value = self.parse_json()

@@ -307,10 +295,7 @@ class JSONParser:
  elif char.isalnum():
  # This could be a <boolean> and not a string. Because (T)rue or (F)alse or (N)ull are valid
  # But remember, object keys are only of type string
- if (
- char.lower() in ["t", "f", "n"]
- and self.context.current != ContextValues.OBJECT_KEY
- ):
+ if char.lower() in ["t", "f", "n"] and self.context.current != ContextValues.OBJECT_KEY:
  value = self.parse_boolean_or_null()
  if value != "":
  return value
@@ -323,15 +308,9 @@ class JSONParser:
  self.index += 1

  # There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
- if (
- self.get_char_at() in self.STRING_DELIMITERS
- and self.get_char_at() == lstring_delimiter
- ):
+ if self.get_char_at() in self.STRING_DELIMITERS and self.get_char_at() == lstring_delimiter:
  # If it's an empty key, this was easy
- if (
- self.context.current == ContextValues.OBJECT_KEY
- and self.get_char_at(1) == ":"
- ):
+ if self.context.current == ContextValues.OBJECT_KEY and self.get_char_at(1) == ":":
  self.index += 1
  return ""
  if self.get_char_at(1) == lstring_delimiter:
@@ -380,23 +359,20 @@ class JSONParser:
  char = self.get_char_at()
  unmatched_delimiter = False
  while char and char != rstring_delimiter:
- if (
- missing_quotes
- and self.context.current == ContextValues.OBJECT_KEY
- and (char == ":" or char.isspace())
- ):
+ if missing_quotes and self.context.current == ContextValues.OBJECT_KEY and (char == ":" or char.isspace()):
  self.log(
  "While parsing a string missing the left delimiter in object key context, we found a :, stopping here",
  )
  break
  if (
- (missing_quotes or not self.stream_stable)
+ not self.stream_stable
  and self.context.current == ContextValues.OBJECT_VALUE
  and char
  in [
  ",",
  "}",
  ]
+ and string_acc[-1] != rstring_delimiter
  ):
  rstring_delimiter_missing = True
  # check if this is a case in which the closing comma is NOT missing instead
@@ -421,9 +397,7 @@ class JSONParser:
  else:
  # But again, this could just be something a bit stupid like "lorem, "ipsum" sic"
  # Check if we find a : afterwards (skipping space)
- i = self.skip_whitespaces_at(
- idx=i + 1, move_main_index=False
- )
+ i = self.skip_whitespaces_at(idx=i + 1, move_main_index=False)
  next_c = self.get_char_at(i)
  if next_c and next_c != ":":
  rstring_delimiter_missing = False
@@ -461,9 +435,10 @@ class JSONParser:
  )
  break
  if (
- (missing_quotes or not self.stream_stable)
+ not self.stream_stable
  and char == "]"
  and ContextValues.ARRAY in self.context.context
+ and string_acc[-1] != rstring_delimiter
  ):
  # We found the end of an array and we are in array context
  # So let's check if we find a rstring_delimiter forward otherwise end early
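Both stream_stable hunks tighten the same heuristic: with stream_stable disabled, a string value whose closing quote never arrives is terminated at the next "," or "}" (or "]" in array context), and the new string_acc[-1] != rstring_delimiter guard avoids re-triggering when the accumulator already ends with the delimiter. A hedged illustration of that repair path (output inferred from the heuristic, not verified against the released wheel):

    from json_repair import repair_json

    # The closing quote after Bob is missing; the parser closes the string at the brace.
    print(repair_json('{"name": "Bob}'))
    # expected output: {"name": "Bob"}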
@@ -483,15 +458,30 @@ class JSONParser:
  if char in [rstring_delimiter, "t", "n", "r", "b", "\\"]:
  string_acc = string_acc[:-1]
  escape_seqs = {"t": "\t", "n": "\n", "r": "\r", "b": "\b"}
- string_acc += escape_seqs.get(char, char) or char
+ string_acc += escape_seqs.get(char, char)
  self.index += 1
  char = self.get_char_at()
+ while char and string_acc[-1] == "\\" and char in [rstring_delimiter, "\\"]:
+ # this is a bit of a special case, if I don't do this it will close the loop or create a train of \\
+ # I don't love it though
+ string_acc = string_acc[:-1]
+ string_acc += char
+ self.index += 1
+ char = self.get_char_at()
+ continue
+ elif char in ["u", "x"]:
+ # If we find a unicode escape sequence, normalize it
+ num_chars = 4 if char == "u" else 2
+ next_chars = self.json_str[self.index + 1 : self.index + 1 + num_chars]
+ if len(next_chars) == num_chars and all(c in "0123456789abcdefABCDEF" for c in next_chars):
+ self.log("Found a unicode escape sequence, normalizing it")
+ string_acc = string_acc[:-1]
+ string_acc += chr(int(next_chars, 16))
+ self.index += 1 + num_chars
+ char = self.get_char_at()
+ continue
  # If we are in object key context and we find a colon, it could be a missing right quote
- if (
- char == ":"
- and not missing_quotes
- and self.context.current == ContextValues.OBJECT_KEY
- ):
+ if char == ":" and not missing_quotes and self.context.current == ContextValues.OBJECT_KEY:
  # Ok now we need to check if this is followed by a value like "..."
  i = self.skip_to_character(character=lstring_delimiter, idx=1)
  next_c = self.get_char_at(i)
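The new elif branch is the substantive change in this hunk: a backslash followed by u (four hex digits) or x (two hex digits) is now collapsed into the character it encodes via chr(int(next_chars, 16)). A minimal sketch of the effect (expected output inferred from the new branch, not from the package's test suite):

    from json_repair import repair_json

    # \u0041 is "A" and \x42 is "B"; \xNN is not a valid JSON escape, so the repair path handles it.
    print(repair_json('{"initials": "\\u0041\\x42"}'))
    # expected output: {"initials": "AB"}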
@@ -519,17 +509,12 @@ class JSONParser:
  )
  break
  # ChatGPT sometimes forget to quote stuff in html tags or markdown, so we do this whole thing here
- if char == rstring_delimiter:
+ if char == rstring_delimiter and string_acc[-1] != "\\":
  # Special case here, in case of double quotes one after another
  if doubled_quotes and self.get_char_at(1) == rstring_delimiter:
- self.log(
- "While parsing a string, we found a doubled quote, ignoring it"
- )
+ self.log("While parsing a string, we found a doubled quote, ignoring it")
  self.index += 1
- elif (
- missing_quotes
- and self.context.current == ContextValues.OBJECT_VALUE
- ):
+ elif missing_quotes and self.context.current == ContextValues.OBJECT_VALUE:
  # In case of missing starting quote I need to check if the delimeter is the end or the beginning of a key
  i = 1
  next_c = self.get_char_at(i)
@@ -573,18 +558,9 @@ class JSONParser:
  check_comma_in_object_value = False
  # If we are in an object context, let's check for the right delimiters
  if (
- (
- ContextValues.OBJECT_KEY in self.context.context
- and next_c in [":", "}"]
- )
- or (
- ContextValues.OBJECT_VALUE in self.context.context
- and next_c == "}"
- )
- or (
- ContextValues.ARRAY in self.context.context
- and next_c in ["]", ","]
- )
+ (ContextValues.OBJECT_KEY in self.context.context and next_c in [":", "}"])
+ or (ContextValues.OBJECT_VALUE in self.context.context and next_c == "}")
+ or (ContextValues.ARRAY in self.context.context and next_c in ["]", ","])
  or (
  check_comma_in_object_value
  and self.context.current == ContextValues.OBJECT_VALUE
@@ -595,10 +571,7 @@ class JSONParser:
  i += 1
  next_c = self.get_char_at(i)
  # If we stopped for a comma in object_value context, let's check if find a "} at the end of the string
- if (
- next_c == ","
- and self.context.current == ContextValues.OBJECT_VALUE
- ):
+ if next_c == "," and self.context.current == ContextValues.OBJECT_VALUE:
  i += 1
  i = self.skip_to_character(character=rstring_delimiter, idx=i)
  next_c = self.get_char_at(i)
@@ -606,29 +579,20 @@ class JSONParser:
  )
  i += 1
  i = self.skip_whitespaces_at(idx=i, move_main_index=False)
- elif (
- next_c == rstring_delimiter and self.get_char_at(i - 1) != "\\"
- ):
+ elif next_c == rstring_delimiter and self.get_char_at(i - 1) != "\\":
  # Check if self.index:self.index+i is only whitespaces, break if that's the case
- if all(
- str(self.get_char_at(j)).isspace()
- for j in range(1, i)
- if self.get_char_at(j)
- ):
+ if all(str(self.get_char_at(j)).isspace() for j in range(1, i) if self.get_char_at(j)):
  break
  if self.context.current == ContextValues.OBJECT_VALUE:
  # But this might not be it! This could be just a missing comma
  # We found a delimiter and we need to check if this is a key
  # so find a rstring_delimiter and a colon after
- i = self.skip_to_character(
- character=rstring_delimiter, idx=i + 1
- )
+ i = self.skip_to_character(character=rstring_delimiter, idx=i + 1)
  i += 1
  next_c = self.get_char_at(i)
  while next_c and next_c != ":":
  if next_c in [",", "]", "}"] or (
- next_c == rstring_delimiter
- and self.get_char_at(i - 1) != "\\"
+ next_c == rstring_delimiter and self.get_char_at(i - 1) != "\\"
  ):
  break
  i += 1
@@ -661,12 +625,7 @@ class JSONParser:
  string_acc += str(char)
  self.index += 1
  char = self.get_char_at()
- if (
- char
- and missing_quotes
- and self.context.current == ContextValues.OBJECT_KEY
- and char.isspace()
- ):
+ if char and missing_quotes and self.context.current == ContextValues.OBJECT_KEY and char.isspace():
  self.log(
  "While parsing a string, handling an extreme corner case in which the LLM added a comment instead of valid string, invalidate the string and return an empty value",
  )
@@ -686,9 +645,7 @@ class JSONParser:
  else:
  self.index += 1

- if not self.stream_stable and (
- missing_quotes or (string_acc and string_acc[-1] == "\n")
- ):
+ if not self.stream_stable and (missing_quotes or (string_acc and string_acc[-1] == "\n")):
  # Clean the whitespaces for some corner cases
  string_acc = string_acc.rstrip()

@@ -796,9 +753,7 @@ class JSONParser:
  while True:
  char = self.get_char_at()
  if not char:
- self.log(
- "Reached end-of-string while parsing block comment; unclosed block comment."
- )
+ self.log("Reached end-of-string while parsing block comment; unclosed block comment.")
  break
  comment += char
  self.index += 1
json_repair/json_repair.py CHANGED
@@ -236,10 +236,7 @@ def cli(inline_args: list[str] | None = None) -> int:
  help="Number of spaces for indentation (Default 2)",
  )

- if inline_args is None: # pragma: no cover
- args = parser.parse_args()
- else:
- args = parser.parse_args(inline_args)
+ args = parser.parse_args() if inline_args is None else parser.parse_args(inline_args)

  # Inline mode requires a filename, so error out if none was provided.
  if args.inline and not args.filename: # pragma: no cover
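The collapsed branch keeps the same contract: inline_args=None falls back to sys.argv (the path excluded from coverage), while passing a list bypasses it, which is convenient for driving the CLI from tests or scripts. A sketch of that second path ("broken.json" is a hypothetical file name; the positional filename and the --indent flag are the ones visible in this hunk):

    from json_repair.json_repair import cli

    # Returns an int exit code, per the signature in the hunk header.
    exit_code = cli(inline_args=["broken.json", "--indent", "2"])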
json_repair/object_comparer.py CHANGED
@@ -30,10 +30,7 @@ class ObjectComparer: # pragma: no cover
  elif isinstance(obj1, list):
  if len(obj1) != len(obj2):
  return False
- for i in range(len(obj1)):
- if not ObjectComparer.is_same_object(obj1[i], obj2[i]):
- return False
- return True
+ return all(ObjectComparer.is_same_object(obj1[i], obj2[i]) for i in range(len(obj1)))

  # For atoms: types already match, so just return True
  return True
json_repair/string_file_wrapper.py CHANGED
@@ -4,7 +4,7 @@ from typing import TextIO

  class StringFileWrapper:
  # This is a trick to simplify the code, transform the filedescriptor handling into a string handling
- def __init__(self, fd: TextIO, CHUNK_LENGTH: int) -> None:
+ def __init__(self, fd: TextIO, chunk_length: int) -> None:
  """
  Initialize the StringFileWrapper with a file descriptor and chunk length.

@@ -23,10 +23,10 @@ class StringFileWrapper:
  # Buffers are 1MB strings that are read from the file
  # and kept in memory to keep reads low
  self.buffers: dict[int, str] = {}
- # CHUNK_LENGTH is in bytes
- if not CHUNK_LENGTH or CHUNK_LENGTH < 2:
- CHUNK_LENGTH = 1_000_000
- self.buffer_length = CHUNK_LENGTH
+ # chunk_length is in bytes
+ if not chunk_length or chunk_length < 2:
+ chunk_length = 1_000_000
+ self.buffer_length = chunk_length

  def get_buffer(self, index: int) -> str:
  """
@@ -65,19 +65,11 @@ class StringFileWrapper:
  buffer_index = index.start // self.buffer_length
  buffer_end = index.stop // self.buffer_length
  if buffer_index == buffer_end:
- return self.get_buffer(buffer_index)[
- index.start % self.buffer_length : index.stop % self.buffer_length
- ]
+ return self.get_buffer(buffer_index)[index.start % self.buffer_length : index.stop % self.buffer_length]
  else:
- start_slice = self.get_buffer(buffer_index)[
- index.start % self.buffer_length :
- ]
- end_slice = self.get_buffer(buffer_end)[
- : index.stop % self.buffer_length
- ]
- middle_slices = [
- self.get_buffer(i) for i in range(buffer_index + 1, buffer_end)
- ]
+ start_slice = self.get_buffer(buffer_index)[index.start % self.buffer_length :]
+ end_slice = self.get_buffer(buffer_end)[: index.stop % self.buffer_length]
+ middle_slices = [self.get_buffer(i) for i in range(buffer_index + 1, buffer_end)]
  return start_slice + "".join(middle_slices) + end_slice
  else:
  buffer_index = index // self.buffer_length
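Apart from the rename to snake_case, behaviour is unchanged: a falsy or sub-2 chunk_length still falls back to 1 MB buffers, and slicing still stitches adjacent buffers together. A heavily hedged sketch of how this wrapper is typically reached from the public API (the chunk_length keyword on load is assumed here, not shown by this diff):

    import json_repair

    with open("big_broken.json") as fd:  # hypothetical file
        # chunk_length would be forwarded to StringFileWrapper; 0 falls back to the 1 MB default.
        data = json_repair.load(fd, chunk_length=0)  # assumed keyword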
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: json_repair
- Version: 0.46.1
+ Version: 0.47.0
  Summary: A package to repair broken json strings
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
  License: MIT License
@@ -0,0 +1,14 @@
+ json_repair/__init__.py,sha256=6FDD6dEVM5Pb5o4Zodgw4ex30Hzy-YvNRy0vts9SQ4I,118
+ json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
+ json_repair/json_context.py,sha256=WsMOjqpGSr6aaDONcrk8UFtTurzWon2Qq9AoBBYseoI,934
+ json_repair/json_parser.py,sha256=YBi07AfBGoZ54locsc6j1Y7WfdretFzmt0wXDEWwRo8,40321
+ json_repair/json_repair.py,sha256=pyH5fCkS1lyNPVjkqXerQ91lBz3eTHDPgV1QtnvJm-Y,11243
+ json_repair/object_comparer.py,sha256=LlIF0MisRglzC-CiG5AxAEDCBWBHeJd-6uXYx0uRmCk,1175
+ json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ json_repair/string_file_wrapper.py,sha256=tGkWBEUPE-CZPf4uSM5NE9oSDTpskX0myJiXsl-gbds,4333
+ json_repair-0.47.0.dist-info/licenses/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
+ json_repair-0.47.0.dist-info/METADATA,sha256=HDyogQyOe0FUVMSnZ-_wm9HlOzXWkRyp4zjQPgZCfMU,12208
+ json_repair-0.47.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ json_repair-0.47.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
+ json_repair-0.47.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
+ json_repair-0.47.0.dist-info/RECORD,,
@@ -1,14 +0,0 @@
- json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
- json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
- json_repair/json_context.py,sha256=WsMOjqpGSr6aaDONcrk8UFtTurzWon2Qq9AoBBYseoI,934
- json_repair/json_parser.py,sha256=7IPu-tin9jLX_y1F9tn3UVpqILARhZYFaTTvq9xrLnU,40451
- json_repair/json_repair.py,sha256=9wxf0vVNfr_RNQI1rbVPvxQ9feEwwvgnvkiYXwGEBX8,11292
- json_repair/object_comparer.py,sha256=5-LK-s_2MAHddTxqXSzSkaIFvPXKGLh6swC1gyN74Lk,1245
- json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- json_repair/string_file_wrapper.py,sha256=uwW4B1s9Cf-iF3ANsCz-RPu2ddCqDETrt8bdojh8ufA,4485
- json_repair-0.46.1.dist-info/licenses/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
- json_repair-0.46.1.dist-info/METADATA,sha256=y-p_aOKtX4eu7p-JNj6IO3s8svB06IityZRnRKEN_xE,12208
- json_repair-0.46.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- json_repair-0.46.1.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
- json_repair-0.46.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
- json_repair-0.46.1.dist-info/RECORD,,