PyPI - json-repair - Versions diffs - 0.33.0__tar.gz → 0.35.0__tar.gz - Mend

json-repair 0.33.0.tar.gz → 0.35.0.tar.gz

Files changed (20) [hide / show]

{json_repair-0.33.0/src/json_repair.egg-info → json_repair-0.35.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: json_repair
-Version: 0.33.0
+Version: 0.35.0
 Summary: A package to repair broken json strings
 Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
 License: MIT License

{json_repair-0.33.0 → json_repair-0.35.0}/pyproject.toml RENAMED Viewed

@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
 build-backend = "setuptools.build_meta"
 [project]
 name = "json_repair"
-version = "0.33.0"
+version = "0.35.0"
 license = {file = "LICENSE"}
 authors = [
   { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },

{json_repair-0.33.0 → json_repair-0.35.0}/src/json_repair/json_parser.py RENAMED Viewed

@@ -7,6 +7,9 @@ JSONReturnType = Union[Dict[str, Any], List[Any], str, float, int, bool, None]
 class JSONParser:
+    # Constants
+    STRING_DELIMITERS = ['"', "'", "“", "”"]
     def __init__(
         self,
         json_str: Union[str, StringFileWrapper],
@@ -89,7 +92,9 @@ class JSONParser:
                 )
                 return ""
             # <string> starts with a quote
-            elif not self.context.empty and (char in ['"', "'", "“"] or char.isalpha()):
+            elif not self.context.empty and (
+                char in self.STRING_DELIMITERS or char.isalpha()
+            ):
                 return self.parse_string()
             # <number> starts with [0-9] or minus
             elif not self.context.empty and (
@@ -130,6 +135,8 @@ class JSONParser:
             # <member> starts with a <string>
             key = ""
             while self.get_char_at():
+                # The rollback index needs to be updated here in case the key is empty
+                rollback_index = self.index
                 key = str(self.parse_string())
                 if key != "" or (key == "" and self.get_char_at() == ":"):
@@ -140,6 +147,12 @@ class JSONParser:
                     "While parsing an object we found a duplicate key, closing the object here and rolling back the index",
                 )
                 self.index = rollback_index - 1
+                # add an opening curly brace to make this work
+                self.json_str = (
+                    self.json_str[: self.index + 1]
+                    + "{"
+                    + self.json_str[self.index + 1 :]
+                )
                 break
             # Skip filler whitespaces
@@ -227,7 +240,7 @@ class JSONParser:
         char = self.get_char_at()
         # A valid string can only start with a valid quote or, in our case, with a literal
-        while char and char not in ['"', "'", "“"] and not char.isalnum():
+        while char and char not in self.STRING_DELIMITERS and not char.isalnum():
             self.index += 1
             char = self.get_char_at()
@@ -262,35 +275,61 @@ class JSONParser:
         if not missing_quotes:
             self.index += 1
+        self.skip_whitespaces_at()
         # There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
-        if self.get_char_at() == lstring_delimiter:
-            # If it's an empty key, this was easy
-            if (
-                self.context.current == ContextValues.OBJECT_KEY
-                and self.get_char_at(1) == ":"
-            ):
-                self.index += 1
-                return ""
-            # Find the next delimiter
-            i = self.skip_to_character(character=rstring_delimiter, idx=1)
-            next_c = self.get_char_at(i)
-            # Now check that the next character is also a delimiter to ensure that we have "".....""
-            # In that case we ignore this rstring delimiter
-            if next_c and (self.get_char_at(i + 1) or "") == rstring_delimiter:
-                self.log(
-                    "While parsing a string, we found a valid starting doubled quote, ignoring it",
-                )
-                doubled_quotes = True
-                self.index += 1
-            else:
-                # Ok this is not a doubled quote, check if this is an empty string or not
-                i = self.skip_whitespaces_at(idx=1, move_main_index=False)
+        if self.get_char_at() in self.STRING_DELIMITERS:
+            # If the next character is the same type of quote, then we manage it as double quotes
+            if self.get_char_at() == lstring_delimiter:
+                # If it's an empty key, this was easy
+                if (
+                    self.context.current == ContextValues.OBJECT_KEY
+                    and self.get_char_at(1) == ":"
+                ):
+                    self.index += 1
+                    return ""
+                if self.get_char_at(1) == lstring_delimiter:
+                    # There's something fishy about this, we found doubled quotes and then again quotes
+                    self.log(
+                        "While parsing a string, we found a doubled quote and then a quote again, ignoring it",
+                    )
+                    return ""
+                # Find the next delimiter
+                i = self.skip_to_character(character=rstring_delimiter, idx=1)
                 next_c = self.get_char_at(i)
-                if next_c not in [",", "]", "}"]:
+                # Now check that the next character is also a delimiter to ensure that we have "".....""
+                # In that case we ignore this rstring delimiter
+                if next_c and (self.get_char_at(i + 1) or "") == rstring_delimiter:
                     self.log(
-                        "While parsing a string, we found a doubled quote but it was a mistake, removing one quote",
+                        "While parsing a string, we found a valid starting doubled quote",
                     )
+                    doubled_quotes = True
                     self.index += 1
+                else:
+                    # Ok this is not a doubled quote, check if this is an empty string or not
+                    i = self.skip_whitespaces_at(idx=1, move_main_index=False)
+                    next_c = self.get_char_at(i)
+                    if next_c in self.STRING_DELIMITERS + ["{", "["]:
+                        # something fishy is going on here
+                        self.log(
+                            "While parsing a string, we found a doubled quote but also another quote afterwards, ignoring it",
+                        )
+                        self.index += 1
+                        return ""
+                    elif next_c not in [",", "]", "}"]:
+                        self.log(
+                            "While parsing a string, we found a doubled quote but it was a mistake, removing one quote",
+                        )
+                        self.index += 1
+            else:
+                # Otherwise we need to do another check before continuing
+                i = self.skip_to_character(character=rstring_delimiter, idx=1)
+                next_c = self.get_char_at(i)
+                if not next_c:
+                    # mmmm that delimiter never appears again, this is a mistake
+                    self.log(
+                        "While parsing a string, we found a quote but it was a mistake, ignoring it",
+                    )
+                    return ""
         # Initialize our return value
         string_acc = ""
@@ -404,6 +443,38 @@ class JSONParser:
                     string_acc += escape_seqs.get(char, char) or char
                     self.index += 1
                     char = self.get_char_at()
+            # If we are in object key context and we find a colon, it could be a missing right quote
+            if (
+                char == ":"
+                and not missing_quotes
+                and self.context.current == ContextValues.OBJECT_KEY
+            ):
+                # Ok now we need to check if this is followed by a value like "..."
+                i = self.skip_to_character(character=lstring_delimiter, idx=1)
+                next_c = self.get_char_at(i)
+                if next_c:
+                    i += 1
+                    # found the first delimiter
+                    i = self.skip_to_character(character=rstring_delimiter, idx=i)
+                    next_c = self.get_char_at(i)
+                    if next_c:
+                        # found a second delimiter
+                        i += 1
+                        # Skip spaces
+                        i = self.skip_whitespaces_at(idx=i, move_main_index=False)
+                        next_c = self.get_char_at(i)
+                        if next_c and next_c in [",", "}"]:
+                            # Ok then this is a missing right quote
+                            self.log(
+                                "While parsing a string missing the right delimiter in object key context, we found a :, stopping here",
+                            )
+                            break
+                else:
+                    # The string ended without finding a lstring_delimiter, I will assume this is a missing right quote
+                    self.log(
+                        "While parsing a string missing the right delimiter in object key context, we found a :, stopping here",
+                    )
+                    break
             # ChatGPT sometimes forget to quote stuff in html tags or markdown, so we do this whole thing here
             if char == rstring_delimiter:
                 # Special case here, in case of double quotes one after another

{json_repair-0.33.0 → json_repair-0.35.0}/src/json_repair/string_file_wrapper.py RENAMED Viewed

@@ -96,3 +96,24 @@ class StringFileWrapper:
             self.length = self.fd.tell()
             self.fd.seek(current_position)
         return self.length
+    def __setitem__(self, index: Union[int, slice], value: str) -> None:
+        """
+        Set a character or a slice of characters in the file.
+        Args:
+            index (slice): The slice of characters to set.
+            value (str): The value to set at the specified index or slice.
+        """
+        if isinstance(index, slice):
+            start = index.start or 0
+        else:
+            start = index or 0
+        if start < 0:
+            start += len(self)
+        current_position = self.fd.tell()
+        self.fd.seek(start)
+        self.fd.write(value)
+        self.fd.seek(current_position)

{json_repair-0.33.0 → json_repair-0.35.0/src/json_repair.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: json_repair
-Version: 0.33.0
+Version: 0.35.0
 Summary: A package to repair broken json strings
 Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
 License: MIT License

{json_repair-0.33.0 → json_repair-0.35.0}/tests/test_json_repair.py RENAMED Viewed

@@ -146,14 +146,16 @@ def test_object_edge_cases():
     assert repair_json('{"lorem": ipsum, sic, datum.",}') == '{"lorem": "ipsum, sic, datum."}'
     assert repair_json('{"lorem": sic tamet. "ipsum": sic tamet, quick brown fox. "sic": ipsum}') == '{"lorem": "sic tamet.", "ipsum": "sic tamet", "sic": "ipsum"}'
     assert repair_json('{"lorem_ipsum": "sic tamet, quick brown fox. }') == '{"lorem_ipsum": "sic tamet, quick brown fox."}'
-    assert repair_json('{"key":value, " key2":"value2" }') == '{"key": "value", " key2": "value2"}'
+    assert repair_json('{"key":value, " key2":"value2" }') == '{"key": "value", "key2": "value2"}'
     assert repair_json('{"key":value "key2":"value2" }') == '{"key": "value", "key2": "value2"}'
     assert repair_json("{'text': 'words{words in brackets}more words'}") == '{"text": "words{words in brackets}more words"}'
     assert repair_json('{text:words{words in brackets}}') == '{"text": "words{words in brackets}"}'
     assert repair_json('{text:words{words in brackets}m}') == '{"text": "words{words in brackets}m"}'
     assert repair_json('{"key": "value, value2"```') == '{"key": "value, value2"}'
     assert repair_json('{key:value,key2:value2}') == '{"key": "value", "key2": "value2"}'
-    assert repair_json('[{"lorem": {"ipsum": "sic"}, "lorem": {"ipsum": "sic"}]') == '[{"lorem": {"ipsum": "sic"}}, "lorem", {"ipsum": "sic"}]'
+    assert repair_json('{"key:"value"}') == '{"key": "value"}'
+    assert repair_json('{"key:value}') == '{"key": "value"}'
+    assert repair_json('[{"lorem": {"ipsum": "sic"}, """" "lorem": {"ipsum": "sic"}]') == '[{"lorem": {"ipsum": "sic"}}, {"lorem": {"ipsum": "sic"}}]'
 def test_number_edge_cases():
     assert repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'

{json_repair-0.33.0 → json_repair-0.35.0}/tests/test_performance.py RENAMED Viewed

@@ -97,7 +97,7 @@ def test_false_false_incorrect(benchmark):
   mean_time = benchmark.stats.get("median")
   # Define your time threshold in seconds
-  max_time = 1.9 / 10 ** 3  # 1.9 millisecond
+  max_time = 2 / 10 ** 3  # 2 millisecond
   # Assert that the average time is below the threshold
   assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"