PyPI - json-repair - Versions diffs - 0.31.0__tar.gz → 0.33.0__tar.gz - Mend

json-repair 0.31.0.tar.gz → 0.33.0.tar.gz

Files changed (20) hide show

{json_repair-0.31.0/src/json_repair.egg-info → json_repair-0.33.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: json_repair
-Version: 0.31.0
+Version: 0.33.0
 Summary: A package to repair broken json strings
 Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
 License: MIT License

{json_repair-0.31.0 → json_repair-0.33.0}/pyproject.toml RENAMED Viewed

@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
 build-backend = "setuptools.build_meta"
 [project]
 name = "json_repair"
-version = "0.31.0"
+version = "0.33.0"
 license = {file = "LICENSE"}
 authors = [
   { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },

{json_repair-0.31.0 → json_repair-0.33.0}/src/json_repair/json_parser.py RENAMED Viewed

@@ -124,6 +124,9 @@ class JSONParser:
             self.skip_whitespaces_at()
+            # Save this index in case we need to roll back after finding a duplicate key
+            rollback_index = self.index
             # <member> starts with a <string>
             key = ""
             while self.get_char_at():
@@ -132,7 +135,14 @@ class JSONParser:
                 if key != "" or (key == "" and self.get_char_at() == ":"):
                     # If the string is empty but there is a object divider, we are done here
                     break
+            if ContextValues.ARRAY in self.context.context and key in obj:
+                self.log(
+                    "While parsing an object we found a duplicate key, closing the object here and rolling back the index",
+                )
+                self.index = rollback_index - 1
+                break
+            # Skip filler whitespaces
             self.skip_whitespaces_at()
             # We reached the end here
@@ -498,9 +508,8 @@ class JSONParser:
                             # But this might not be it! This could be just a missing comma
                             # We found a delimiter and we need to check if this is a key
                             # so find a rstring_delimiter and a colon after
-                            i += 1
                             i = self.skip_to_character(
-                                character=rstring_delimiter, idx=i
+                                character=rstring_delimiter, idx=i + 1
                             )
                             i += 1
                             next_c = self.get_char_at(i)
@@ -521,6 +530,27 @@ class JSONParser:
                                 string_acc += str(char)
                                 self.index += 1
                                 char = self.get_char_at()
+                        elif self.context.current == ContextValues.ARRAY:
+                            # In array context this could be something like "lorem "ipsum" sic"
+                            # So let's check if we find a rstring_delimiter forward otherwise end early
+                            i = self.skip_to_character(rstring_delimiter, idx=i + 1)
+                            next_c = self.get_char_at(i)
+                            if next_c and next_c == rstring_delimiter:
+                                # Ok, now if I find a comma or a closing ], which can also have an optional rstring_delimiter before them,
+                                # We can consider this a misplaced quote
+                                i += 1
+                                i = self.skip_whitespaces_at(
+                                    idx=i, move_main_index=False
+                                )
+                                next_c = self.get_char_at(i)
+                                if next_c and next_c in [",", "]"]:
+                                    self.log(
+                                        "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
+                                    )
+                                    unmatched_delimiter = not unmatched_delimiter
+                                    string_acc += str(char)
+                                    self.index += 1
+                                    char = self.get_char_at()
         if (
             char

{json_repair-0.31.0 → json_repair-0.33.0/src/json_repair.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: json_repair
-Version: 0.31.0
+Version: 0.33.0
 Summary: A package to repair broken json strings
 Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
 License: MIT License

{json_repair-0.31.0 → json_repair-0.33.0}/tests/test_json_repair.py RENAMED Viewed

@@ -121,6 +121,7 @@ def test_array_edge_cases():
     assert repair_json('{"employees":["John", "Anna", "Peter') == '{"employees": ["John", "Anna", "Peter"]}'
     assert repair_json('{"key1": {"key2": [1, 2, 3') == '{"key1": {"key2": [1, 2, 3]}}'
     assert repair_json('{"key": ["value]}') == '{"key": ["value"]}'
+    assert repair_json('["lorem "ipsum" sic"]') == '["lorem \\"ipsum\\" sic"]'
 def test_escaping():
     assert repair_json("'\"'") == '""'
@@ -152,6 +153,7 @@ def test_object_edge_cases():
     assert repair_json('{text:words{words in brackets}m}') == '{"text": "words{words in brackets}m"}'
     assert repair_json('{"key": "value, value2"```') == '{"key": "value, value2"}'
     assert repair_json('{key:value,key2:value2}') == '{"key": "value", "key2": "value2"}'
+    assert repair_json('[{"lorem": {"ipsum": "sic"}, "lorem": {"ipsum": "sic"}]') == '[{"lorem": {"ipsum": "sic"}}, "lorem", {"ipsum": "sic"}]'
 def test_number_edge_cases():
     assert repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'

{json_repair-0.31.0 → json_repair-0.33.0}/tests/test_performance.py RENAMED Viewed

@@ -19,7 +19,7 @@ def test_true_true_correct(benchmark):
   mean_time = benchmark.stats.get("median")
   # Define your time threshold in seconds
-  max_time = 1.8 / 10 ** 3  # 1.8 millisecond
+  max_time = 1.9 / 10 ** 3  # 1.9 millisecond
   # Assert that the average time is below the threshold
   assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -31,7 +31,7 @@ def test_true_true_incorrect(benchmark):
   mean_time = benchmark.stats.get("median")
   # Define your time threshold in seconds
-  max_time = 1.8 / 10 ** 3  # 1.8 millisecond
+  max_time = 1.9 / 10 ** 3  # 1.9 millisecond
   # Assert that the average time is below the threshold
   assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -53,7 +53,7 @@ def test_true_false_incorrect(benchmark):
   mean_time = benchmark.stats.get("median")
   # Define your time threshold in seconds
-  max_time = 1.8 / 10 ** 3  # 1.8 millisecond
+  max_time = 1.9 / 10 ** 3  # 1.9 millisecond
   # Assert that the average time is below the threshold
   assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -64,7 +64,7 @@ def test_false_true_correct(benchmark):
   mean_time = benchmark.stats.get("median")
   # Define your time threshold in seconds
-  max_time = 1.8 / 10 ** 3  # 1.8 millisecond
+  max_time = 1.9 / 10 ** 3  # 1.9 millisecond
   # Assert that the average time is below the threshold
   assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -75,7 +75,7 @@ def test_false_true_incorrect(benchmark):
   mean_time = benchmark.stats.get("median")
   # Define your time threshold in seconds
-  max_time = 1.8 / 10 ** 3  # 1.8 millisecond
+  max_time = 1.9 / 10 ** 3  # 1.9 millisecond
   # Assert that the average time is below the threshold
   assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"