PyPI - json-repair - Versions diffs - 0.23.1__tar.gz → 0.25.0__tar.gz - Mend

json-repair 0.23.1__tar.gz → 0.25.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

{json_repair-0.23.1/src/json_repair.egg-info → json_repair-0.25.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: json_repair
-Version: 0.23.1
+Version: 0.25.0
 Summary: A package to repair broken json strings
 Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
 License: MIT License
@@ -45,6 +45,11 @@ If you find this library useful, you can help me by donating toward my monthly b
 ---
+# Demo
+If you are unsure if this library will fix your specific problem, or simply want your json validated online, you can visit the demo site on GitHub pages: https://mangiucugna.github.io/json_repair/
+---
 # Motivation
 Some LLMs are a bit iffy when it comes to returning well formed JSON data, sometimes they skip a parenthesis and sometimes they add some words in it, because that's what an LLM does.
 Luckily, the mistakes LLMs make are simple enough to be fixed without destroying the content.
@@ -160,6 +165,7 @@ You will need owner access to this repository
 # Repair JSON in other programming languages
 - Typescript: https://github.com/josdejong/jsonrepair
 - Go: https://github.com/RealAlexandreAI/json-repair
+- Ruby: https://github.com/sashazykov/json-repair-rb
 ---
 ## Star History

{json_repair-0.23.1 → json_repair-0.25.0}/README.md RENAMED Viewed

@@ -8,6 +8,11 @@ If you find this library useful, you can help me by donating toward my monthly b
 ---
+# Demo
+If you are unsure if this library will fix your specific problem, or simply want your json validated online, you can visit the demo site on GitHub pages: https://mangiucugna.github.io/json_repair/
+---
 # Motivation
 Some LLMs are a bit iffy when it comes to returning well formed JSON data, sometimes they skip a parenthesis and sometimes they add some words in it, because that's what an LLM does.
 Luckily, the mistakes LLMs make are simple enough to be fixed without destroying the content.
@@ -123,6 +128,7 @@ You will need owner access to this repository
 # Repair JSON in other programming languages
 - Typescript: https://github.com/josdejong/jsonrepair
 - Go: https://github.com/RealAlexandreAI/json-repair
+- Ruby: https://github.com/sashazykov/json-repair-rb
 ---
 ## Star History

{json_repair-0.23.1 → json_repair-0.25.0}/pyproject.toml RENAMED Viewed

@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
 build-backend = "setuptools.build_meta"
 [project]
 name = "json_repair"
-version = "0.23.1"
+version = "0.25.0"
 license = {file = "LICENSE"}
 authors = [
   { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },

{json_repair-0.23.1 → json_repair-0.25.0}/src/json_repair/json_repair.py RENAMED Viewed

@@ -91,6 +91,10 @@ class JSONParser:
     ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
         json = self.parse_json()
         if self.index < len(self.json_str):
+            self.log(
+                "The parser returned early, checking if there's more json elements",
+                "info",
+            )
             json = [json]
             last_index = self.index
             while self.index < len(self.json_str):
@@ -100,10 +104,13 @@ class JSONParser:
                 if self.index == last_index:
                     self.index += 1
                 last_index = self.index
+            # If nothing extra was found, don't return an array
             if len(json) == 1:
+                self.log(
+                    "There were no more elements, returning the element without the array",
+                    "info",
+                )
                 json = json[0]
-            elif len(json) == 0:
-                json = ""
         if self.logger.log_level == "none":
             return json
         else:
@@ -363,9 +370,34 @@ class JSONParser:
                 if self.get_context() == "object_key" and (
                     char == ":" or char.isspace()
                 ):
+                    self.log(
+                        "While parsing a string missing the left delimiter in object key context, we found a :, stopping here",
+                        "info",
+                    )
                     break
                 elif self.get_context() == "object_value" and char in [",", "}"]:
-                    break
+                    rstring_delimiter_missing = True
+                    # check whether this is instead a case in which the closing quote is NOT missing
+                    i = 1
+                    next_c = self.get_char_at(i)
+                    while next_c and next_c != rstring_delimiter:
+                        i += 1
+                        next_c = self.get_char_at(i)
+                    if next_c:
+                        i += 1
+                        next_c = self.get_char_at(i)
+                        # found a delimiter, now we need to check that it is followed strictly by a comma or brace
+                        while next_c and next_c.isspace():
+                            i += 1
+                            next_c = self.get_char_at(i)
+                        if next_c and next_c in [",", "}"]:
+                            rstring_delimiter_missing = False
+                    if rstring_delimiter_missing:
+                        self.log(
+                            "While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn't determine that a right delimiter was present. Stopping here",
+                            "info",
+                        )
+                        break
             string_acc += char
             self.index += 1
             char = self.get_char_at()
@@ -386,6 +418,33 @@ class JSONParser:
                         "While parsing a string, we found a doubled quote, ignoring it",
                         "info",
                     )
+                elif missing_quotes and self.get_context() == "object_value":
+                    # In case of missing starting quote I need to check if the delimiter is the end or the beginning of a key
+                    i = 1
+                    next_c = self.get_char_at(i)
+                    while next_c and next_c not in [
+                        rstring_delimiter,
+                        lstring_delimiter,
+                    ]:
+                        i += 1
+                        next_c = self.get_char_at(i)
+                    if next_c:
+                        # We found a quote, now let's make sure there's a ":" following
+                        i += 1
+                        next_c = self.get_char_at(i)
+                        # skip any whitespace, then check that the quote is followed strictly by a colon
+                        while next_c and next_c.isspace():
+                            i += 1
+                            next_c = self.get_char_at(i)
+                        if next_c and next_c == ":":
+                            # Reset the cursor
+                            self.index -= 1
+                            char = self.get_char_at()
+                            self.log(
+                                "In a string with missing quotes and object value context, I found a delimeter but it turns out it was the beginning on the next key. Stopping here.",
+                                "info",
+                            )
+                            break
                 else:
                     # Check if eventually there is a rstring delimiter, otherwise we bail
                     i = 1
@@ -496,7 +555,8 @@ class JSONParser:
         number_str = ""
         number_chars = set("0123456789-.eE/,")
         char = self.get_char_at()
-        while char and char in number_chars:
+        is_array = self.get_context() == "array"
+        while char and char in number_chars and (char != "," or not is_array):
             number_str += char
             self.index += 1
             char = self.get_char_at()

{json_repair-0.23.1 → json_repair-0.25.0/src/json_repair.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: json_repair
-Version: 0.23.1
+Version: 0.25.0
 Summary: A package to repair broken json strings
 Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
 License: MIT License
@@ -45,6 +45,11 @@ If you find this library useful, you can help me by donating toward my monthly b
 ---
+# Demo
+If you are unsure if this library will fix your specific problem, or simply want your json validated online, you can visit the demo site on GitHub pages: https://mangiucugna.github.io/json_repair/
+---
 # Motivation
 Some LLMs are a bit iffy when it comes to returning well formed JSON data, sometimes they skip a parenthesis and sometimes they add some words in it, because that's what an LLM does.
 Luckily, the mistakes LLMs make are simple enough to be fixed without destroying the content.
@@ -160,6 +165,7 @@ You will need owner access to this repository
 # Repair JSON in other programming languages
 - Typescript: https://github.com/josdejong/jsonrepair
 - Go: https://github.com/RealAlexandreAI/json-repair
+- Ruby: https://github.com/sashazykov/json-repair-rb
 ---
 ## Star History

{json_repair-0.23.1 → json_repair-0.25.0}/tests/test_json_repair.py RENAMED Viewed

@@ -141,6 +141,8 @@ def test_object_edge_cases():
     assert repair_json('''{ "a" : "{ b": {} }" }''') == '{"a": "{ b"}'
     assert repair_json("""{"b": "xxxxx" true}""") == '{"b": "xxxxx"}'
     assert repair_json('{"key": "Lorem "ipsum" s,"}') == '{"key": "Lorem \\"ipsum\\" s,"}'
+    assert repair_json('{"lorem": ipsum, sic, datum.",}') == '{"lorem": "ipsum, sic, datum."}'
+    assert repair_json('{"lorem": sic tamet. "ipsum": sic tamet, quick brown fox. "sic": ipsum}') == '{"lorem": "sic tamet.", "ipsum": "sic tamet", "sic": "ipsum"}'
 def test_number_edge_cases():
     assert repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
@@ -148,7 +150,7 @@ def test_number_edge_cases():
     assert repair_json('{"key": .25}') == '{"key": 0.25}'
     assert repair_json('{"here": "now", "key": 1/3, "foo": "bar"}') == '{"here": "now", "key": "1/3", "foo": "bar"}'
     assert repair_json('{"key": 12345/67890}') == '{"key": "12345/67890"}'
-    assert repair_json('[105,12') == '["105,12"]'
+    assert repair_json('[105,12') == '[105, 12]'
     assert repair_json('{"key", 105,12,') == '{"key": "105,12"}'
     assert repair_json('{"key": 1/3, "foo": "bar"}') == '{"key": "1/3", "foo": "bar"}'
     assert repair_json('{"key": 10-20}') == '{"key": "10-20"}'
@@ -171,8 +173,8 @@ def test_leading_trailing_characters():
 def test_multiple_jsons():
     assert repair_json("[]{}") == "[[], {}]"
     assert repair_json("{}[]{}") == "[{}, [], {}]"
-    assert repair_json('{"key":"value"}[1,2,3,True]') == '[{"key": "value"}, ["1,2,3", true]]'
-    assert repair_json('lorem ```json {"key":"value"} ``` ipsum ```json [1,2,3,True] ``` 42') == '[{"key": "value"}, ["1,2,3", true]]'
+    assert repair_json('{"key":"value"}[1,2,3,True]') == '[{"key": "value"}, [1, 2, 3, true]]'
+    assert repair_json('lorem ```json {"key":"value"} ``` ipsum ```json [1,2,3,True] ``` 42') == '[{"key": "value"}, [1, 2, 3, true]]'
 def test_repair_json_with_objects():
     # Test with valid JSON strings
@@ -242,7 +244,7 @@ def test_repair_json_from_file():
     # Write content to the temporary file
         with os.fdopen(temp_fd, 'w') as tmp:
             tmp.write("{key:value}")
-        assert(from_file(temp_path, logging=True)) == ({'key': 'value'}, [{'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'text': 'While parsing a string, we found no starting quote. Will add the quote back', 'context': '{key:value}'}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}, {'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'text': 'While parsing a string, we found no starting quote. Will add the quote back', 'context': '{key:value}'}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}])
+        assert(from_file(temp_path, logging=True)) == ({'key': 'value'}, [{'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'text': 'While parsing a string, we found no starting quote. Will add the quote back', 'context': '{key:value}'}, {'context': '{key:value}', 'text': 'While parsing a string missing the left delimiter in object key context, we found a :, stopping here',}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}, {'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'text': 'While parsing a string, we found no starting quote. Will add the quote back', 'context': '{key:value}'}, {'context': '{key:value}', 'text': 'While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn\'t determine that a right delimiter was present. Stopping here'}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}])
     finally:
         # Clean up - delete the temporary file
         os.remove(temp_path)

{json_repair-0.23.1 → json_repair-0.25.0}/tests/test_performance.py RENAMED Viewed

@@ -97,7 +97,7 @@ def test_false_false_incorrect(benchmark):
   mean_time = benchmark.stats.get("median")
   # Define your time threshold in seconds
-  max_time = 14 / 10 ** 4  # 1.4 millisecond
+  max_time = 15 / 10 ** 4  # 1.5 millisecond
   # Assert that the average time is below the threshold
   assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"

{json_repair-0.23.1 → json_repair-0.25.0}/LICENSE RENAMED Viewed

File without changes

{json_repair-0.23.1 → json_repair-0.25.0}/setup.cfg RENAMED Viewed

File without changes

{json_repair-0.23.1 → json_repair-0.25.0}/src/json_repair/__init__.py RENAMED Viewed

File without changes

{json_repair-0.23.1 → json_repair-0.25.0}/src/json_repair.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{json_repair-0.23.1 → json_repair-0.25.0}/src/json_repair.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{json_repair-0.23.1 → json_repair-0.25.0}/src/json_repair.egg-info/top_level.txt RENAMED Viewed

File without changes

json-repair 0.23.1__tar.gz → 0.25.0__tar.gz

json-repair 0.23.1__tar.gz → 0.25.0__tar.gz