json-repair 0.37.0__tar.gz → 0.39.0__tar.gz
- {json_repair-0.37.0/src/json_repair.egg-info → json_repair-0.39.0}/PKG-INFO +1 -1
- {json_repair-0.37.0 → json_repair-0.39.0}/pyproject.toml +1 -1
- {json_repair-0.37.0 → json_repair-0.39.0}/src/json_repair/json_parser.py +80 -6
- {json_repair-0.37.0 → json_repair-0.39.0}/src/json_repair/json_repair.py +20 -7
- {json_repair-0.37.0 → json_repair-0.39.0/src/json_repair.egg-info}/PKG-INFO +1 -1
- {json_repair-0.37.0 → json_repair-0.39.0}/tests/test_json_repair.py +15 -15
- {json_repair-0.37.0 → json_repair-0.39.0}/LICENSE +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/README.md +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/setup.cfg +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/src/json_repair/__init__.py +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/src/json_repair/__main__.py +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/src/json_repair/json_context.py +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/src/json_repair/py.typed +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/src/json_repair/string_file_wrapper.py +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/src/json_repair.egg-info/entry_points.txt +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/src/json_repair.egg-info/top_level.txt +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/tests/test_coverage.py +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/tests/test_performance.py +0 -0
--- json_repair-0.37.0/pyproject.toml
+++ json_repair-0.39.0/pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
 build-backend = "setuptools.build_meta"
 [project]
 name = "json_repair"
-version = "0.37.0"
+version = "0.39.0"
 license = {file = "LICENSE"}
 authors = [
   { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
--- json_repair-0.37.0/src/json_repair/json_parser.py
+++ json_repair-0.39.0/src/json_repair/json_parser.py
@@ -101,6 +101,8 @@ class JSONParser:
                 char.isdigit() or char == "-" or char == "."
             ):
                 return self.parse_number()
+            elif char in ["#", "/"]:
+                return self.parse_comment()
             # If everything else fails, we just ignore and move on
             else:
                 self.index += 1
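With this dispatch in place, a "#" or "/" seen where a JSON element is expected is handed to the new parse_comment() method instead of being skipped blindly. A small usage sketch; the expected outputs mirror the new test cases added to tests/test_json_repair.py further down:

    from json_repair import repair_json

    # '#' and '//' line comments inside an object are stripped before re-serializing.
    repair_json('{ "key": { "key2": "value2" # comment }, "key3": "value3" }')
    # -> '{"key": {"key2": "value2"}, "key3": "value3"}'

    repair_json('{ "key": { "key2": "value2" // comment }, "key3": "value3" }')
    # -> '{"key": {"key2": "value2"}, "key3": "value3"}'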
@@ -138,8 +140,9 @@ class JSONParser:
                 # The rollback index needs to be updated here in case the key is empty
                 rollback_index = self.index
                 key = str(self.parse_string())
-
-
+                if key == "":
+                    self.skip_whitespaces_at()
+                if key != "" or (key == "" and self.get_char_at() in [":", "}"]):
                    # If the string is empty but there is a object divider, we are done here
                    break
            if ContextValues.ARRAY in self.context.context and key in obj:
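The reworked empty-key branch skips whitespace and then accepts ":" or "}" as an object divider before giving up. A hedged illustration (this exact assertion is not in the test suite; the expected string assumes json-repair's usual trailing-comma handling):

    from json_repair import repair_json

    # An empty key position followed by '}' (e.g. a trailing comma) ends the object cleanly.
    repair_json('{"key": "value", }')
    # expected: '{"key": "value"}'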
@@ -199,11 +202,10 @@ class JSONParser:
             self.skip_whitespaces_at()
             value = self.parse_json()
 
-            # It is possible that parse_json() returns nothing valid, so we
+            # It is possible that parse_json() returns nothing valid, so we increase by 1
             if value == "":
-
-
-            if value == "..." and self.get_char_at(-1) == ".":
+                self.index += 1
+            elif value == "..." and self.get_char_at(-1) == ".":
                 self.log(
                     "While parsing an array, found a stray '...'; ignoring it",
                 )
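Read straightforwardly, the new branch means an empty parse result now just advances the index, while a stray ellipsis inside an array is logged and dropped. A hedged sketch, not taken from the test suite:

    from json_repair import repair_json

    # The stray '...' should be logged ("found a stray '...'; ignoring it") and omitted.
    repair_json('[1, 2, ..., 3]')
    # expected: '[1, 2, 3]'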
@@ -243,6 +245,8 @@ class JSONParser:
         lstring_delimiter = rstring_delimiter = '"'
 
         char = self.get_char_at()
+        if char in ["#", "/"]:
+            return self.parse_comment()
         # A valid string can only start with a valid quote or, in our case, with a literal
         while char and char not in self.STRING_DELIMITERS and not char.isalnum():
             self.index += 1
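parse_string() is also entered where a value is expected, so the same early dispatch lets block comments sit between array elements. This mirrors the new array test below:

    from json_repair import repair_json

    # A /* ... */ block comment between array elements is parsed as a comment, not a value.
    repair_json('[ "value", /* comment */ "value2" ]')
    # -> '["value", "value2"]'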
@@ -753,6 +757,76 @@ class JSONParser:
             return self.skip_to_character(character=character, idx=idx + 1)
         return idx
 
+    def parse_comment(self) -> str:
+        """
+        Parse code-like comments:
+
+        - "# comment": A line comment that continues until a newline.
+        - "// comment": A line comment that continues until a newline.
+        - "/* comment */": A block comment that continues until the closing delimiter "*/".
+
+        The comment is skipped over and an empty string is returned so that comments do not interfere
+        with the actual JSON elements.
+        """
+        char = self.get_char_at()
+        termination_characters = ["\n", "\r"]
+        if ContextValues.ARRAY in self.context.context:
+            termination_characters.append("]")
+        if ContextValues.OBJECT_VALUE in self.context.context:
+            termination_characters.append("}")
+        if ContextValues.OBJECT_KEY in self.context.context:
+            termination_characters.append(":")
+        # Line comment starting with #
+        if char == "#":
+            comment = ""
+            while char and char not in termination_characters:
+                comment += char
+                self.index += 1
+                char = self.get_char_at()
+            self.log(f"Found line comment: {comment}")
+            return ""
+
+        # Comments starting with '/'
+        elif char == "/":
+            next_char = self.get_char_at(1)
+            # Handle line comment starting with //
+            if next_char == "/":
+                comment = "//"
+                self.index += 2  # Skip both slashes.
+                char = self.get_char_at()
+                while char and char not in termination_characters:
+                    comment += char
+                    self.index += 1
+                    char = self.get_char_at()
+                self.log(f"Found line comment: {comment}")
+                return ""
+            # Handle block comment starting with /*
+            elif next_char == "*":
+                comment = "/*"
+                self.index += 2  # Skip '/*'
+                while True:
+                    char = self.get_char_at()
+                    if not char:
+                        self.log(
+                            "Reached end-of-string while parsing block comment; unclosed block comment."
+                        )
+                        break
+                    comment += char
+                    self.index += 1
+                    if comment.endswith("*/"):
+                        break
+                self.log(f"Found block comment: {comment}")
+                return ""
+            else:
+                # Not a recognized comment pattern, skip the slash.
+                self.index += 1
+                return ""
+
+        else:
+            # Should not be reached: if for some reason the current character does not start a comment, skip it.
+            self.index += 1
+            return ""
+
     def _log(self, text: str) -> None:
         window: int = 10
         start: int = max(self.index - window, 0)
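Because parse_comment() always returns an empty string, a comment never ends up as a key or value; it is only logged and skipped, with the termination set ("\n", "\r", plus "]", "}" or ":" depending on context) keeping the surrounding JSON intact. A hedged sketch using the object-returning API (loads() is the same helper the CLI change below relies on):

    from json_repair import loads

    # The block-comment case from the new tests, decoded to Python objects instead of a string.
    loads('{ "key": { "key2": "value2" /* comment */ }, "key3": "value3" }')
    # expected: {'key': {'key2': 'value2'}, 'key3': 'value3'}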
--- json_repair-0.37.0/src/json_repair/json_repair.py
+++ json_repair-0.39.0/src/json_repair/json_repair.py
@@ -160,7 +160,7 @@ def cli(inline_args: Optional[List[str]] = None) -> int:
 
     Args:
         inline_args (Optional[List[str]]): List of command-line arguments for testing purposes. Defaults to None.
-            - filename (str): The JSON file to repair
+            - filename (str): The JSON file to repair. If omitted, the JSON is read from stdin.
            - -i, --inline (bool): Replace the file inline instead of returning the output to stdout.
            - -o, --output TARGET (str): If specified, the output will be written to TARGET filename instead of stdout.
            - --ensure_ascii (bool): Pass ensure_ascii=True to json.dumps(). Will pass False otherwise.
@@ -174,9 +174,15 @@ def cli(inline_args: Optional[List[str]] = None) -> int:
 
     Example:
         >>> cli(['example.json', '--indent', '4'])
+        >>> cat json.txt | json_repair
     """
     parser = argparse.ArgumentParser(description="Repair and parse JSON files.")
-
+    # Make the filename argument optional; if omitted, we will read from stdin.
+    parser.add_argument(
+        "filename",
+        nargs="?",
+        help="The JSON file to repair (if omitted, reads from stdin)",
+    )
     parser.add_argument(
         "-i",
         "--inline",
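For reference, nargs="?" is what makes the positional optional: when it is omitted, argparse stores the default (None here), which the CLI later uses to decide between from_file() and stdin. A minimal standalone argparse sketch, not taken from the package:

    import argparse

    parser = argparse.ArgumentParser()
    # With nargs="?" the positional may be absent; args.filename then falls back to None.
    parser.add_argument("filename", nargs="?", help="optional input file")

    print(parser.parse_args([]).filename)             # None -> read from stdin instead
    print(parser.parse_args(["data.json"]).filename)  # 'data.json'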
@@ -204,9 +210,12 @@ def cli(inline_args: Optional[List[str]] = None) -> int:
     if inline_args is None:  # pragma: no cover
         args = parser.parse_args()
     else:
-        args = parser.parse_args(
-
-
+        args = parser.parse_args(inline_args)
+
+    # Inline mode requires a filename, so error out if none was provided.
+    if args.inline and not args.filename:  # pragma: no cover
+        print("Error: Inline mode requires a filename", file=sys.stderr)
+        sys.exit(1)
 
     if args.inline and args.output:  # pragma: no cover
         print("Error: You cannot pass both --inline and --output", file=sys.stderr)
@@ -217,8 +226,12 @@ def cli(inline_args: Optional[List[str]] = None) -> int:
         ensure_ascii = True
 
     try:
-
-
+        # Use from_file if a filename is provided; otherwise read from stdin.
+        if args.filename:
+            result = from_file(args.filename)
+        else:
+            data = sys.stdin.read()
+            result = loads(data)
         if args.inline or args.output:
             with open(args.output or args.filename, mode="w") as fd:
                 json.dump(result, fd, indent=args.indent, ensure_ascii=ensure_ascii)
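Taken together, the CLI can now be driven through a pipe: with no filename, sys.stdin.read() feeds loads(). The new test below exercises this by patching sys.stdin; a hedged sketch of the same idea outside pytest (the import path for cli is assumed from the file layout above):

    import io
    from unittest.mock import patch

    from json_repair.json_repair import cli  # assumed module path: src/json_repair/json_repair.py

    broken = '{key:value'
    # Redirect stdin so cli() takes the no-filename branch and repairs the piped text.
    with patch('sys.stdin', new=io.StringIO(broken)):
        cli(inline_args=['--indent', '4'])
    # The repaired JSON is printed to stdout, as in the file-based modes.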
--- json_repair-0.37.0/tests/test_json_repair.py
+++ json_repair-0.39.0/tests/test_json_repair.py
@@ -3,6 +3,7 @@ from unittest.mock import patch
 import os.path
 import pathlib
 import tempfile
+import io
 
 def test_basic_types_valid():
     assert repair_json("True", return_objects=True) == ""
@@ -124,6 +125,7 @@ def test_array_edge_cases():
     assert repair_json('{"key": ["value]}') == '{"key": ["value"]}'
     assert repair_json('["lorem "ipsum" sic"]') == '["lorem \\"ipsum\\" sic"]'
     assert repair_json('{"key1": ["value1", "value2"}, "key2": ["value3", "value4"]}') == '{"key1": ["value1", "value2"], "key2": ["value3", "value4"]}'
+    assert repair_json('[ "value", /* comment */ "value2" ]') == '["value", "value2"]'
 
 def test_escaping():
     assert repair_json("'\"'") == '""'
@@ -158,6 +160,9 @@ def test_object_edge_cases():
     assert repair_json('{"key:"value"}') == '{"key": "value"}'
     assert repair_json('{"key:value}') == '{"key": "value"}'
     assert repair_json('[{"lorem": {"ipsum": "sic"}, """" "lorem": {"ipsum": "sic"}]') == '[{"lorem": {"ipsum": "sic"}}, {"lorem": {"ipsum": "sic"}}]'
+    assert repair_json('{ "key": { "key2": "value2" // comment }, "key3": "value3" }') == '{"key": {"key2": "value2"}, "key3": "value3"}'
+    assert repair_json('{ "key": { "key2": "value2" # comment }, "key3": "value3" }') == '{"key": {"key2": "value2"}, "key3": "value3"}'
+    assert repair_json('{ "key": { "key2": "value2" /* comment */ }, "key3": "value3" }') == '{"key": {"key2": "value2"}, "key3": "value3"}'
 
 def test_number_edge_cases():
     assert repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
@@ -313,18 +318,13 @@ def test_cli(capsys):
     # Clean up - delete the temporary file
     os.remove(temp_path)
     os.remove(tempout_path)
-
-
-
-
-
-
-
-
-
-
-    with patch('sys.argv', ['json_repair', sample_json_file, '-o', str(output_file)]):
-        cli()
-    with open(output_file, 'r') as f:
-        assert json.load(f) == {"key": "value"}
-    """
+
+    # Prepare a JSON string that needs to be repaired.
+    test_input = "{key:value"
+    # Expected output when running cli with --indent 0.
+    expected_output = '{\n"key": "value"\n}\n'
+    # Patch sys.stdin so that cli() reads from it instead of a file.
+    with patch('sys.stdin', new=io.StringIO(test_input)):
+        cli(inline_args=['--indent', 0])
+    captured = capsys.readouterr()
+    assert captured.out == expected_output
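As a sanity check on the expected_output string: json.dumps() with indent=0 puts each element on its own line with no leading spaces, and print() appends the final newline asserted above.

    import json

    # Produces '{\n"key": "value"\n}'; print() adds the trailing '\n'.
    print(json.dumps({"key": "value"}, indent=0))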