PyPI - json-repair - Versions diffs - 0.28.4__tar.gz → 0.29.1__tar.gz - Mend

json-repair 0.28.4__tar.gz → 0.29.1__tar.gz

Files changed (17) hide show

{json_repair-0.28.4/src/json_repair.egg-info → json_repair-0.29.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: json_repair
-Version: 0.28.4
+Version: 0.29.1
 Summary: A package to repair broken json strings
 Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
 License: MIT License
@@ -150,6 +150,31 @@ Some rules of thumb to use:
 - `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
 - If you are having issues with escaping pass the string as **raw** string like: `r"string with escaping\""`
+### Use json_repair from CLI
+Install the library for command-line with:
+```
+pipx install json-repair
+```
+to know all options available:
+```
+$ json_repair -h
+usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
+Repair and parse JSON files.
+positional arguments:
+  filename              The JSON file to repair
+options:
+  -h, --help            show this help message and exit
+  -i, --inline          Replace the file inline instead of returning the output to stdout
+  -o TARGET, --output TARGET
+                        If specified, the output will be written to TARGET filename instead of stdout
+  --ensure_ascii        Pass ensure_ascii=True to json.dumps()
+  --indent INDENT       Number of spaces for indentation (Default 2)
+```
 ## Adding to requirements
 **Please pin this library only on the major version!**

{json_repair-0.28.4 → json_repair-0.29.1}/README.md RENAMED Viewed

@@ -112,6 +112,31 @@ Some rules of thumb to use:
 - `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
 - If you are having issues with escaping pass the string as **raw** string like: `r"string with escaping\""`
+### Use json_repair from CLI
+Install the library for command-line with:
+```
+pipx install json-repair
+```
+to know all options available:
+```
+$ json_repair -h
+usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
+Repair and parse JSON files.
+positional arguments:
+  filename              The JSON file to repair
+options:
+  -h, --help            show this help message and exit
+  -i, --inline          Replace the file inline instead of returning the output to stdout
+  -o TARGET, --output TARGET
+                        If specified, the output will be written to TARGET filename instead of stdout
+  --ensure_ascii        Pass ensure_ascii=True to json.dumps()
+  --indent INDENT       Number of spaces for indentation (Default 2)
+```
 ## Adding to requirements
 **Please pin this library only on the major version!**

{json_repair-0.28.4 → json_repair-0.29.1}/pyproject.toml RENAMED Viewed

@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
 build-backend = "setuptools.build_meta"
 [project]
 name = "json_repair"
-version = "0.28.4"
+version = "0.29.1"
 license = {file = "LICENSE"}
 authors = [
   { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
@@ -29,3 +29,5 @@ pythonpath = [
 "pkgname" = ["py.typed"]
 [tool.setuptools.packages.find]
 where = ["src"]
+[project.scripts]
+json_repair = "json_repair.__main__:cli"

json_repair-0.29.1/src/json_repair/__main__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from .json_repair import cli
+if __name__ == "__main__":
+    cli()

{json_repair-0.28.4 → json_repair-0.29.1}/src/json_repair/json_repair.py RENAMED Viewed

@@ -22,7 +22,9 @@ If something is wrong (a missing parantheses or quotes for example) it will use
 All supported use cases are in the unit tests
 """
+import argparse
 import os
+import sys
 import json
 from typing import Any, Dict, List, Optional, Union, TextIO, Tuple, Literal
@@ -40,14 +42,16 @@ class StringFileWrapper:
             CHUNK_LENGTH = 1_000_000
         self.buffer_length = CHUNK_LENGTH
-    def fill_buffer(self, index: int) -> None:
+    def get_buffer(self, index: int) -> str:
         if self.buffers.get(index) is None:
             self.fd.seek(index * self.buffer_length)
             self.buffers[index] = self.fd.read(self.buffer_length)
             # Save memory by keeping max 2MB buffer chunks and min 2 chunks
             if len(self.buffers) > max(2, 2_000_000 / self.buffer_length):
                 oldest_key = next(iter(self.buffers))
-                self.buffers.pop(oldest_key)
+                if oldest_key != index:
+                    self.buffers.pop(oldest_key)
+        return self.buffers[index]
     def __getitem__(self, index: Union[int, slice]) -> str:
         # The buffer is an array that is seek like a RAM:
@@ -56,25 +60,24 @@ class StringFileWrapper:
         if isinstance(index, slice):
             buffer_index = index.start // self.buffer_length
             buffer_end = index.stop // self.buffer_length
-            for i in range(buffer_index, buffer_end + 1):
-                self.fill_buffer(i)
             if buffer_index == buffer_end:
-                return self.buffers[buffer_index][
+                return self.get_buffer(buffer_index)[
                     index.start % self.buffer_length : index.stop % self.buffer_length
                 ]
             else:
-                start_slice = self.buffers[buffer_index][
+                start_slice = self.get_buffer(buffer_index)[
                     index.start % self.buffer_length :
                 ]
-                end_slice = self.buffers[buffer_end][: index.stop % self.buffer_length]
+                end_slice = self.get_buffer(buffer_end)[
+                    : index.stop % self.buffer_length
+                ]
                 middle_slices = [
-                    self.buffers[i] for i in range(buffer_index + 1, buffer_end)
+                    self.get_buffer(i) for i in range(buffer_index + 1, buffer_end)
                 ]
                 return start_slice + "".join(middle_slices) + end_slice
         else:
             buffer_index = index // self.buffer_length
-            self.fill_buffer(buffer_index)
-            return self.buffers[buffer_index][index % self.buffer_length]
+            return self.get_buffer(buffer_index)[index % self.buffer_length]
     def __len__(self) -> int:
         if self.length < 1:
@@ -755,3 +758,65 @@ def from_file(
     fd.close()
     return jsonobj
+def cli(inline_args: Optional[List[str]] = None) -> int:
+    parser = argparse.ArgumentParser(description="Repair and parse JSON files.")
+    parser.add_argument("filename", help="The JSON file to repair")
+    parser.add_argument(
+        "-i",
+        "--inline",
+        action="store_true",
+        help="Replace the file inline instead of returning the output to stdout",
+    )
+    parser.add_argument(
+        "-o",
+        "--output",
+        metavar="TARGET",
+        help="If specified, the output will be written to TARGET filename instead of stdout",
+    )
+    parser.add_argument(
+        "--ensure_ascii",
+        action="store_true",
+        help="Pass ensure_ascii=True to json.dumps()",
+    )
+    parser.add_argument(
+        "--indent",
+        type=int,
+        default=2,
+        help="Number of spaces for indentation (Default 2)",
+    )
+    if inline_args is None:  # pragma: no cover
+        args = parser.parse_args()
+    else:
+        args = parser.parse_args(
+            inline_args
+        )  # This is needed so this function is testable
+    if args.inline and args.output:  # pragma: no cover
+        print("Error: You cannot pass both --inline and --output", file=sys.stderr)
+        sys.exit(1)
+    ensure_ascii = False
+    if args.ensure_ascii:
+        ensure_ascii = True
+    try:
+        result = from_file(args.filename)
+        if args.inline or args.output:
+            fd = open(args.output or args.filename, mode="w")
+            json.dump(result, fd, indent=args.indent, ensure_ascii=ensure_ascii)
+            fd.close()
+        else:
+            print(json.dumps(result, indent=args.indent, ensure_ascii=ensure_ascii))
+    except Exception as e:  # pragma: no cover
+        print(f"Error: {str(e)}", file=sys.stderr)
+        sys.exit(1)
+    return 0  # Success
+if __name__ == "__main__":  # pragma: no cover
+    sys.exit(cli())

{json_repair-0.28.4 → json_repair-0.29.1/src/json_repair.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: json_repair
-Version: 0.28.4
+Version: 0.29.1
 Summary: A package to repair broken json strings
 Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
 License: MIT License
@@ -150,6 +150,31 @@ Some rules of thumb to use:
 - `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
 - If you are having issues with escaping pass the string as **raw** string like: `r"string with escaping\""`
+### Use json_repair from CLI
+Install the library for command-line with:
+```
+pipx install json-repair
+```
+to know all options available:
+```
+$ json_repair -h
+usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
+Repair and parse JSON files.
+positional arguments:
+  filename              The JSON file to repair
+options:
+  -h, --help            show this help message and exit
+  -i, --inline          Replace the file inline instead of returning the output to stdout
+  -o TARGET, --output TARGET
+                        If specified, the output will be written to TARGET filename instead of stdout
+  --ensure_ascii        Pass ensure_ascii=True to json.dumps()
+  --indent INDENT       Number of spaces for indentation (Default 2)
+```
 ## Adding to requirements
 **Please pin this library only on the major version!**

{json_repair-0.28.4 → json_repair-0.29.1}/src/json_repair.egg-info/SOURCES.txt RENAMED Viewed

@@ -2,11 +2,13 @@ LICENSE
 README.md
 pyproject.toml
 src/json_repair/__init__.py
+src/json_repair/__main__.py
 src/json_repair/json_repair.py
 src/json_repair/py.typed
 src/json_repair.egg-info/PKG-INFO
 src/json_repair.egg-info/SOURCES.txt
 src/json_repair.egg-info/dependency_links.txt
+src/json_repair.egg-info/entry_points.txt
 src/json_repair.egg-info/top_level.txt
 tests/test_coverage.py
 tests/test_json_repair.py

json_repair-0.29.1/src/json_repair.egg-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,2 @@
+[console_scripts]
+json_repair = json_repair.__main__:cli

{json_repair-0.28.4 → json_repair-0.29.1}/tests/test_json_repair.py RENAMED Viewed

@@ -1,4 +1,8 @@
-from src.json_repair.json_repair import from_file, repair_json, loads
+from src.json_repair.json_repair import from_file, repair_json, loads, cli
+from unittest.mock import patch
+import os.path
+import pathlib
+import tempfile
 def test_basic_types_valid():
     assert repair_json("True", return_objects=True) == ""
@@ -225,10 +229,6 @@ def test_repair_json_skip_json_loads():
 def test_repair_json_from_file():
-    import os.path
-    import pathlib
-    import tempfile
     path = pathlib.Path(__file__).parent.resolve()
     # Use chunk_length 2 to test the buffering feature
@@ -263,3 +263,50 @@ def test_repair_json_from_file():
 def test_ensure_ascii():
     assert repair_json("{'test_中国人_ascii':'统一码'}", ensure_ascii=False) == '{"test_中国人_ascii": "统一码"}'
+def test_cli(capsys):
+    # Create a temporary file
+    temp_fd, temp_path = tempfile.mkstemp(suffix=".json")
+    try:
+        # Write content to the temporary file
+        with os.fdopen(temp_fd, 'w') as tmp:
+            tmp.write("{key:value")
+        cli(inline_args=[temp_path, '--indent', 0, '--ensure_ascii'])
+        captured = capsys.readouterr()
+        assert captured.out == '{\n"key": "value"\n}\n'
+        # Test the output option
+        tempout_fd, tempout_path = tempfile.mkstemp(suffix=".json")
+        cli(inline_args=[temp_path, '--indent', 0, '-o', tempout_path])
+        with open(tempout_path, 'r') as tmp:
+            out = tmp.read()
+        assert out == '{\n"key": "value"\n}'
+        # Test the inline option
+        cli(inline_args=[temp_path, '--indent', 0, '-i'])
+        with open(temp_path, 'r') as tmp:
+            out = tmp.read()
+        assert out == '{\n"key": "value"\n}'
+    finally:
+        # Clean up - delete the temporary file
+        os.remove(temp_path)
+        os.remove(tempout_path)
+"""
+def test_cli_inline(sample_json_file):
+    with patch('sys.argv', ['json_repair', sample_json_file, '-i']):
+        cli()
+    with open(sample_json_file, 'r') as f:
+        assert json.load(f) == {"key": "value"}
+def test_cli_output_file(sample_json_file, tmp_path):
+    output_file = tmp_path / "output.json"
+    with patch('sys.argv', ['json_repair', sample_json_file, '-o', str(output_file)]):
+        cli()
+    with open(output_file, 'r') as f:
+        assert json.load(f) == {"key": "value"}
+"""

{json_repair-0.28.4 → json_repair-0.29.1}/tests/test_performance.py RENAMED Viewed

@@ -19,7 +19,7 @@ def test_true_true_correct(benchmark):
   mean_time = benchmark.stats.get("median")
   # Define your time threshold in seconds
-  max_time = 14 / 10 ** 4  # 1.4 millisecond
+  max_time = 15 / 10 ** 4  # 1.5 millisecond
   # Assert that the average time is below the threshold
   assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -31,7 +31,7 @@ def test_true_true_incorrect(benchmark):
   mean_time = benchmark.stats.get("median")
   # Define your time threshold in seconds
-  max_time = 14 / 10 ** 4  # 1.4 millisecond
+  max_time = 15 / 10 ** 4  # 1.5 millisecond
   # Assert that the average time is below the threshold
   assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -53,7 +53,7 @@ def test_true_false_incorrect(benchmark):
   mean_time = benchmark.stats.get("median")
   # Define your time threshold in seconds
-  max_time = 14 / 10 ** 4  # 1.4 millisecond
+  max_time = 15 / 10 ** 4  # 1.5 millisecond
   # Assert that the average time is below the threshold
   assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -64,7 +64,7 @@ def test_false_true_correct(benchmark):
   mean_time = benchmark.stats.get("median")
   # Define your time threshold in seconds
-  max_time = 14 / 10 ** 4  # 1.4 millisecond
+  max_time = 15 / 10 ** 4  # 1.5 millisecond
   # Assert that the average time is below the threshold
   assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -75,7 +75,7 @@ def test_false_true_incorrect(benchmark):
   mean_time = benchmark.stats.get("median")
   # Define your time threshold in seconds
-  max_time = 14 / 10 ** 4  # 1.4 millisecond
+  max_time = 15 / 10 ** 4  # 1.5 millisecond
   # Assert that the average time is below the threshold
   assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"

{json_repair-0.28.4 → json_repair-0.29.1}/LICENSE RENAMED Viewed

File without changes

{json_repair-0.28.4 → json_repair-0.29.1}/setup.cfg RENAMED Viewed

File without changes

{json_repair-0.28.4 → json_repair-0.29.1}/src/json_repair/__init__.py RENAMED Viewed

File without changes

{json_repair-0.28.4 → json_repair-0.29.1}/src/json_repair/py.typed RENAMED Viewed

File without changes

{json_repair-0.28.4 → json_repair-0.29.1}/src/json_repair.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{json_repair-0.28.4 → json_repair-0.29.1}/src/json_repair.egg-info/top_level.txt RENAMED Viewed

File without changes

{json_repair-0.28.4 → json_repair-0.29.1}/tests/test_coverage.py RENAMED Viewed

File without changes

json-repair 0.28.4__tar.gz → 0.29.1__tar.gz

json-repair 0.28.4__tar.gz → 0.29.1__tar.gz