PyPI - json-repair - Versions diffs - 0.28.4__tar.gz → 0.29.0__tar.gz - Mend

json-repair 0.28.4.tar.gz → 0.29.0.tar.gz

Files changed (17) hide show

{json_repair-0.28.4/src/json_repair.egg-info → json_repair-0.29.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: json_repair
-Version: 0.28.4
+Version: 0.29.0
 Summary: A package to repair broken json strings
 Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
 License: MIT License
@@ -150,6 +150,31 @@ Some rules of thumb to use:
 - `skip_json_loads` is faster only if you are 100% sure that the string is not valid JSON
 - If you are having issues with escaping, pass the string as a **raw** string, like: `r"string with escaping\""`
+### Use json_repair from CLI
+Install the library for command-line use with:
+```
+pipx install json-repair
+```
+then run
+```
+$ json_repair -h
+usage: json_repair [-h] [-i] [--ensure_ascii] [--indent INDENT] filename
+Repair and parse JSON files.
+positional arguments:
+  filename         The JSON file to repair
+options:
+  -h, --help       show this help message and exit
+  -i, --inline     Replace the file inline instead of returning the output to stdout
+  --ensure_ascii   Pass the ensure_ascii parameter to json.dumps()
+  --indent INDENT  Number of spaces for indentation (Default 2)
+```
+to learn how to use it
 ## Adding to requirements
 **Please pin this library only on the major version!**

{json_repair-0.28.4 → json_repair-0.29.0}/README.md RENAMED Viewed

@@ -112,6 +112,31 @@ Some rules of thumb to use:
 - `skip_json_loads` is faster only if you are 100% sure that the string is not valid JSON
 - If you are having issues with escaping, pass the string as a **raw** string, like: `r"string with escaping\""`
+### Use json_repair from CLI
+Install the library for command-line use with:
+```
+pipx install json-repair
+```
+then run
+```
+$ json_repair -h
+usage: json_repair [-h] [-i] [--ensure_ascii] [--indent INDENT] filename
+Repair and parse JSON files.
+positional arguments:
+  filename         The JSON file to repair
+options:
+  -h, --help       show this help message and exit
+  -i, --inline     Replace the file inline instead of returning the output to stdout
+  --ensure_ascii   Pass the ensure_ascii parameter to json.dumps()
+  --indent INDENT  Number of spaces for indentation (Default 2)
+```
+to learn how to use it
 ## Adding to requirements
 **Please pin this library only on the major version!**

{json_repair-0.28.4 → json_repair-0.29.0}/pyproject.toml RENAMED Viewed

@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
 build-backend = "setuptools.build_meta"
 [project]
 name = "json_repair"
-version = "0.28.4"
+version = "0.29.0"
 license = {file = "LICENSE"}
 authors = [
   { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
@@ -29,3 +29,5 @@ pythonpath = [
 "pkgname" = ["py.typed"]
 [tool.setuptools.packages.find]
 where = ["src"]
+[project.scripts]
+json_repair = "json_repair.__main__:cli"

json_repair-0.29.0/src/json_repair/__main__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from .json_repair import cli
+if __name__ == "__main__":
+    cli()

{json_repair-0.28.4 → json_repair-0.29.0}/src/json_repair/json_repair.py RENAMED Viewed

@@ -22,7 +22,9 @@ If something is wrong (a missing parantheses or quotes for example) it will use
 All supported use cases are in the unit tests
 """
+import argparse
 import os
+import sys
 import json
 from typing import Any, Dict, List, Optional, Union, TextIO, Tuple, Literal
@@ -40,14 +42,16 @@ class StringFileWrapper:
             CHUNK_LENGTH = 1_000_000
         self.buffer_length = CHUNK_LENGTH
-    def fill_buffer(self, index: int) -> None:
+    def get_buffer(self, index: int) -> str:
         if self.buffers.get(index) is None:
             self.fd.seek(index * self.buffer_length)
             self.buffers[index] = self.fd.read(self.buffer_length)
             # Save memory by keeping max 2MB buffer chunks and min 2 chunks
             if len(self.buffers) > max(2, 2_000_000 / self.buffer_length):
                 oldest_key = next(iter(self.buffers))
-                self.buffers.pop(oldest_key)
+                if oldest_key != index:
+                    self.buffers.pop(oldest_key)
+        return self.buffers[index]
     def __getitem__(self, index: Union[int, slice]) -> str:
         # The buffer is an array that is seek like a RAM:
@@ -56,25 +60,24 @@ class StringFileWrapper:
         if isinstance(index, slice):
             buffer_index = index.start // self.buffer_length
             buffer_end = index.stop // self.buffer_length
-            for i in range(buffer_index, buffer_end + 1):
-                self.fill_buffer(i)
             if buffer_index == buffer_end:
-                return self.buffers[buffer_index][
+                return self.get_buffer(buffer_index)[
                     index.start % self.buffer_length : index.stop % self.buffer_length
                 ]
             else:
-                start_slice = self.buffers[buffer_index][
+                start_slice = self.get_buffer(buffer_index)[
                     index.start % self.buffer_length :
                 ]
-                end_slice = self.buffers[buffer_end][: index.stop % self.buffer_length]
+                end_slice = self.get_buffer(buffer_end)[
+                    : index.stop % self.buffer_length
+                ]
                 middle_slices = [
-                    self.buffers[i] for i in range(buffer_index + 1, buffer_end)
+                    self.get_buffer(i) for i in range(buffer_index + 1, buffer_end)
                 ]
                 return start_slice + "".join(middle_slices) + end_slice
         else:
             buffer_index = index // self.buffer_length
-            self.fill_buffer(buffer_index)
-            return self.buffers[buffer_index][index % self.buffer_length]
+            return self.get_buffer(buffer_index)[index % self.buffer_length]
     def __len__(self) -> int:
         if self.length < 1:
@@ -755,3 +758,47 @@ def from_file(
     fd.close()
     return jsonobj
+def cli():  # pragma: no cover
+    parser = argparse.ArgumentParser(description="Repair and parse JSON files.")
+    parser.add_argument("filename", help="The JSON file to repair")
+    parser.add_argument(
+        "-i",
+        "--inline",
+        action="store_true",
+        help="Replace the file inline instead of returning the output to stdout",
+    )
+    parser.add_argument(
+        "--ensure_ascii",
+        action="store_true",
+        help="Pass the ensure_ascii parameter to json.dumps()",
+    )
+    parser.add_argument(
+        "--indent",
+        type=int,
+        default=2,
+        help="Number of spaces for indentation (Default 2)",
+    )
+    args = parser.parse_args()
+    ensure_ascii = False
+    if args.ensure_ascii:
+        ensure_ascii = True
+    try:
+        result = from_file(args.filename)
+        if args.inline:
+            fd = open(args.filename, mode="w")
+            json.dump(result, fd, indent=args.indent, ensure_ascii=ensure_ascii)
+            fd.close()
+        else:
+            print(json.dumps(result, indent=args.indent, ensure_ascii=ensure_ascii))
+    except Exception as e:
+        print(f"Error: {str(e)}", file=sys.stderr)
+        sys.exit(1)
+if __name__ == "__main__":  # pragma: no cover
+    cli()

{json_repair-0.28.4 → json_repair-0.29.0/src/json_repair.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: json_repair
-Version: 0.28.4
+Version: 0.29.0
 Summary: A package to repair broken json strings
 Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
 License: MIT License
@@ -150,6 +150,31 @@ Some rules of thumb to use:
 - `skip_json_loads` is faster only if you are 100% sure that the string is not valid JSON
 - If you are having issues with escaping, pass the string as a **raw** string, like: `r"string with escaping\""`
+### Use json_repair from CLI
+Install the library for command-line use with:
+```
+pipx install json-repair
+```
+then run
+```
+$ json_repair -h
+usage: json_repair [-h] [-i] [--ensure_ascii] [--indent INDENT] filename
+Repair and parse JSON files.
+positional arguments:
+  filename         The JSON file to repair
+options:
+  -h, --help       show this help message and exit
+  -i, --inline     Replace the file inline instead of returning the output to stdout
+  --ensure_ascii   Pass the ensure_ascii parameter to json.dumps()
+  --indent INDENT  Number of spaces for indentation (Default 2)
+```
+to learn how to use it
 ## Adding to requirements
 **Please pin this library only on the major version!**

{json_repair-0.28.4 → json_repair-0.29.0}/src/json_repair.egg-info/SOURCES.txt RENAMED Viewed

@@ -2,11 +2,13 @@ LICENSE
 README.md
 pyproject.toml
 src/json_repair/__init__.py
+src/json_repair/__main__.py
 src/json_repair/json_repair.py
 src/json_repair/py.typed
 src/json_repair.egg-info/PKG-INFO
 src/json_repair.egg-info/SOURCES.txt
 src/json_repair.egg-info/dependency_links.txt
+src/json_repair.egg-info/entry_points.txt
 src/json_repair.egg-info/top_level.txt
 tests/test_coverage.py
 tests/test_json_repair.py

json_repair-0.29.0/src/json_repair.egg-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,2 @@
+[console_scripts]
+json_repair = json_repair.__main__:cli

{json_repair-0.28.4 → json_repair-0.29.0}/tests/test_performance.py RENAMED Viewed

@@ -19,7 +19,7 @@ def test_true_true_correct(benchmark):
   mean_time = benchmark.stats.get("median")
   # Define your time threshold in seconds
-  max_time = 14 / 10 ** 4  # 1.4 millisecond
+  max_time = 15 / 10 ** 4  # 1.5 millisecond
   # Assert that the average time is below the threshold
   assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -31,7 +31,7 @@ def test_true_true_incorrect(benchmark):
   mean_time = benchmark.stats.get("median")
   # Define your time threshold in seconds
-  max_time = 14 / 10 ** 4  # 1.4 millisecond
+  max_time = 15 / 10 ** 4  # 1.5 millisecond
   # Assert that the average time is below the threshold
   assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -53,7 +53,7 @@ def test_true_false_incorrect(benchmark):
   mean_time = benchmark.stats.get("median")
   # Define your time threshold in seconds
-  max_time = 14 / 10 ** 4  # 1.4 millisecond
+  max_time = 15 / 10 ** 4  # 1.5 millisecond
   # Assert that the average time is below the threshold
   assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -64,7 +64,7 @@ def test_false_true_correct(benchmark):
   mean_time = benchmark.stats.get("median")
   # Define your time threshold in seconds
-  max_time = 14 / 10 ** 4  # 1.4 millisecond
+  max_time = 15 / 10 ** 4  # 1.5 millisecond
   # Assert that the average time is below the threshold
   assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -75,7 +75,7 @@ def test_false_true_incorrect(benchmark):
   mean_time = benchmark.stats.get("median")
   # Define your time threshold in seconds
-  max_time = 14 / 10 ** 4  # 1.4 millisecond
+  max_time = 15 / 10 ** 4  # 1.5 millisecond
   # Assert that the average time is below the threshold
   assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"