json-repair 0.28.4__tar.gz → 0.29.1__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.28.4
3
+ Version: 0.29.1
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -150,6 +150,31 @@ Some rules of thumb to use:
150
150
  - `skip_json_loads` is faster only if you 100% know that the string is not valid JSON
151
151
  - If you are having issues with escaping, pass the string as a **raw** string, like: `r"string with escaping\""`
152
152
 
153
+ ### Use json_repair from CLI
154
+
155
+ Install the library for command-line use with:
156
+ ```
157
+ pipx install json-repair
158
+ ```
159
+ To see all available options:
160
+ ```
161
+ $ json_repair -h
162
+ usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
163
+
164
+ Repair and parse JSON files.
165
+
166
+ positional arguments:
167
+ filename The JSON file to repair
168
+
169
+ options:
170
+ -h, --help show this help message and exit
171
+ -i, --inline Replace the file inline instead of returning the output to stdout
172
+ -o TARGET, --output TARGET
173
+ If specified, the output will be written to TARGET filename instead of stdout
174
+ --ensure_ascii Pass ensure_ascii=True to json.dumps()
175
+ --indent INDENT Number of spaces for indentation (Default 2)
176
+ ```
177
+
153
178
  ## Adding to requirements
154
179
  **Please pin this library only on the major version!**
155
180
 
@@ -112,6 +112,31 @@ Some rules of thumb to use:
112
112
  - `skip_json_loads` is faster only if you 100% know that the string is not valid JSON
113
113
  - If you are having issues with escaping, pass the string as a **raw** string, like: `r"string with escaping\""`
114
114
 
115
+ ### Use json_repair from CLI
116
+
117
+ Install the library for command-line use with:
118
+ ```
119
+ pipx install json-repair
120
+ ```
121
+ To see all available options:
122
+ ```
123
+ $ json_repair -h
124
+ usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
125
+
126
+ Repair and parse JSON files.
127
+
128
+ positional arguments:
129
+ filename The JSON file to repair
130
+
131
+ options:
132
+ -h, --help show this help message and exit
133
+ -i, --inline Replace the file inline instead of returning the output to stdout
134
+ -o TARGET, --output TARGET
135
+ If specified, the output will be written to TARGET filename instead of stdout
136
+ --ensure_ascii Pass ensure_ascii=True to json.dumps()
137
+ --indent INDENT Number of spaces for indentation (Default 2)
138
+ ```
139
+
115
140
  ## Adding to requirements
116
141
  **Please pin this library only on the major version!**
117
142
 
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
3
3
  build-backend = "setuptools.build_meta"
4
4
  [project]
5
5
  name = "json_repair"
6
- version = "0.28.4"
6
+ version = "0.29.1"
7
7
  license = {file = "LICENSE"}
8
8
  authors = [
9
9
  { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
@@ -29,3 +29,5 @@ pythonpath = [
29
29
  "pkgname" = ["py.typed"]
30
30
  [tool.setuptools.packages.find]
31
31
  where = ["src"]
32
+ [project.scripts]
33
+ json_repair = "json_repair.__main__:cli"
@@ -0,0 +1,4 @@
1
+ from .json_repair import cli
2
+
3
+ if __name__ == "__main__":
4
+ cli()
@@ -22,7 +22,9 @@ If something is wrong (a missing parenthesis or quotes for example) it will use
22
22
  All supported use cases are in the unit tests
23
23
  """
24
24
 
25
+ import argparse
25
26
  import os
27
+ import sys
26
28
  import json
27
29
  from typing import Any, Dict, List, Optional, Union, TextIO, Tuple, Literal
28
30
 
@@ -40,14 +42,16 @@ class StringFileWrapper:
40
42
  CHUNK_LENGTH = 1_000_000
41
43
  self.buffer_length = CHUNK_LENGTH
42
44
 
43
- def fill_buffer(self, index: int) -> None:
45
+ def get_buffer(self, index: int) -> str:
44
46
  if self.buffers.get(index) is None:
45
47
  self.fd.seek(index * self.buffer_length)
46
48
  self.buffers[index] = self.fd.read(self.buffer_length)
47
49
  # Save memory by keeping max 2MB buffer chunks and min 2 chunks
48
50
  if len(self.buffers) > max(2, 2_000_000 / self.buffer_length):
49
51
  oldest_key = next(iter(self.buffers))
50
- self.buffers.pop(oldest_key)
52
+ if oldest_key != index:
53
+ self.buffers.pop(oldest_key)
54
+ return self.buffers[index]
51
55
 
52
56
  def __getitem__(self, index: Union[int, slice]) -> str:
53
57
  # The buffer is an array that is seek like a RAM:
@@ -56,25 +60,24 @@ class StringFileWrapper:
56
60
  if isinstance(index, slice):
57
61
  buffer_index = index.start // self.buffer_length
58
62
  buffer_end = index.stop // self.buffer_length
59
- for i in range(buffer_index, buffer_end + 1):
60
- self.fill_buffer(i)
61
63
  if buffer_index == buffer_end:
62
- return self.buffers[buffer_index][
64
+ return self.get_buffer(buffer_index)[
63
65
  index.start % self.buffer_length : index.stop % self.buffer_length
64
66
  ]
65
67
  else:
66
- start_slice = self.buffers[buffer_index][
68
+ start_slice = self.get_buffer(buffer_index)[
67
69
  index.start % self.buffer_length :
68
70
  ]
69
- end_slice = self.buffers[buffer_end][: index.stop % self.buffer_length]
71
+ end_slice = self.get_buffer(buffer_end)[
72
+ : index.stop % self.buffer_length
73
+ ]
70
74
  middle_slices = [
71
- self.buffers[i] for i in range(buffer_index + 1, buffer_end)
75
+ self.get_buffer(i) for i in range(buffer_index + 1, buffer_end)
72
76
  ]
73
77
  return start_slice + "".join(middle_slices) + end_slice
74
78
  else:
75
79
  buffer_index = index // self.buffer_length
76
- self.fill_buffer(buffer_index)
77
- return self.buffers[buffer_index][index % self.buffer_length]
80
+ return self.get_buffer(buffer_index)[index % self.buffer_length]
78
81
 
79
82
  def __len__(self) -> int:
80
83
  if self.length < 1:
@@ -755,3 +758,65 @@ def from_file(
755
758
  fd.close()
756
759
 
757
760
  return jsonobj
761
+
762
+
763
+ def cli(inline_args: Optional[List[str]] = None) -> int:
764
+ parser = argparse.ArgumentParser(description="Repair and parse JSON files.")
765
+ parser.add_argument("filename", help="The JSON file to repair")
766
+ parser.add_argument(
767
+ "-i",
768
+ "--inline",
769
+ action="store_true",
770
+ help="Replace the file inline instead of returning the output to stdout",
771
+ )
772
+ parser.add_argument(
773
+ "-o",
774
+ "--output",
775
+ metavar="TARGET",
776
+ help="If specified, the output will be written to TARGET filename instead of stdout",
777
+ )
778
+ parser.add_argument(
779
+ "--ensure_ascii",
780
+ action="store_true",
781
+ help="Pass ensure_ascii=True to json.dumps()",
782
+ )
783
+ parser.add_argument(
784
+ "--indent",
785
+ type=int,
786
+ default=2,
787
+ help="Number of spaces for indentation (Default 2)",
788
+ )
789
+
790
+ if inline_args is None: # pragma: no cover
791
+ args = parser.parse_args()
792
+ else:
793
+ args = parser.parse_args(
794
+ inline_args
795
+ ) # This is needed so this function is testable
796
+
797
+ if args.inline and args.output: # pragma: no cover
798
+ print("Error: You cannot pass both --inline and --output", file=sys.stderr)
799
+ sys.exit(1)
800
+
801
+ ensure_ascii = False
802
+ if args.ensure_ascii:
803
+ ensure_ascii = True
804
+
805
+ try:
806
+ result = from_file(args.filename)
807
+
808
+ if args.inline or args.output:
809
+ fd = open(args.output or args.filename, mode="w")
810
+ json.dump(result, fd, indent=args.indent, ensure_ascii=ensure_ascii)
811
+ fd.close()
812
+ else:
813
+ print(json.dumps(result, indent=args.indent, ensure_ascii=ensure_ascii))
814
+ except Exception as e: # pragma: no cover
815
+ print(f"Error: {str(e)}", file=sys.stderr)
816
+ sys.exit(1)
817
+
818
+ return 0 # Success
819
+
820
+
821
+ if __name__ == "__main__": # pragma: no cover
822
+ sys.exit(cli())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.28.4
3
+ Version: 0.29.1
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -150,6 +150,31 @@ Some rules of thumb to use:
150
150
  - `skip_json_loads` is faster only if you 100% know that the string is not valid JSON
151
151
  - If you are having issues with escaping, pass the string as a **raw** string, like: `r"string with escaping\""`
152
152
 
153
+ ### Use json_repair from CLI
154
+
155
+ Install the library for command-line use with:
156
+ ```
157
+ pipx install json-repair
158
+ ```
159
+ To see all available options:
160
+ ```
161
+ $ json_repair -h
162
+ usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
163
+
164
+ Repair and parse JSON files.
165
+
166
+ positional arguments:
167
+ filename The JSON file to repair
168
+
169
+ options:
170
+ -h, --help show this help message and exit
171
+ -i, --inline Replace the file inline instead of returning the output to stdout
172
+ -o TARGET, --output TARGET
173
+ If specified, the output will be written to TARGET filename instead of stdout
174
+ --ensure_ascii Pass ensure_ascii=True to json.dumps()
175
+ --indent INDENT Number of spaces for indentation (Default 2)
176
+ ```
177
+
153
178
  ## Adding to requirements
154
179
  **Please pin this library only on the major version!**
155
180
 
@@ -2,11 +2,13 @@ LICENSE
2
2
  README.md
3
3
  pyproject.toml
4
4
  src/json_repair/__init__.py
5
+ src/json_repair/__main__.py
5
6
  src/json_repair/json_repair.py
6
7
  src/json_repair/py.typed
7
8
  src/json_repair.egg-info/PKG-INFO
8
9
  src/json_repair.egg-info/SOURCES.txt
9
10
  src/json_repair.egg-info/dependency_links.txt
11
+ src/json_repair.egg-info/entry_points.txt
10
12
  src/json_repair.egg-info/top_level.txt
11
13
  tests/test_coverage.py
12
14
  tests/test_json_repair.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ json_repair = json_repair.__main__:cli
@@ -1,4 +1,8 @@
1
- from src.json_repair.json_repair import from_file, repair_json, loads
1
+ from src.json_repair.json_repair import from_file, repair_json, loads, cli
2
+ from unittest.mock import patch
3
+ import os.path
4
+ import pathlib
5
+ import tempfile
2
6
 
3
7
  def test_basic_types_valid():
4
8
  assert repair_json("True", return_objects=True) == ""
@@ -225,10 +229,6 @@ def test_repair_json_skip_json_loads():
225
229
 
226
230
 
227
231
  def test_repair_json_from_file():
228
- import os.path
229
- import pathlib
230
- import tempfile
231
-
232
232
  path = pathlib.Path(__file__).parent.resolve()
233
233
 
234
234
  # Use chunk_length 2 to test the buffering feature
@@ -263,3 +263,50 @@ def test_repair_json_from_file():
263
263
 
264
264
  def test_ensure_ascii():
265
265
  assert repair_json("{'test_中国人_ascii':'统一码'}", ensure_ascii=False) == '{"test_中国人_ascii": "统一码"}'
266
+
267
+
268
+
269
+ def test_cli(capsys):
270
+ # Create a temporary file
271
+ temp_fd, temp_path = tempfile.mkstemp(suffix=".json")
272
+ try:
273
+ # Write content to the temporary file
274
+ with os.fdopen(temp_fd, 'w') as tmp:
275
+ tmp.write("{key:value")
276
+ cli(inline_args=[temp_path, '--indent', 0, '--ensure_ascii'])
277
+ captured = capsys.readouterr()
278
+ assert captured.out == '{\n"key": "value"\n}\n'
279
+
280
+ # Test the output option
281
+ tempout_fd, tempout_path = tempfile.mkstemp(suffix=".json")
282
+ cli(inline_args=[temp_path, '--indent', 0, '-o', tempout_path])
283
+ with open(tempout_path, 'r') as tmp:
284
+ out = tmp.read()
285
+ assert out == '{\n"key": "value"\n}'
286
+
287
+ # Test the inline option
288
+ cli(inline_args=[temp_path, '--indent', 0, '-i'])
289
+ with open(temp_path, 'r') as tmp:
290
+ out = tmp.read()
291
+ assert out == '{\n"key": "value"\n}'
292
+
293
+
294
+ finally:
295
+ # Clean up - delete the temporary file
296
+ os.remove(temp_path)
297
+ os.remove(tempout_path)
298
+
299
+ """
300
+ def test_cli_inline(sample_json_file):
301
+ with patch('sys.argv', ['json_repair', sample_json_file, '-i']):
302
+ cli()
303
+ with open(sample_json_file, 'r') as f:
304
+ assert json.load(f) == {"key": "value"}
305
+
306
+ def test_cli_output_file(sample_json_file, tmp_path):
307
+ output_file = tmp_path / "output.json"
308
+ with patch('sys.argv', ['json_repair', sample_json_file, '-o', str(output_file)]):
309
+ cli()
310
+ with open(output_file, 'r') as f:
311
+ assert json.load(f) == {"key": "value"}
312
+ """
@@ -19,7 +19,7 @@ def test_true_true_correct(benchmark):
19
19
  mean_time = benchmark.stats.get("median")
20
20
 
21
21
  # Define your time threshold in seconds
22
- max_time = 14 / 10 ** 4 # 1.4 millisecond
22
+ max_time = 15 / 10 ** 4 # 1.5 millisecond
23
23
 
24
24
  # Assert that the average time is below the threshold
25
25
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -31,7 +31,7 @@ def test_true_true_incorrect(benchmark):
31
31
  mean_time = benchmark.stats.get("median")
32
32
 
33
33
  # Define your time threshold in seconds
34
- max_time = 14 / 10 ** 4 # 1.4 millisecond
34
+ max_time = 15 / 10 ** 4 # 1.5 millisecond
35
35
 
36
36
  # Assert that the average time is below the threshold
37
37
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -53,7 +53,7 @@ def test_true_false_incorrect(benchmark):
53
53
  mean_time = benchmark.stats.get("median")
54
54
 
55
55
  # Define your time threshold in seconds
56
- max_time = 14 / 10 ** 4 # 1.4 millisecond
56
+ max_time = 15 / 10 ** 4 # 1.5 millisecond
57
57
 
58
58
  # Assert that the average time is below the threshold
59
59
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -64,7 +64,7 @@ def test_false_true_correct(benchmark):
64
64
  mean_time = benchmark.stats.get("median")
65
65
 
66
66
  # Define your time threshold in seconds
67
- max_time = 14 / 10 ** 4 # 1.4 millisecond
67
+ max_time = 15 / 10 ** 4 # 1.5 millisecond
68
68
 
69
69
  # Assert that the average time is below the threshold
70
70
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -75,7 +75,7 @@ def test_false_true_incorrect(benchmark):
75
75
  mean_time = benchmark.stats.get("median")
76
76
 
77
77
  # Define your time threshold in seconds
78
- max_time = 14 / 10 ** 4 # 1.4 millisecond
78
+ max_time = 15 / 10 ** 4 # 1.5 millisecond
79
79
 
80
80
  # Assert that the average time is below the threshold
81
81
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
File without changes
File without changes