json-repair 0.28.4__tar.gz → 0.29.1__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {json_repair-0.28.4/src/json_repair.egg-info → json_repair-0.29.1}/PKG-INFO +26 -1
- {json_repair-0.28.4 → json_repair-0.29.1}/README.md +25 -0
- {json_repair-0.28.4 → json_repair-0.29.1}/pyproject.toml +3 -1
- json_repair-0.29.1/src/json_repair/__main__.py +4 -0
- {json_repair-0.28.4 → json_repair-0.29.1}/src/json_repair/json_repair.py +75 -10
- {json_repair-0.28.4 → json_repair-0.29.1/src/json_repair.egg-info}/PKG-INFO +26 -1
- {json_repair-0.28.4 → json_repair-0.29.1}/src/json_repair.egg-info/SOURCES.txt +2 -0
- json_repair-0.29.1/src/json_repair.egg-info/entry_points.txt +2 -0
- {json_repair-0.28.4 → json_repair-0.29.1}/tests/test_json_repair.py +52 -5
- {json_repair-0.28.4 → json_repair-0.29.1}/tests/test_performance.py +5 -5
- {json_repair-0.28.4 → json_repair-0.29.1}/LICENSE +0 -0
- {json_repair-0.28.4 → json_repair-0.29.1}/setup.cfg +0 -0
- {json_repair-0.28.4 → json_repair-0.29.1}/src/json_repair/__init__.py +0 -0
- {json_repair-0.28.4 → json_repair-0.29.1}/src/json_repair/py.typed +0 -0
- {json_repair-0.28.4 → json_repair-0.29.1}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.28.4 → json_repair-0.29.1}/src/json_repair.egg-info/top_level.txt +0 -0
- {json_repair-0.28.4 → json_repair-0.29.1}/tests/test_coverage.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.28.4
|
3
|
+
Version: 0.29.1
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -150,6 +150,31 @@ Some rules of thumb to use:
|
|
150
150
|
- `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
|
151
151
|
- If you are having issues with escaping pass the string as **raw** string like: `r"string with escaping\""`
|
152
152
|
|
153
|
+
### Use json_repair from CLI
|
154
|
+
|
155
|
+
Install the library for command-line with:
|
156
|
+
```
|
157
|
+
pipx install json-repair
|
158
|
+
```
|
159
|
+
To see all available options:
|
160
|
+
```
|
161
|
+
$ json_repair -h
|
162
|
+
usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
|
163
|
+
|
164
|
+
Repair and parse JSON files.
|
165
|
+
|
166
|
+
positional arguments:
|
167
|
+
filename The JSON file to repair
|
168
|
+
|
169
|
+
options:
|
170
|
+
-h, --help show this help message and exit
|
171
|
+
-i, --inline Replace the file inline instead of returning the output to stdout
|
172
|
+
-o TARGET, --output TARGET
|
173
|
+
If specified, the output will be written to TARGET filename instead of stdout
|
174
|
+
--ensure_ascii Pass ensure_ascii=True to json.dumps()
|
175
|
+
--indent INDENT Number of spaces for indentation (Default 2)
|
176
|
+
```
|
177
|
+
|
153
178
|
## Adding to requirements
|
154
179
|
**Please pin this library only on the major version!**
|
155
180
|
|
@@ -112,6 +112,31 @@ Some rules of thumb to use:
|
|
112
112
|
- `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
|
113
113
|
- If you are having issues with escaping pass the string as **raw** string like: `r"string with escaping\""`
|
114
114
|
|
115
|
+
### Use json_repair from CLI
|
116
|
+
|
117
|
+
Install the library for command-line with:
|
118
|
+
```
|
119
|
+
pipx install json-repair
|
120
|
+
```
|
121
|
+
To see all available options:
|
122
|
+
```
|
123
|
+
$ json_repair -h
|
124
|
+
usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
|
125
|
+
|
126
|
+
Repair and parse JSON files.
|
127
|
+
|
128
|
+
positional arguments:
|
129
|
+
filename The JSON file to repair
|
130
|
+
|
131
|
+
options:
|
132
|
+
-h, --help show this help message and exit
|
133
|
+
-i, --inline Replace the file inline instead of returning the output to stdout
|
134
|
+
-o TARGET, --output TARGET
|
135
|
+
If specified, the output will be written to TARGET filename instead of stdout
|
136
|
+
--ensure_ascii Pass ensure_ascii=True to json.dumps()
|
137
|
+
--indent INDENT Number of spaces for indentation (Default 2)
|
138
|
+
```
|
139
|
+
|
115
140
|
## Adding to requirements
|
116
141
|
**Please pin this library only on the major version!**
|
117
142
|
|
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.28.4"
|
6
|
+
version = "0.29.1"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -29,3 +29,5 @@ pythonpath = [
|
|
29
29
|
"pkgname" = ["py.typed"]
|
30
30
|
[tool.setuptools.packages.find]
|
31
31
|
where = ["src"]
|
32
|
+
[project.scripts]
|
33
|
+
json_repair = "json_repair.__main__:cli"
|
@@ -22,7 +22,9 @@ If something is wrong (a missing parentheses or quotes for example) it will use
|
|
22
22
|
All supported use cases are in the unit tests
|
23
23
|
"""
|
24
24
|
|
25
|
+
import argparse
|
25
26
|
import os
|
27
|
+
import sys
|
26
28
|
import json
|
27
29
|
from typing import Any, Dict, List, Optional, Union, TextIO, Tuple, Literal
|
28
30
|
|
@@ -40,14 +42,16 @@ class StringFileWrapper:
|
|
40
42
|
CHUNK_LENGTH = 1_000_000
|
41
43
|
self.buffer_length = CHUNK_LENGTH
|
42
44
|
|
43
|
-
def
|
45
|
+
def get_buffer(self, index: int) -> str:
|
44
46
|
if self.buffers.get(index) is None:
|
45
47
|
self.fd.seek(index * self.buffer_length)
|
46
48
|
self.buffers[index] = self.fd.read(self.buffer_length)
|
47
49
|
# Save memory by keeping max 2MB buffer chunks and min 2 chunks
|
48
50
|
if len(self.buffers) > max(2, 2_000_000 / self.buffer_length):
|
49
51
|
oldest_key = next(iter(self.buffers))
|
50
|
-
|
52
|
+
if oldest_key != index:
|
53
|
+
self.buffers.pop(oldest_key)
|
54
|
+
return self.buffers[index]
|
51
55
|
|
52
56
|
def __getitem__(self, index: Union[int, slice]) -> str:
|
53
57
|
# The buffer is an array that is seek like a RAM:
|
@@ -56,25 +60,24 @@ class StringFileWrapper:
|
|
56
60
|
if isinstance(index, slice):
|
57
61
|
buffer_index = index.start // self.buffer_length
|
58
62
|
buffer_end = index.stop // self.buffer_length
|
59
|
-
for i in range(buffer_index, buffer_end + 1):
|
60
|
-
self.fill_buffer(i)
|
61
63
|
if buffer_index == buffer_end:
|
62
|
-
return self.
|
64
|
+
return self.get_buffer(buffer_index)[
|
63
65
|
index.start % self.buffer_length : index.stop % self.buffer_length
|
64
66
|
]
|
65
67
|
else:
|
66
|
-
start_slice = self.
|
68
|
+
start_slice = self.get_buffer(buffer_index)[
|
67
69
|
index.start % self.buffer_length :
|
68
70
|
]
|
69
|
-
end_slice = self.
|
71
|
+
end_slice = self.get_buffer(buffer_end)[
|
72
|
+
: index.stop % self.buffer_length
|
73
|
+
]
|
70
74
|
middle_slices = [
|
71
|
-
self.
|
75
|
+
self.get_buffer(i) for i in range(buffer_index + 1, buffer_end)
|
72
76
|
]
|
73
77
|
return start_slice + "".join(middle_slices) + end_slice
|
74
78
|
else:
|
75
79
|
buffer_index = index // self.buffer_length
|
76
|
-
self.
|
77
|
-
return self.buffers[buffer_index][index % self.buffer_length]
|
80
|
+
return self.get_buffer(buffer_index)[index % self.buffer_length]
|
78
81
|
|
79
82
|
def __len__(self) -> int:
|
80
83
|
if self.length < 1:
|
@@ -755,3 +758,65 @@ def from_file(
|
|
755
758
|
fd.close()
|
756
759
|
|
757
760
|
return jsonobj
|
761
|
+
|
762
|
+
|
763
|
+
def cli(inline_args: Optional[List[str]] = None) -> int:
|
764
|
+
parser = argparse.ArgumentParser(description="Repair and parse JSON files.")
|
765
|
+
parser.add_argument("filename", help="The JSON file to repair")
|
766
|
+
parser.add_argument(
|
767
|
+
"-i",
|
768
|
+
"--inline",
|
769
|
+
action="store_true",
|
770
|
+
help="Replace the file inline instead of returning the output to stdout",
|
771
|
+
)
|
772
|
+
parser.add_argument(
|
773
|
+
"-o",
|
774
|
+
"--output",
|
775
|
+
metavar="TARGET",
|
776
|
+
help="If specified, the output will be written to TARGET filename instead of stdout",
|
777
|
+
)
|
778
|
+
parser.add_argument(
|
779
|
+
"--ensure_ascii",
|
780
|
+
action="store_true",
|
781
|
+
help="Pass ensure_ascii=True to json.dumps()",
|
782
|
+
)
|
783
|
+
parser.add_argument(
|
784
|
+
"--indent",
|
785
|
+
type=int,
|
786
|
+
default=2,
|
787
|
+
help="Number of spaces for indentation (Default 2)",
|
788
|
+
)
|
789
|
+
|
790
|
+
if inline_args is None: # pragma: no cover
|
791
|
+
args = parser.parse_args()
|
792
|
+
else:
|
793
|
+
args = parser.parse_args(
|
794
|
+
inline_args
|
795
|
+
) # This is needed so this function is testable
|
796
|
+
|
797
|
+
if args.inline and args.output: # pragma: no cover
|
798
|
+
print("Error: You cannot pass both --inline and --output", file=sys.stderr)
|
799
|
+
sys.exit(1)
|
800
|
+
|
801
|
+
ensure_ascii = False
|
802
|
+
if args.ensure_ascii:
|
803
|
+
ensure_ascii = True
|
804
|
+
|
805
|
+
try:
|
806
|
+
result = from_file(args.filename)
|
807
|
+
|
808
|
+
if args.inline or args.output:
|
809
|
+
fd = open(args.output or args.filename, mode="w")
|
810
|
+
json.dump(result, fd, indent=args.indent, ensure_ascii=ensure_ascii)
|
811
|
+
fd.close()
|
812
|
+
else:
|
813
|
+
print(json.dumps(result, indent=args.indent, ensure_ascii=ensure_ascii))
|
814
|
+
except Exception as e: # pragma: no cover
|
815
|
+
print(f"Error: {str(e)}", file=sys.stderr)
|
816
|
+
sys.exit(1)
|
817
|
+
|
818
|
+
return 0 # Success
|
819
|
+
|
820
|
+
|
821
|
+
if __name__ == "__main__": # pragma: no cover
|
822
|
+
sys.exit(cli())
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.28.4
|
3
|
+
Version: 0.29.1
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -150,6 +150,31 @@ Some rules of thumb to use:
|
|
150
150
|
- `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
|
151
151
|
- If you are having issues with escaping pass the string as **raw** string like: `r"string with escaping\""`
|
152
152
|
|
153
|
+
### Use json_repair from CLI
|
154
|
+
|
155
|
+
Install the library for command-line with:
|
156
|
+
```
|
157
|
+
pipx install json-repair
|
158
|
+
```
|
159
|
+
To see all available options:
|
160
|
+
```
|
161
|
+
$ json_repair -h
|
162
|
+
usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
|
163
|
+
|
164
|
+
Repair and parse JSON files.
|
165
|
+
|
166
|
+
positional arguments:
|
167
|
+
filename The JSON file to repair
|
168
|
+
|
169
|
+
options:
|
170
|
+
-h, --help show this help message and exit
|
171
|
+
-i, --inline Replace the file inline instead of returning the output to stdout
|
172
|
+
-o TARGET, --output TARGET
|
173
|
+
If specified, the output will be written to TARGET filename instead of stdout
|
174
|
+
--ensure_ascii Pass ensure_ascii=True to json.dumps()
|
175
|
+
--indent INDENT Number of spaces for indentation (Default 2)
|
176
|
+
```
|
177
|
+
|
153
178
|
## Adding to requirements
|
154
179
|
**Please pin this library only on the major version!**
|
155
180
|
|
@@ -2,11 +2,13 @@ LICENSE
|
|
2
2
|
README.md
|
3
3
|
pyproject.toml
|
4
4
|
src/json_repair/__init__.py
|
5
|
+
src/json_repair/__main__.py
|
5
6
|
src/json_repair/json_repair.py
|
6
7
|
src/json_repair/py.typed
|
7
8
|
src/json_repair.egg-info/PKG-INFO
|
8
9
|
src/json_repair.egg-info/SOURCES.txt
|
9
10
|
src/json_repair.egg-info/dependency_links.txt
|
11
|
+
src/json_repair.egg-info/entry_points.txt
|
10
12
|
src/json_repair.egg-info/top_level.txt
|
11
13
|
tests/test_coverage.py
|
12
14
|
tests/test_json_repair.py
|
@@ -1,4 +1,8 @@
|
|
1
|
-
from src.json_repair.json_repair import from_file, repair_json, loads
|
1
|
+
from src.json_repair.json_repair import from_file, repair_json, loads, cli
|
2
|
+
from unittest.mock import patch
|
3
|
+
import os.path
|
4
|
+
import pathlib
|
5
|
+
import tempfile
|
2
6
|
|
3
7
|
def test_basic_types_valid():
|
4
8
|
assert repair_json("True", return_objects=True) == ""
|
@@ -225,10 +229,6 @@ def test_repair_json_skip_json_loads():
|
|
225
229
|
|
226
230
|
|
227
231
|
def test_repair_json_from_file():
|
228
|
-
import os.path
|
229
|
-
import pathlib
|
230
|
-
import tempfile
|
231
|
-
|
232
232
|
path = pathlib.Path(__file__).parent.resolve()
|
233
233
|
|
234
234
|
# Use chunk_length 2 to test the buffering feature
|
@@ -263,3 +263,50 @@ def test_repair_json_from_file():
|
|
263
263
|
|
264
264
|
def test_ensure_ascii():
|
265
265
|
assert repair_json("{'test_中国人_ascii':'统一码'}", ensure_ascii=False) == '{"test_中国人_ascii": "统一码"}'
|
266
|
+
|
267
|
+
|
268
|
+
|
269
|
+
def test_cli(capsys):
|
270
|
+
# Create a temporary file
|
271
|
+
temp_fd, temp_path = tempfile.mkstemp(suffix=".json")
|
272
|
+
try:
|
273
|
+
# Write content to the temporary file
|
274
|
+
with os.fdopen(temp_fd, 'w') as tmp:
|
275
|
+
tmp.write("{key:value")
|
276
|
+
cli(inline_args=[temp_path, '--indent', 0, '--ensure_ascii'])
|
277
|
+
captured = capsys.readouterr()
|
278
|
+
assert captured.out == '{\n"key": "value"\n}\n'
|
279
|
+
|
280
|
+
# Test the output option
|
281
|
+
tempout_fd, tempout_path = tempfile.mkstemp(suffix=".json")
|
282
|
+
cli(inline_args=[temp_path, '--indent', 0, '-o', tempout_path])
|
283
|
+
with open(tempout_path, 'r') as tmp:
|
284
|
+
out = tmp.read()
|
285
|
+
assert out == '{\n"key": "value"\n}'
|
286
|
+
|
287
|
+
# Test the inline option
|
288
|
+
cli(inline_args=[temp_path, '--indent', 0, '-i'])
|
289
|
+
with open(temp_path, 'r') as tmp:
|
290
|
+
out = tmp.read()
|
291
|
+
assert out == '{\n"key": "value"\n}'
|
292
|
+
|
293
|
+
|
294
|
+
finally:
|
295
|
+
# Clean up - delete the temporary file
|
296
|
+
os.remove(temp_path)
|
297
|
+
os.remove(tempout_path)
|
298
|
+
|
299
|
+
"""
|
300
|
+
def test_cli_inline(sample_json_file):
|
301
|
+
with patch('sys.argv', ['json_repair', sample_json_file, '-i']):
|
302
|
+
cli()
|
303
|
+
with open(sample_json_file, 'r') as f:
|
304
|
+
assert json.load(f) == {"key": "value"}
|
305
|
+
|
306
|
+
def test_cli_output_file(sample_json_file, tmp_path):
|
307
|
+
output_file = tmp_path / "output.json"
|
308
|
+
with patch('sys.argv', ['json_repair', sample_json_file, '-o', str(output_file)]):
|
309
|
+
cli()
|
310
|
+
with open(output_file, 'r') as f:
|
311
|
+
assert json.load(f) == {"key": "value"}
|
312
|
+
"""
|
@@ -19,7 +19,7 @@ def test_true_true_correct(benchmark):
|
|
19
19
|
mean_time = benchmark.stats.get("median")
|
20
20
|
|
21
21
|
# Define your time threshold in seconds
|
22
|
-
max_time =
|
22
|
+
max_time = 15 / 10 ** 4 # 1.5 millisecond
|
23
23
|
|
24
24
|
# Assert that the average time is below the threshold
|
25
25
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -31,7 +31,7 @@ def test_true_true_incorrect(benchmark):
|
|
31
31
|
mean_time = benchmark.stats.get("median")
|
32
32
|
|
33
33
|
# Define your time threshold in seconds
|
34
|
-
max_time =
|
34
|
+
max_time = 15 / 10 ** 4 # 1.5 millisecond
|
35
35
|
|
36
36
|
# Assert that the average time is below the threshold
|
37
37
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -53,7 +53,7 @@ def test_true_false_incorrect(benchmark):
|
|
53
53
|
mean_time = benchmark.stats.get("median")
|
54
54
|
|
55
55
|
# Define your time threshold in seconds
|
56
|
-
max_time =
|
56
|
+
max_time = 15 / 10 ** 4 # 1.5 millisecond
|
57
57
|
|
58
58
|
# Assert that the average time is below the threshold
|
59
59
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -64,7 +64,7 @@ def test_false_true_correct(benchmark):
|
|
64
64
|
mean_time = benchmark.stats.get("median")
|
65
65
|
|
66
66
|
# Define your time threshold in seconds
|
67
|
-
max_time =
|
67
|
+
max_time = 15 / 10 ** 4 # 1.5 millisecond
|
68
68
|
|
69
69
|
# Assert that the average time is below the threshold
|
70
70
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -75,7 +75,7 @@ def test_false_true_incorrect(benchmark):
|
|
75
75
|
mean_time = benchmark.stats.get("median")
|
76
76
|
|
77
77
|
# Define your time threshold in seconds
|
78
|
-
max_time =
|
78
|
+
max_time = 15 / 10 ** 4 # 1.5 millisecond
|
79
79
|
|
80
80
|
# Assert that the average time is below the threshold
|
81
81
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|