json-repair 0.29.0__tar.gz → 0.29.2__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {json_repair-0.29.0/src/json_repair.egg-info → json_repair-0.29.2}/PKG-INFO +10 -10
- {json_repair-0.29.0 → json_repair-0.29.2}/README.md +9 -9
- {json_repair-0.29.0 → json_repair-0.29.2}/pyproject.toml +1 -1
- {json_repair-0.29.0 → json_repair-0.29.2}/src/json_repair/json_repair.py +55 -36
- {json_repair-0.29.0 → json_repair-0.29.2/src/json_repair.egg-info}/PKG-INFO +10 -10
- {json_repair-0.29.0 → json_repair-0.29.2}/tests/test_json_repair.py +54 -5
- {json_repair-0.29.0 → json_repair-0.29.2}/tests/test_performance.py +6 -6
- {json_repair-0.29.0 → json_repair-0.29.2}/LICENSE +0 -0
- {json_repair-0.29.0 → json_repair-0.29.2}/setup.cfg +0 -0
- {json_repair-0.29.0 → json_repair-0.29.2}/src/json_repair/__init__.py +0 -0
- {json_repair-0.29.0 → json_repair-0.29.2}/src/json_repair/__main__.py +0 -0
- {json_repair-0.29.0 → json_repair-0.29.2}/src/json_repair/py.typed +0 -0
- {json_repair-0.29.0 → json_repair-0.29.2}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.29.0 → json_repair-0.29.2}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.29.0 → json_repair-0.29.2}/src/json_repair.egg-info/entry_points.txt +0 -0
- {json_repair-0.29.0 → json_repair-0.29.2}/src/json_repair.egg-info/top_level.txt +0 -0
- {json_repair-0.29.0 → json_repair-0.29.2}/tests/test_coverage.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.29.0
|
3
|
+
Version: 0.29.2
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -156,24 +156,24 @@ Install the library for command-line with:
|
|
156
156
|
```
|
157
157
|
pipx install json-repair
|
158
158
|
```
|
159
|
-
|
159
|
+
to know all options available:
|
160
160
|
```
|
161
161
|
$ json_repair -h
|
162
|
-
|
163
|
-
usage: json_repair [-h] [-i] [--ensure_ascii] [--indent INDENT] filename
|
162
|
+
usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
|
164
163
|
|
165
164
|
Repair and parse JSON files.
|
166
165
|
|
167
166
|
positional arguments:
|
168
|
-
filename
|
167
|
+
filename The JSON file to repair
|
169
168
|
|
170
169
|
options:
|
171
|
-
-h, --help
|
172
|
-
-i, --inline
|
173
|
-
|
174
|
-
|
170
|
+
-h, --help show this help message and exit
|
171
|
+
-i, --inline Replace the file inline instead of returning the output to stdout
|
172
|
+
-o TARGET, --output TARGET
|
173
|
+
If specified, the output will be written to TARGET filename instead of stdout
|
174
|
+
--ensure_ascii Pass ensure_ascii=True to json.dumps()
|
175
|
+
--indent INDENT Number of spaces for indentation (Default 2)
|
175
176
|
```
|
176
|
-
to learn how to use it
|
177
177
|
|
178
178
|
## Adding to requirements
|
179
179
|
**Please pin this library only on the major version!**
|
@@ -118,24 +118,24 @@ Install the library for command-line with:
|
|
118
118
|
```
|
119
119
|
pipx install json-repair
|
120
120
|
```
|
121
|
-
|
121
|
+
to know all options available:
|
122
122
|
```
|
123
123
|
$ json_repair -h
|
124
|
-
|
125
|
-
usage: json_repair [-h] [-i] [--ensure_ascii] [--indent INDENT] filename
|
124
|
+
usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
|
126
125
|
|
127
126
|
Repair and parse JSON files.
|
128
127
|
|
129
128
|
positional arguments:
|
130
|
-
filename
|
129
|
+
filename The JSON file to repair
|
131
130
|
|
132
131
|
options:
|
133
|
-
-h, --help
|
134
|
-
-i, --inline
|
135
|
-
|
136
|
-
|
132
|
+
-h, --help show this help message and exit
|
133
|
+
-i, --inline Replace the file inline instead of returning the output to stdout
|
134
|
+
-o TARGET, --output TARGET
|
135
|
+
If specified, the output will be written to TARGET filename instead of stdout
|
136
|
+
--ensure_ascii Pass ensure_ascii=True to json.dumps()
|
137
|
+
--indent INDENT Number of spaces for indentation (Default 2)
|
137
138
|
```
|
138
|
-
to learn how to use it
|
139
139
|
|
140
140
|
## Adding to requirements
|
141
141
|
**Please pin this library only on the major version!**
|
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.29.0"
|
6
|
+
version = "0.29.2"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -384,38 +384,39 @@ class JSONParser:
|
|
384
384
|
# * If we are fixing missing quotes in an object, when it finds the special terminators
|
385
385
|
char = self.get_char_at()
|
386
386
|
while char and char != rstring_delimiter:
|
387
|
-
if
|
388
|
-
|
389
|
-
|
390
|
-
)
|
387
|
+
if (
|
388
|
+
missing_quotes
|
389
|
+
and self.get_context() == "object_key"
|
390
|
+
and (char == ":" or char.isspace())
|
391
|
+
):
|
392
|
+
self.log(
|
393
|
+
"While parsing a string missing the left delimiter in object key context, we found a :, stopping here",
|
394
|
+
"info",
|
395
|
+
)
|
396
|
+
break
|
397
|
+
if self.get_context() == "object_value" and char in [",", "}"]:
|
398
|
+
rstring_delimiter_missing = True
|
399
|
+
# check if this is a case in which the closing comma is NOT missing instead
|
400
|
+
i = 1
|
401
|
+
next_c = self.get_char_at(i)
|
402
|
+
while next_c and next_c != rstring_delimiter:
|
403
|
+
i += 1
|
404
|
+
next_c = self.get_char_at(i)
|
405
|
+
if next_c:
|
406
|
+
i += 1
|
407
|
+
next_c = self.get_char_at(i)
|
408
|
+
# found a delimiter, now we need to check that is followed strictly by a comma or brace
|
409
|
+
while next_c and next_c.isspace():
|
410
|
+
i += 1
|
411
|
+
next_c = self.get_char_at(i)
|
412
|
+
if next_c and next_c in [",", "}"]:
|
413
|
+
rstring_delimiter_missing = False
|
414
|
+
if rstring_delimiter_missing:
|
391
415
|
self.log(
|
392
|
-
"While parsing a string missing the left delimiter in object
|
416
|
+
"While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn't determine that a right delimiter was present. Stopping here",
|
393
417
|
"info",
|
394
418
|
)
|
395
419
|
break
|
396
|
-
elif self.get_context() == "object_value" and char in [",", "}"]:
|
397
|
-
rstring_delimiter_missing = True
|
398
|
-
# check if this is a case in which the closing comma is NOT missing instead
|
399
|
-
i = 1
|
400
|
-
next_c = self.get_char_at(i)
|
401
|
-
while next_c and next_c != rstring_delimiter:
|
402
|
-
i += 1
|
403
|
-
next_c = self.get_char_at(i)
|
404
|
-
if next_c:
|
405
|
-
i += 1
|
406
|
-
next_c = self.get_char_at(i)
|
407
|
-
# found a delimiter, now we need to check that is followed strictly by a comma or brace
|
408
|
-
while next_c and next_c.isspace():
|
409
|
-
i += 1
|
410
|
-
next_c = self.get_char_at(i)
|
411
|
-
if next_c and next_c in [",", "}"]:
|
412
|
-
rstring_delimiter_missing = False
|
413
|
-
if rstring_delimiter_missing:
|
414
|
-
self.log(
|
415
|
-
"While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn't determine that a right delimiter was present. Stopping here",
|
416
|
-
"info",
|
417
|
-
)
|
418
|
-
break
|
419
420
|
string_acc += char
|
420
421
|
self.index += 1
|
421
422
|
char = self.get_char_at()
|
@@ -507,7 +508,7 @@ class JSONParser:
|
|
507
508
|
if next_c == "}":
|
508
509
|
# OK this is valid then
|
509
510
|
self.log(
|
510
|
-
"While parsing a string, we a
|
511
|
+
"While parsing a string, we misplaced a quote that would have closed the string but has a different meaning here since this is the last element of the object, ignoring it",
|
511
512
|
"info",
|
512
513
|
)
|
513
514
|
string_acc += str(char)
|
@@ -760,7 +761,7 @@ def from_file(
|
|
760
761
|
return jsonobj
|
761
762
|
|
762
763
|
|
763
|
-
def cli(
|
764
|
+
def cli(inline_args: Optional[List[str]] = None) -> int:
|
764
765
|
parser = argparse.ArgumentParser(description="Repair and parse JSON files.")
|
765
766
|
parser.add_argument("filename", help="The JSON file to repair")
|
766
767
|
parser.add_argument(
|
@@ -769,10 +770,16 @@ def cli(): # pragma: no cover
|
|
769
770
|
action="store_true",
|
770
771
|
help="Replace the file inline instead of returning the output to stdout",
|
771
772
|
)
|
773
|
+
parser.add_argument(
|
774
|
+
"-o",
|
775
|
+
"--output",
|
776
|
+
metavar="TARGET",
|
777
|
+
help="If specified, the output will be written to TARGET filename instead of stdout",
|
778
|
+
)
|
772
779
|
parser.add_argument(
|
773
780
|
"--ensure_ascii",
|
774
781
|
action="store_true",
|
775
|
-
help="Pass
|
782
|
+
help="Pass ensure_ascii=True to json.dumps()",
|
776
783
|
)
|
777
784
|
parser.add_argument(
|
778
785
|
"--indent",
|
@@ -781,24 +788,36 @@ def cli(): # pragma: no cover
|
|
781
788
|
help="Number of spaces for indentation (Default 2)",
|
782
789
|
)
|
783
790
|
|
784
|
-
|
791
|
+
if inline_args is None: # pragma: no cover
|
792
|
+
args = parser.parse_args()
|
793
|
+
else:
|
794
|
+
args = parser.parse_args(
|
795
|
+
inline_args
|
796
|
+
) # This is needed so this function is testable
|
797
|
+
|
798
|
+
if args.inline and args.output: # pragma: no cover
|
799
|
+
print("Error: You cannot pass both --inline and --output", file=sys.stderr)
|
800
|
+
sys.exit(1)
|
785
801
|
|
786
802
|
ensure_ascii = False
|
787
803
|
if args.ensure_ascii:
|
788
804
|
ensure_ascii = True
|
805
|
+
|
789
806
|
try:
|
790
807
|
result = from_file(args.filename)
|
791
808
|
|
792
|
-
if args.inline:
|
793
|
-
fd = open(args.filename, mode="w")
|
809
|
+
if args.inline or args.output:
|
810
|
+
fd = open(args.output or args.filename, mode="w")
|
794
811
|
json.dump(result, fd, indent=args.indent, ensure_ascii=ensure_ascii)
|
795
812
|
fd.close()
|
796
813
|
else:
|
797
814
|
print(json.dumps(result, indent=args.indent, ensure_ascii=ensure_ascii))
|
798
|
-
except Exception as e:
|
815
|
+
except Exception as e: # pragma: no cover
|
799
816
|
print(f"Error: {str(e)}", file=sys.stderr)
|
800
817
|
sys.exit(1)
|
801
818
|
|
819
|
+
return 0 # Success
|
820
|
+
|
802
821
|
|
803
822
|
if __name__ == "__main__": # pragma: no cover
|
804
|
-
cli()
|
823
|
+
sys.exit(cli())
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.29.0
|
3
|
+
Version: 0.29.2
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -156,24 +156,24 @@ Install the library for command-line with:
|
|
156
156
|
```
|
157
157
|
pipx install json-repair
|
158
158
|
```
|
159
|
-
|
159
|
+
to know all options available:
|
160
160
|
```
|
161
161
|
$ json_repair -h
|
162
|
-
|
163
|
-
usage: json_repair [-h] [-i] [--ensure_ascii] [--indent INDENT] filename
|
162
|
+
usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
|
164
163
|
|
165
164
|
Repair and parse JSON files.
|
166
165
|
|
167
166
|
positional arguments:
|
168
|
-
filename
|
167
|
+
filename The JSON file to repair
|
169
168
|
|
170
169
|
options:
|
171
|
-
-h, --help
|
172
|
-
-i, --inline
|
173
|
-
|
174
|
-
|
170
|
+
-h, --help show this help message and exit
|
171
|
+
-i, --inline Replace the file inline instead of returning the output to stdout
|
172
|
+
-o TARGET, --output TARGET
|
173
|
+
If specified, the output will be written to TARGET filename instead of stdout
|
174
|
+
--ensure_ascii Pass ensure_ascii=True to json.dumps()
|
175
|
+
--indent INDENT Number of spaces for indentation (Default 2)
|
175
176
|
```
|
176
|
-
to learn how to use it
|
177
177
|
|
178
178
|
## Adding to requirements
|
179
179
|
**Please pin this library only on the major version!**
|
@@ -1,4 +1,8 @@
|
|
1
|
-
from src.json_repair.json_repair import from_file, repair_json, loads
|
1
|
+
from src.json_repair.json_repair import from_file, repair_json, loads, cli
|
2
|
+
from unittest.mock import patch
|
3
|
+
import os.path
|
4
|
+
import pathlib
|
5
|
+
import tempfile
|
2
6
|
|
3
7
|
def test_basic_types_valid():
|
4
8
|
assert repair_json("True", return_objects=True) == ""
|
@@ -94,6 +98,8 @@ def test_missing_and_mixed_quotes():
|
|
94
98
|
repair_json('{"name": "John", "age": 30, "city": "New')
|
95
99
|
== '{"name": "John", "age": 30, "city": "New"}'
|
96
100
|
)
|
101
|
+
assert repair_json('{"name": "John", "age": 30, "city": "New York, "gender": "male"}') == '{"name": "John", "age": 30, "city": "New York", "gender": "male"}'
|
102
|
+
|
97
103
|
assert repair_json('[{"key": "value", COMMENT "notes": "lorem "ipsum", sic." }]') == '[{"key": "value", "notes": "lorem \\"ipsum\\", sic."}]'
|
98
104
|
assert repair_json('{"key": ""value"}') == '{"key": "value"}'
|
99
105
|
assert repair_json('{"key": "value", 5: "value"}') == '{"key": "value", "5": "value"}'
|
@@ -225,10 +231,6 @@ def test_repair_json_skip_json_loads():
|
|
225
231
|
|
226
232
|
|
227
233
|
def test_repair_json_from_file():
|
228
|
-
import os.path
|
229
|
-
import pathlib
|
230
|
-
import tempfile
|
231
|
-
|
232
234
|
path = pathlib.Path(__file__).parent.resolve()
|
233
235
|
|
234
236
|
# Use chunk_length 2 to test the buffering feature
|
@@ -263,3 +265,50 @@ def test_repair_json_from_file():
|
|
263
265
|
|
264
266
|
def test_ensure_ascii():
|
265
267
|
assert repair_json("{'test_中国人_ascii':'统一码'}", ensure_ascii=False) == '{"test_中国人_ascii": "统一码"}'
|
268
|
+
|
269
|
+
|
270
|
+
|
271
|
+
def test_cli(capsys):
|
272
|
+
# Create a temporary file
|
273
|
+
temp_fd, temp_path = tempfile.mkstemp(suffix=".json")
|
274
|
+
try:
|
275
|
+
# Write content to the temporary file
|
276
|
+
with os.fdopen(temp_fd, 'w') as tmp:
|
277
|
+
tmp.write("{key:value")
|
278
|
+
cli(inline_args=[temp_path, '--indent', 0, '--ensure_ascii'])
|
279
|
+
captured = capsys.readouterr()
|
280
|
+
assert captured.out == '{\n"key": "value"\n}\n'
|
281
|
+
|
282
|
+
# Test the output option
|
283
|
+
tempout_fd, tempout_path = tempfile.mkstemp(suffix=".json")
|
284
|
+
cli(inline_args=[temp_path, '--indent', 0, '-o', tempout_path])
|
285
|
+
with open(tempout_path, 'r') as tmp:
|
286
|
+
out = tmp.read()
|
287
|
+
assert out == '{\n"key": "value"\n}'
|
288
|
+
|
289
|
+
# Test the inline option
|
290
|
+
cli(inline_args=[temp_path, '--indent', 0, '-i'])
|
291
|
+
with open(temp_path, 'r') as tmp:
|
292
|
+
out = tmp.read()
|
293
|
+
assert out == '{\n"key": "value"\n}'
|
294
|
+
|
295
|
+
|
296
|
+
finally:
|
297
|
+
# Clean up - delete the temporary file
|
298
|
+
os.remove(temp_path)
|
299
|
+
os.remove(tempout_path)
|
300
|
+
|
301
|
+
"""
|
302
|
+
def test_cli_inline(sample_json_file):
|
303
|
+
with patch('sys.argv', ['json_repair', sample_json_file, '-i']):
|
304
|
+
cli()
|
305
|
+
with open(sample_json_file, 'r') as f:
|
306
|
+
assert json.load(f) == {"key": "value"}
|
307
|
+
|
308
|
+
def test_cli_output_file(sample_json_file, tmp_path):
|
309
|
+
output_file = tmp_path / "output.json"
|
310
|
+
with patch('sys.argv', ['json_repair', sample_json_file, '-o', str(output_file)]):
|
311
|
+
cli()
|
312
|
+
with open(output_file, 'r') as f:
|
313
|
+
assert json.load(f) == {"key": "value"}
|
314
|
+
"""
|
@@ -19,7 +19,7 @@ def test_true_true_correct(benchmark):
|
|
19
19
|
mean_time = benchmark.stats.get("median")
|
20
20
|
|
21
21
|
# Define your time threshold in seconds
|
22
|
-
max_time =
|
22
|
+
max_time = 2 / 10 ** 3 # 2 millisecond
|
23
23
|
|
24
24
|
# Assert that the average time is below the threshold
|
25
25
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -31,7 +31,7 @@ def test_true_true_incorrect(benchmark):
|
|
31
31
|
mean_time = benchmark.stats.get("median")
|
32
32
|
|
33
33
|
# Define your time threshold in seconds
|
34
|
-
max_time =
|
34
|
+
max_time = 2 / 10 ** 3 # 2 millisecond
|
35
35
|
|
36
36
|
# Assert that the average time is below the threshold
|
37
37
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -53,7 +53,7 @@ def test_true_false_incorrect(benchmark):
|
|
53
53
|
mean_time = benchmark.stats.get("median")
|
54
54
|
|
55
55
|
# Define your time threshold in seconds
|
56
|
-
max_time =
|
56
|
+
max_time = 2 / 10 ** 3 # 2 millisecond
|
57
57
|
|
58
58
|
# Assert that the average time is below the threshold
|
59
59
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -64,7 +64,7 @@ def test_false_true_correct(benchmark):
|
|
64
64
|
mean_time = benchmark.stats.get("median")
|
65
65
|
|
66
66
|
# Define your time threshold in seconds
|
67
|
-
max_time =
|
67
|
+
max_time = 2 / 10 ** 3 # 2 millisecond
|
68
68
|
|
69
69
|
# Assert that the average time is below the threshold
|
70
70
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -75,7 +75,7 @@ def test_false_true_incorrect(benchmark):
|
|
75
75
|
mean_time = benchmark.stats.get("median")
|
76
76
|
|
77
77
|
# Define your time threshold in seconds
|
78
|
-
max_time =
|
78
|
+
max_time = 2 / 10 ** 3 # 2 millisecond
|
79
79
|
|
80
80
|
# Assert that the average time is below the threshold
|
81
81
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -97,7 +97,7 @@ def test_false_false_incorrect(benchmark):
|
|
97
97
|
mean_time = benchmark.stats.get("median")
|
98
98
|
|
99
99
|
# Define your time threshold in seconds
|
100
|
-
max_time =
|
100
|
+
max_time = 2 / 10 ** 3 # 2 millisecond
|
101
101
|
|
102
102
|
# Assert that the average time is below the threshold
|
103
103
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|