json-repair 0.29.0__tar.gz → 0.29.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {json_repair-0.29.0/src/json_repair.egg-info → json_repair-0.29.2}/PKG-INFO +10 -10
- {json_repair-0.29.0 → json_repair-0.29.2}/README.md +9 -9
- {json_repair-0.29.0 → json_repair-0.29.2}/pyproject.toml +1 -1
- {json_repair-0.29.0 → json_repair-0.29.2}/src/json_repair/json_repair.py +55 -36
- {json_repair-0.29.0 → json_repair-0.29.2/src/json_repair.egg-info}/PKG-INFO +10 -10
- {json_repair-0.29.0 → json_repair-0.29.2}/tests/test_json_repair.py +54 -5
- {json_repair-0.29.0 → json_repair-0.29.2}/tests/test_performance.py +6 -6
- {json_repair-0.29.0 → json_repair-0.29.2}/LICENSE +0 -0
- {json_repair-0.29.0 → json_repair-0.29.2}/setup.cfg +0 -0
- {json_repair-0.29.0 → json_repair-0.29.2}/src/json_repair/__init__.py +0 -0
- {json_repair-0.29.0 → json_repair-0.29.2}/src/json_repair/__main__.py +0 -0
- {json_repair-0.29.0 → json_repair-0.29.2}/src/json_repair/py.typed +0 -0
- {json_repair-0.29.0 → json_repair-0.29.2}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.29.0 → json_repair-0.29.2}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.29.0 → json_repair-0.29.2}/src/json_repair.egg-info/entry_points.txt +0 -0
- {json_repair-0.29.0 → json_repair-0.29.2}/src/json_repair.egg-info/top_level.txt +0 -0
- {json_repair-0.29.0 → json_repair-0.29.2}/tests/test_coverage.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.29.0
|
3
|
+
Version: 0.29.2
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -156,24 +156,24 @@ Install the library for command-line with:
|
|
156
156
|
```
|
157
157
|
pipx install json-repair
|
158
158
|
```
|
159
|
-
|
159
|
+
to know all options available:
|
160
160
|
```
|
161
161
|
$ json_repair -h
|
162
|
-
|
163
|
-
usage: json_repair [-h] [-i] [--ensure_ascii] [--indent INDENT] filename
|
162
|
+
usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
|
164
163
|
|
165
164
|
Repair and parse JSON files.
|
166
165
|
|
167
166
|
positional arguments:
|
168
|
-
filename
|
167
|
+
filename The JSON file to repair
|
169
168
|
|
170
169
|
options:
|
171
|
-
-h, --help
|
172
|
-
-i, --inline
|
173
|
-
|
174
|
-
|
170
|
+
-h, --help show this help message and exit
|
171
|
+
-i, --inline Replace the file inline instead of returning the output to stdout
|
172
|
+
-o TARGET, --output TARGET
|
173
|
+
If specified, the output will be written to TARGET filename instead of stdout
|
174
|
+
--ensure_ascii Pass ensure_ascii=True to json.dumps()
|
175
|
+
--indent INDENT Number of spaces for indentation (Default 2)
|
175
176
|
```
|
176
|
-
to learn how to use it
|
177
177
|
|
178
178
|
## Adding to requirements
|
179
179
|
**Please pin this library only on the major version!**
|
@@ -118,24 +118,24 @@ Install the library for command-line with:
|
|
118
118
|
```
|
119
119
|
pipx install json-repair
|
120
120
|
```
|
121
|
-
|
121
|
+
to know all options available:
|
122
122
|
```
|
123
123
|
$ json_repair -h
|
124
|
-
|
125
|
-
usage: json_repair [-h] [-i] [--ensure_ascii] [--indent INDENT] filename
|
124
|
+
usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
|
126
125
|
|
127
126
|
Repair and parse JSON files.
|
128
127
|
|
129
128
|
positional arguments:
|
130
|
-
filename
|
129
|
+
filename The JSON file to repair
|
131
130
|
|
132
131
|
options:
|
133
|
-
-h, --help
|
134
|
-
-i, --inline
|
135
|
-
|
136
|
-
|
132
|
+
-h, --help show this help message and exit
|
133
|
+
-i, --inline Replace the file inline instead of returning the output to stdout
|
134
|
+
-o TARGET, --output TARGET
|
135
|
+
If specified, the output will be written to TARGET filename instead of stdout
|
136
|
+
--ensure_ascii Pass ensure_ascii=True to json.dumps()
|
137
|
+
--indent INDENT Number of spaces for indentation (Default 2)
|
137
138
|
```
|
138
|
-
to learn how to use it
|
139
139
|
|
140
140
|
## Adding to requirements
|
141
141
|
**Please pin this library only on the major version!**
|
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.29.0"
|
6
|
+
version = "0.29.2"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -384,38 +384,39 @@ class JSONParser:
|
|
384
384
|
# * If we are fixing missing quotes in an object, when it finds the special terminators
|
385
385
|
char = self.get_char_at()
|
386
386
|
while char and char != rstring_delimiter:
|
387
|
-
if
|
388
|
-
|
389
|
-
|
390
|
-
)
|
387
|
+
if (
|
388
|
+
missing_quotes
|
389
|
+
and self.get_context() == "object_key"
|
390
|
+
and (char == ":" or char.isspace())
|
391
|
+
):
|
392
|
+
self.log(
|
393
|
+
"While parsing a string missing the left delimiter in object key context, we found a :, stopping here",
|
394
|
+
"info",
|
395
|
+
)
|
396
|
+
break
|
397
|
+
if self.get_context() == "object_value" and char in [",", "}"]:
|
398
|
+
rstring_delimiter_missing = True
|
399
|
+
# check if this is a case in which the closing comma is NOT missing instead
|
400
|
+
i = 1
|
401
|
+
next_c = self.get_char_at(i)
|
402
|
+
while next_c and next_c != rstring_delimiter:
|
403
|
+
i += 1
|
404
|
+
next_c = self.get_char_at(i)
|
405
|
+
if next_c:
|
406
|
+
i += 1
|
407
|
+
next_c = self.get_char_at(i)
|
408
|
+
# found a delimiter, now we need to check that is followed strictly by a comma or brace
|
409
|
+
while next_c and next_c.isspace():
|
410
|
+
i += 1
|
411
|
+
next_c = self.get_char_at(i)
|
412
|
+
if next_c and next_c in [",", "}"]:
|
413
|
+
rstring_delimiter_missing = False
|
414
|
+
if rstring_delimiter_missing:
|
391
415
|
self.log(
|
392
|
-
"While parsing a string missing the left delimiter in object
|
416
|
+
"While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn't determine that a right delimiter was present. Stopping here",
|
393
417
|
"info",
|
394
418
|
)
|
395
419
|
break
|
396
|
-
elif self.get_context() == "object_value" and char in [",", "}"]:
|
397
|
-
rstring_delimiter_missing = True
|
398
|
-
# check if this is a case in which the closing comma is NOT missing instead
|
399
|
-
i = 1
|
400
|
-
next_c = self.get_char_at(i)
|
401
|
-
while next_c and next_c != rstring_delimiter:
|
402
|
-
i += 1
|
403
|
-
next_c = self.get_char_at(i)
|
404
|
-
if next_c:
|
405
|
-
i += 1
|
406
|
-
next_c = self.get_char_at(i)
|
407
|
-
# found a delimiter, now we need to check that is followed strictly by a comma or brace
|
408
|
-
while next_c and next_c.isspace():
|
409
|
-
i += 1
|
410
|
-
next_c = self.get_char_at(i)
|
411
|
-
if next_c and next_c in [",", "}"]:
|
412
|
-
rstring_delimiter_missing = False
|
413
|
-
if rstring_delimiter_missing:
|
414
|
-
self.log(
|
415
|
-
"While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn't determine that a right delimiter was present. Stopping here",
|
416
|
-
"info",
|
417
|
-
)
|
418
|
-
break
|
419
420
|
string_acc += char
|
420
421
|
self.index += 1
|
421
422
|
char = self.get_char_at()
|
@@ -507,7 +508,7 @@ class JSONParser:
|
|
507
508
|
if next_c == "}":
|
508
509
|
# OK this is valid then
|
509
510
|
self.log(
|
510
|
-
"While parsing a string, we a
|
511
|
+
"While parsing a string, we misplaced a quote that would have closed the string but has a different meaning here since this is the last element of the object, ignoring it",
|
511
512
|
"info",
|
512
513
|
)
|
513
514
|
string_acc += str(char)
|
@@ -760,7 +761,7 @@ def from_file(
|
|
760
761
|
return jsonobj
|
761
762
|
|
762
763
|
|
763
|
-
def cli(
|
764
|
+
def cli(inline_args: Optional[List[str]] = None) -> int:
|
764
765
|
parser = argparse.ArgumentParser(description="Repair and parse JSON files.")
|
765
766
|
parser.add_argument("filename", help="The JSON file to repair")
|
766
767
|
parser.add_argument(
|
@@ -769,10 +770,16 @@ def cli(): # pragma: no cover
|
|
769
770
|
action="store_true",
|
770
771
|
help="Replace the file inline instead of returning the output to stdout",
|
771
772
|
)
|
773
|
+
parser.add_argument(
|
774
|
+
"-o",
|
775
|
+
"--output",
|
776
|
+
metavar="TARGET",
|
777
|
+
help="If specified, the output will be written to TARGET filename instead of stdout",
|
778
|
+
)
|
772
779
|
parser.add_argument(
|
773
780
|
"--ensure_ascii",
|
774
781
|
action="store_true",
|
775
|
-
help="Pass
|
782
|
+
help="Pass ensure_ascii=True to json.dumps()",
|
776
783
|
)
|
777
784
|
parser.add_argument(
|
778
785
|
"--indent",
|
@@ -781,24 +788,36 @@ def cli(): # pragma: no cover
|
|
781
788
|
help="Number of spaces for indentation (Default 2)",
|
782
789
|
)
|
783
790
|
|
784
|
-
|
791
|
+
if inline_args is None: # pragma: no cover
|
792
|
+
args = parser.parse_args()
|
793
|
+
else:
|
794
|
+
args = parser.parse_args(
|
795
|
+
inline_args
|
796
|
+
) # This is needed so this function is testable
|
797
|
+
|
798
|
+
if args.inline and args.output: # pragma: no cover
|
799
|
+
print("Error: You cannot pass both --inline and --output", file=sys.stderr)
|
800
|
+
sys.exit(1)
|
785
801
|
|
786
802
|
ensure_ascii = False
|
787
803
|
if args.ensure_ascii:
|
788
804
|
ensure_ascii = True
|
805
|
+
|
789
806
|
try:
|
790
807
|
result = from_file(args.filename)
|
791
808
|
|
792
|
-
if args.inline:
|
793
|
-
fd = open(args.filename, mode="w")
|
809
|
+
if args.inline or args.output:
|
810
|
+
fd = open(args.output or args.filename, mode="w")
|
794
811
|
json.dump(result, fd, indent=args.indent, ensure_ascii=ensure_ascii)
|
795
812
|
fd.close()
|
796
813
|
else:
|
797
814
|
print(json.dumps(result, indent=args.indent, ensure_ascii=ensure_ascii))
|
798
|
-
except Exception as e:
|
815
|
+
except Exception as e: # pragma: no cover
|
799
816
|
print(f"Error: {str(e)}", file=sys.stderr)
|
800
817
|
sys.exit(1)
|
801
818
|
|
819
|
+
return 0 # Success
|
820
|
+
|
802
821
|
|
803
822
|
if __name__ == "__main__": # pragma: no cover
|
804
|
-
cli()
|
823
|
+
sys.exit(cli())
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.29.0
|
3
|
+
Version: 0.29.2
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -156,24 +156,24 @@ Install the library for command-line with:
|
|
156
156
|
```
|
157
157
|
pipx install json-repair
|
158
158
|
```
|
159
|
-
|
159
|
+
to know all options available:
|
160
160
|
```
|
161
161
|
$ json_repair -h
|
162
|
-
|
163
|
-
usage: json_repair [-h] [-i] [--ensure_ascii] [--indent INDENT] filename
|
162
|
+
usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
|
164
163
|
|
165
164
|
Repair and parse JSON files.
|
166
165
|
|
167
166
|
positional arguments:
|
168
|
-
filename
|
167
|
+
filename The JSON file to repair
|
169
168
|
|
170
169
|
options:
|
171
|
-
-h, --help
|
172
|
-
-i, --inline
|
173
|
-
|
174
|
-
|
170
|
+
-h, --help show this help message and exit
|
171
|
+
-i, --inline Replace the file inline instead of returning the output to stdout
|
172
|
+
-o TARGET, --output TARGET
|
173
|
+
If specified, the output will be written to TARGET filename instead of stdout
|
174
|
+
--ensure_ascii Pass ensure_ascii=True to json.dumps()
|
175
|
+
--indent INDENT Number of spaces for indentation (Default 2)
|
175
176
|
```
|
176
|
-
to learn how to use it
|
177
177
|
|
178
178
|
## Adding to requirements
|
179
179
|
**Please pin this library only on the major version!**
|
@@ -1,4 +1,8 @@
|
|
1
|
-
from src.json_repair.json_repair import from_file, repair_json, loads
|
1
|
+
from src.json_repair.json_repair import from_file, repair_json, loads, cli
|
2
|
+
from unittest.mock import patch
|
3
|
+
import os.path
|
4
|
+
import pathlib
|
5
|
+
import tempfile
|
2
6
|
|
3
7
|
def test_basic_types_valid():
|
4
8
|
assert repair_json("True", return_objects=True) == ""
|
@@ -94,6 +98,8 @@ def test_missing_and_mixed_quotes():
|
|
94
98
|
repair_json('{"name": "John", "age": 30, "city": "New')
|
95
99
|
== '{"name": "John", "age": 30, "city": "New"}'
|
96
100
|
)
|
101
|
+
assert repair_json('{"name": "John", "age": 30, "city": "New York, "gender": "male"}') == '{"name": "John", "age": 30, "city": "New York", "gender": "male"}'
|
102
|
+
|
97
103
|
assert repair_json('[{"key": "value", COMMENT "notes": "lorem "ipsum", sic." }]') == '[{"key": "value", "notes": "lorem \\"ipsum\\", sic."}]'
|
98
104
|
assert repair_json('{"key": ""value"}') == '{"key": "value"}'
|
99
105
|
assert repair_json('{"key": "value", 5: "value"}') == '{"key": "value", "5": "value"}'
|
@@ -225,10 +231,6 @@ def test_repair_json_skip_json_loads():
|
|
225
231
|
|
226
232
|
|
227
233
|
def test_repair_json_from_file():
|
228
|
-
import os.path
|
229
|
-
import pathlib
|
230
|
-
import tempfile
|
231
|
-
|
232
234
|
path = pathlib.Path(__file__).parent.resolve()
|
233
235
|
|
234
236
|
# Use chunk_length 2 to test the buffering feature
|
@@ -263,3 +265,50 @@ def test_repair_json_from_file():
|
|
263
265
|
|
264
266
|
def test_ensure_ascii():
|
265
267
|
assert repair_json("{'test_中国人_ascii':'统一码'}", ensure_ascii=False) == '{"test_中国人_ascii": "统一码"}'
|
268
|
+
|
269
|
+
|
270
|
+
|
271
|
+
def test_cli(capsys):
|
272
|
+
# Create a temporary file
|
273
|
+
temp_fd, temp_path = tempfile.mkstemp(suffix=".json")
|
274
|
+
try:
|
275
|
+
# Write content to the temporary file
|
276
|
+
with os.fdopen(temp_fd, 'w') as tmp:
|
277
|
+
tmp.write("{key:value")
|
278
|
+
cli(inline_args=[temp_path, '--indent', 0, '--ensure_ascii'])
|
279
|
+
captured = capsys.readouterr()
|
280
|
+
assert captured.out == '{\n"key": "value"\n}\n'
|
281
|
+
|
282
|
+
# Test the output option
|
283
|
+
tempout_fd, tempout_path = tempfile.mkstemp(suffix=".json")
|
284
|
+
cli(inline_args=[temp_path, '--indent', 0, '-o', tempout_path])
|
285
|
+
with open(tempout_path, 'r') as tmp:
|
286
|
+
out = tmp.read()
|
287
|
+
assert out == '{\n"key": "value"\n}'
|
288
|
+
|
289
|
+
# Test the inline option
|
290
|
+
cli(inline_args=[temp_path, '--indent', 0, '-i'])
|
291
|
+
with open(temp_path, 'r') as tmp:
|
292
|
+
out = tmp.read()
|
293
|
+
assert out == '{\n"key": "value"\n}'
|
294
|
+
|
295
|
+
|
296
|
+
finally:
|
297
|
+
# Clean up - delete the temporary file
|
298
|
+
os.remove(temp_path)
|
299
|
+
os.remove(tempout_path)
|
300
|
+
|
301
|
+
"""
|
302
|
+
def test_cli_inline(sample_json_file):
|
303
|
+
with patch('sys.argv', ['json_repair', sample_json_file, '-i']):
|
304
|
+
cli()
|
305
|
+
with open(sample_json_file, 'r') as f:
|
306
|
+
assert json.load(f) == {"key": "value"}
|
307
|
+
|
308
|
+
def test_cli_output_file(sample_json_file, tmp_path):
|
309
|
+
output_file = tmp_path / "output.json"
|
310
|
+
with patch('sys.argv', ['json_repair', sample_json_file, '-o', str(output_file)]):
|
311
|
+
cli()
|
312
|
+
with open(output_file, 'r') as f:
|
313
|
+
assert json.load(f) == {"key": "value"}
|
314
|
+
"""
|
@@ -19,7 +19,7 @@ def test_true_true_correct(benchmark):
|
|
19
19
|
mean_time = benchmark.stats.get("median")
|
20
20
|
|
21
21
|
# Define your time threshold in seconds
|
22
|
-
max_time =
|
22
|
+
max_time = 2 / 10 ** 3 # 2 millisecond
|
23
23
|
|
24
24
|
# Assert that the average time is below the threshold
|
25
25
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -31,7 +31,7 @@ def test_true_true_incorrect(benchmark):
|
|
31
31
|
mean_time = benchmark.stats.get("median")
|
32
32
|
|
33
33
|
# Define your time threshold in seconds
|
34
|
-
max_time =
|
34
|
+
max_time = 2 / 10 ** 3 # 2 millisecond
|
35
35
|
|
36
36
|
# Assert that the average time is below the threshold
|
37
37
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -53,7 +53,7 @@ def test_true_false_incorrect(benchmark):
|
|
53
53
|
mean_time = benchmark.stats.get("median")
|
54
54
|
|
55
55
|
# Define your time threshold in seconds
|
56
|
-
max_time =
|
56
|
+
max_time = 2 / 10 ** 3 # 2 millisecond
|
57
57
|
|
58
58
|
# Assert that the average time is below the threshold
|
59
59
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -64,7 +64,7 @@ def test_false_true_correct(benchmark):
|
|
64
64
|
mean_time = benchmark.stats.get("median")
|
65
65
|
|
66
66
|
# Define your time threshold in seconds
|
67
|
-
max_time =
|
67
|
+
max_time = 2 / 10 ** 3 # 2 millisecond
|
68
68
|
|
69
69
|
# Assert that the average time is below the threshold
|
70
70
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -75,7 +75,7 @@ def test_false_true_incorrect(benchmark):
|
|
75
75
|
mean_time = benchmark.stats.get("median")
|
76
76
|
|
77
77
|
# Define your time threshold in seconds
|
78
|
-
max_time =
|
78
|
+
max_time = 2 / 10 ** 3 # 2 millisecond
|
79
79
|
|
80
80
|
# Assert that the average time is below the threshold
|
81
81
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -97,7 +97,7 @@ def test_false_false_incorrect(benchmark):
|
|
97
97
|
mean_time = benchmark.stats.get("median")
|
98
98
|
|
99
99
|
# Define your time threshold in seconds
|
100
|
-
max_time =
|
100
|
+
max_time = 2 / 10 ** 3 # 2 millisecond
|
101
101
|
|
102
102
|
# Assert that the average time is below the threshold
|
103
103
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|