json-repair 0.28.4__tar.gz → 0.29.0__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {json_repair-0.28.4/src/json_repair.egg-info → json_repair-0.29.0}/PKG-INFO +26 -1
- {json_repair-0.28.4 → json_repair-0.29.0}/README.md +25 -0
- {json_repair-0.28.4 → json_repair-0.29.0}/pyproject.toml +3 -1
- json_repair-0.29.0/src/json_repair/__main__.py +4 -0
- {json_repair-0.28.4 → json_repair-0.29.0}/src/json_repair/json_repair.py +57 -10
- {json_repair-0.28.4 → json_repair-0.29.0/src/json_repair.egg-info}/PKG-INFO +26 -1
- {json_repair-0.28.4 → json_repair-0.29.0}/src/json_repair.egg-info/SOURCES.txt +2 -0
- json_repair-0.29.0/src/json_repair.egg-info/entry_points.txt +2 -0
- {json_repair-0.28.4 → json_repair-0.29.0}/tests/test_performance.py +5 -5
- {json_repair-0.28.4 → json_repair-0.29.0}/LICENSE +0 -0
- {json_repair-0.28.4 → json_repair-0.29.0}/setup.cfg +0 -0
- {json_repair-0.28.4 → json_repair-0.29.0}/src/json_repair/__init__.py +0 -0
- {json_repair-0.28.4 → json_repair-0.29.0}/src/json_repair/py.typed +0 -0
- {json_repair-0.28.4 → json_repair-0.29.0}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.28.4 → json_repair-0.29.0}/src/json_repair.egg-info/top_level.txt +0 -0
- {json_repair-0.28.4 → json_repair-0.29.0}/tests/test_coverage.py +0 -0
- {json_repair-0.28.4 → json_repair-0.29.0}/tests/test_json_repair.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.28.4
|
3
|
+
Version: 0.29.0
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -150,6 +150,31 @@ Some rules of thumb to use:
|
|
150
150
|
- `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
|
151
151
|
- If you are having issues with escaping pass the string as **raw** string like: `r"string with escaping\""`
|
152
152
|
|
153
|
+
### Use json_repair from CLI
|
154
|
+
|
155
|
+
Install the library for command-line with:
|
156
|
+
```
|
157
|
+
pipx install json-repair
|
158
|
+
```
|
159
|
+
then run
|
160
|
+
```
|
161
|
+
$ json_repair -h
|
162
|
+
|
163
|
+
usage: json_repair [-h] [-i] [--ensure_ascii] [--indent INDENT] filename
|
164
|
+
|
165
|
+
Repair and parse JSON files.
|
166
|
+
|
167
|
+
positional arguments:
|
168
|
+
filename The JSON file to repair
|
169
|
+
|
170
|
+
options:
|
171
|
+
-h, --help show this help message and exit
|
172
|
+
-i, --inline Replace the file inline instead of returning the output to stdout
|
173
|
+
--ensure_ascii Pass the ensure_ascii parameter to json.dumps()
|
174
|
+
--indent INDENT Number of spaces for indentation (Default 2)
|
175
|
+
```
|
176
|
+
to learn how to use it
|
177
|
+
|
153
178
|
## Adding to requirements
|
154
179
|
**Please pin this library only on the major version!**
|
155
180
|
|
@@ -112,6 +112,31 @@ Some rules of thumb to use:
|
|
112
112
|
- `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
|
113
113
|
- If you are having issues with escaping pass the string as **raw** string like: `r"string with escaping\""`
|
114
114
|
|
115
|
+
### Use json_repair from CLI
|
116
|
+
|
117
|
+
Install the library for command-line with:
|
118
|
+
```
|
119
|
+
pipx install json-repair
|
120
|
+
```
|
121
|
+
then run
|
122
|
+
```
|
123
|
+
$ json_repair -h
|
124
|
+
|
125
|
+
usage: json_repair [-h] [-i] [--ensure_ascii] [--indent INDENT] filename
|
126
|
+
|
127
|
+
Repair and parse JSON files.
|
128
|
+
|
129
|
+
positional arguments:
|
130
|
+
filename The JSON file to repair
|
131
|
+
|
132
|
+
options:
|
133
|
+
-h, --help show this help message and exit
|
134
|
+
-i, --inline Replace the file inline instead of returning the output to stdout
|
135
|
+
--ensure_ascii Pass the ensure_ascii parameter to json.dumps()
|
136
|
+
--indent INDENT Number of spaces for indentation (Default 2)
|
137
|
+
```
|
138
|
+
to learn how to use it
|
139
|
+
|
115
140
|
## Adding to requirements
|
116
141
|
**Please pin this library only on the major version!**
|
117
142
|
|
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.28.4"
|
6
|
+
version = "0.29.0"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -29,3 +29,5 @@ pythonpath = [
|
|
29
29
|
"pkgname" = ["py.typed"]
|
30
30
|
[tool.setuptools.packages.find]
|
31
31
|
where = ["src"]
|
32
|
+
[project.scripts]
|
33
|
+
json_repair = "json_repair.__main__:cli"
|
@@ -22,7 +22,9 @@ If something is wrong (a missing parantheses or quotes for example) it will use
|
|
22
22
|
All supported use cases are in the unit tests
|
23
23
|
"""
|
24
24
|
|
25
|
+
import argparse
|
25
26
|
import os
|
27
|
+
import sys
|
26
28
|
import json
|
27
29
|
from typing import Any, Dict, List, Optional, Union, TextIO, Tuple, Literal
|
28
30
|
|
@@ -40,14 +42,16 @@ class StringFileWrapper:
|
|
40
42
|
CHUNK_LENGTH = 1_000_000
|
41
43
|
self.buffer_length = CHUNK_LENGTH
|
42
44
|
|
43
|
-
def fill_buffer(self, index: int) -> None:
|
45
|
+
def get_buffer(self, index: int) -> str:
|
44
46
|
if self.buffers.get(index) is None:
|
45
47
|
self.fd.seek(index * self.buffer_length)
|
46
48
|
self.buffers[index] = self.fd.read(self.buffer_length)
|
47
49
|
# Save memory by keeping max 2MB buffer chunks and min 2 chunks
|
48
50
|
if len(self.buffers) > max(2, 2_000_000 / self.buffer_length):
|
49
51
|
oldest_key = next(iter(self.buffers))
|
50
|
-
|
52
|
+
if oldest_key != index:
|
53
|
+
self.buffers.pop(oldest_key)
|
54
|
+
return self.buffers[index]
|
51
55
|
|
52
56
|
def __getitem__(self, index: Union[int, slice]) -> str:
|
53
57
|
# The buffer is an array that is seek like a RAM:
|
@@ -56,25 +60,24 @@ class StringFileWrapper:
|
|
56
60
|
if isinstance(index, slice):
|
57
61
|
buffer_index = index.start // self.buffer_length
|
58
62
|
buffer_end = index.stop // self.buffer_length
|
59
|
-
for i in range(buffer_index, buffer_end + 1):
|
60
|
-
self.fill_buffer(i)
|
61
63
|
if buffer_index == buffer_end:
|
62
|
-
return self.
|
64
|
+
return self.get_buffer(buffer_index)[
|
63
65
|
index.start % self.buffer_length : index.stop % self.buffer_length
|
64
66
|
]
|
65
67
|
else:
|
66
|
-
start_slice = self.
|
68
|
+
start_slice = self.get_buffer(buffer_index)[
|
67
69
|
index.start % self.buffer_length :
|
68
70
|
]
|
69
|
-
end_slice = self.
|
71
|
+
end_slice = self.get_buffer(buffer_end)[
|
72
|
+
: index.stop % self.buffer_length
|
73
|
+
]
|
70
74
|
middle_slices = [
|
71
|
-
self.
|
75
|
+
self.get_buffer(i) for i in range(buffer_index + 1, buffer_end)
|
72
76
|
]
|
73
77
|
return start_slice + "".join(middle_slices) + end_slice
|
74
78
|
else:
|
75
79
|
buffer_index = index // self.buffer_length
|
76
|
-
self.fill_buffer(buffer_index)
|
77
|
-
return self.buffers[buffer_index][index % self.buffer_length]
|
80
|
+
return self.get_buffer(buffer_index)[index % self.buffer_length]
|
78
81
|
|
79
82
|
def __len__(self) -> int:
|
80
83
|
if self.length < 1:
|
@@ -755,3 +758,47 @@ def from_file(
|
|
755
758
|
fd.close()
|
756
759
|
|
757
760
|
return jsonobj
|
761
|
+
|
762
|
+
|
763
|
+
def cli(): # pragma: no cover
|
764
|
+
parser = argparse.ArgumentParser(description="Repair and parse JSON files.")
|
765
|
+
parser.add_argument("filename", help="The JSON file to repair")
|
766
|
+
parser.add_argument(
|
767
|
+
"-i",
|
768
|
+
"--inline",
|
769
|
+
action="store_true",
|
770
|
+
help="Replace the file inline instead of returning the output to stdout",
|
771
|
+
)
|
772
|
+
parser.add_argument(
|
773
|
+
"--ensure_ascii",
|
774
|
+
action="store_true",
|
775
|
+
help="Pass the ensure_ascii parameter to json.dumps()",
|
776
|
+
)
|
777
|
+
parser.add_argument(
|
778
|
+
"--indent",
|
779
|
+
type=int,
|
780
|
+
default=2,
|
781
|
+
help="Number of spaces for indentation (Default 2)",
|
782
|
+
)
|
783
|
+
|
784
|
+
args = parser.parse_args()
|
785
|
+
|
786
|
+
ensure_ascii = False
|
787
|
+
if args.ensure_ascii:
|
788
|
+
ensure_ascii = True
|
789
|
+
try:
|
790
|
+
result = from_file(args.filename)
|
791
|
+
|
792
|
+
if args.inline:
|
793
|
+
fd = open(args.filename, mode="w")
|
794
|
+
json.dump(result, fd, indent=args.indent, ensure_ascii=ensure_ascii)
|
795
|
+
fd.close()
|
796
|
+
else:
|
797
|
+
print(json.dumps(result, indent=args.indent, ensure_ascii=ensure_ascii))
|
798
|
+
except Exception as e:
|
799
|
+
print(f"Error: {str(e)}", file=sys.stderr)
|
800
|
+
sys.exit(1)
|
801
|
+
|
802
|
+
|
803
|
+
if __name__ == "__main__": # pragma: no cover
|
804
|
+
cli()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.28.4
|
3
|
+
Version: 0.29.0
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -150,6 +150,31 @@ Some rules of thumb to use:
|
|
150
150
|
- `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
|
151
151
|
- If you are having issues with escaping pass the string as **raw** string like: `r"string with escaping\""`
|
152
152
|
|
153
|
+
### Use json_repair from CLI
|
154
|
+
|
155
|
+
Install the library for command-line with:
|
156
|
+
```
|
157
|
+
pipx install json-repair
|
158
|
+
```
|
159
|
+
then run
|
160
|
+
```
|
161
|
+
$ json_repair -h
|
162
|
+
|
163
|
+
usage: json_repair [-h] [-i] [--ensure_ascii] [--indent INDENT] filename
|
164
|
+
|
165
|
+
Repair and parse JSON files.
|
166
|
+
|
167
|
+
positional arguments:
|
168
|
+
filename The JSON file to repair
|
169
|
+
|
170
|
+
options:
|
171
|
+
-h, --help show this help message and exit
|
172
|
+
-i, --inline Replace the file inline instead of returning the output to stdout
|
173
|
+
--ensure_ascii Pass the ensure_ascii parameter to json.dumps()
|
174
|
+
--indent INDENT Number of spaces for indentation (Default 2)
|
175
|
+
```
|
176
|
+
to learn how to use it
|
177
|
+
|
153
178
|
## Adding to requirements
|
154
179
|
**Please pin this library only on the major version!**
|
155
180
|
|
@@ -2,11 +2,13 @@ LICENSE
|
|
2
2
|
README.md
|
3
3
|
pyproject.toml
|
4
4
|
src/json_repair/__init__.py
|
5
|
+
src/json_repair/__main__.py
|
5
6
|
src/json_repair/json_repair.py
|
6
7
|
src/json_repair/py.typed
|
7
8
|
src/json_repair.egg-info/PKG-INFO
|
8
9
|
src/json_repair.egg-info/SOURCES.txt
|
9
10
|
src/json_repair.egg-info/dependency_links.txt
|
11
|
+
src/json_repair.egg-info/entry_points.txt
|
10
12
|
src/json_repair.egg-info/top_level.txt
|
11
13
|
tests/test_coverage.py
|
12
14
|
tests/test_json_repair.py
|
@@ -19,7 +19,7 @@ def test_true_true_correct(benchmark):
|
|
19
19
|
mean_time = benchmark.stats.get("median")
|
20
20
|
|
21
21
|
# Define your time threshold in seconds
|
22
|
-
max_time =
|
22
|
+
max_time = 15 / 10 ** 4 # 1.5 millisecond
|
23
23
|
|
24
24
|
# Assert that the average time is below the threshold
|
25
25
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -31,7 +31,7 @@ def test_true_true_incorrect(benchmark):
|
|
31
31
|
mean_time = benchmark.stats.get("median")
|
32
32
|
|
33
33
|
# Define your time threshold in seconds
|
34
|
-
max_time =
|
34
|
+
max_time = 15 / 10 ** 4 # 1.5 millisecond
|
35
35
|
|
36
36
|
# Assert that the average time is below the threshold
|
37
37
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -53,7 +53,7 @@ def test_true_false_incorrect(benchmark):
|
|
53
53
|
mean_time = benchmark.stats.get("median")
|
54
54
|
|
55
55
|
# Define your time threshold in seconds
|
56
|
-
max_time =
|
56
|
+
max_time = 15 / 10 ** 4 # 1.5 millisecond
|
57
57
|
|
58
58
|
# Assert that the average time is below the threshold
|
59
59
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -64,7 +64,7 @@ def test_false_true_correct(benchmark):
|
|
64
64
|
mean_time = benchmark.stats.get("median")
|
65
65
|
|
66
66
|
# Define your time threshold in seconds
|
67
|
-
max_time =
|
67
|
+
max_time = 15 / 10 ** 4 # 1.5 millisecond
|
68
68
|
|
69
69
|
# Assert that the average time is below the threshold
|
70
70
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -75,7 +75,7 @@ def test_false_true_incorrect(benchmark):
|
|
75
75
|
mean_time = benchmark.stats.get("median")
|
76
76
|
|
77
77
|
# Define your time threshold in seconds
|
78
|
-
max_time =
|
78
|
+
max_time = 15 / 10 ** 4 # 1.5 millisecond
|
79
79
|
|
80
80
|
# Assert that the average time is below the threshold
|
81
81
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|