json-repair 0.28.4__py3-none-any.whl → 0.29.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- json_repair/__main__.py +4 -0
- json_repair/json_repair.py +57 -10
- {json_repair-0.28.4.dist-info → json_repair-0.29.0.dist-info}/METADATA +26 -1
- json_repair-0.29.0.dist-info/RECORD +10 -0
- {json_repair-0.28.4.dist-info → json_repair-0.29.0.dist-info}/WHEEL +1 -1
- json_repair-0.29.0.dist-info/entry_points.txt +2 -0
- json_repair-0.28.4.dist-info/RECORD +0 -8
- {json_repair-0.28.4.dist-info → json_repair-0.29.0.dist-info}/LICENSE +0 -0
- {json_repair-0.28.4.dist-info → json_repair-0.29.0.dist-info}/top_level.txt +0 -0
json_repair/__main__.py
ADDED
json_repair/json_repair.py
CHANGED
@@ -22,7 +22,9 @@ If something is wrong (a missing parenthesis or quote, for example) it will use
|
|
22
22
|
All supported use cases are in the unit tests
|
23
23
|
"""
|
24
24
|
|
25
|
+
import argparse
|
25
26
|
import os
|
27
|
+
import sys
|
26
28
|
import json
|
27
29
|
from typing import Any, Dict, List, Optional, Union, TextIO, Tuple, Literal
|
28
30
|
|
@@ -40,14 +42,16 @@ class StringFileWrapper:
|
|
40
42
|
CHUNK_LENGTH = 1_000_000
|
41
43
|
self.buffer_length = CHUNK_LENGTH
|
42
44
|
|
43
|
-
def fill_buffer(self, index: int) -> None:
|
45
|
+
def get_buffer(self, index: int) -> str:
|
44
46
|
if self.buffers.get(index) is None:
|
45
47
|
self.fd.seek(index * self.buffer_length)
|
46
48
|
self.buffers[index] = self.fd.read(self.buffer_length)
|
47
49
|
# Save memory by keeping max 2MB buffer chunks and min 2 chunks
|
48
50
|
if len(self.buffers) > max(2, 2_000_000 / self.buffer_length):
|
49
51
|
oldest_key = next(iter(self.buffers))
|
50
|
-
|
52
|
+
if oldest_key != index:
|
53
|
+
self.buffers.pop(oldest_key)
|
54
|
+
return self.buffers[index]
|
51
55
|
|
52
56
|
def __getitem__(self, index: Union[int, slice]) -> str:
|
53
57
|
# The buffer is an array that is addressed like RAM:
|
@@ -56,25 +60,24 @@ class StringFileWrapper:
|
|
56
60
|
if isinstance(index, slice):
|
57
61
|
buffer_index = index.start // self.buffer_length
|
58
62
|
buffer_end = index.stop // self.buffer_length
|
59
|
-
for i in range(buffer_index, buffer_end + 1):
|
60
|
-
self.fill_buffer(i)
|
61
63
|
if buffer_index == buffer_end:
|
62
|
-
return self.buffers[buffer_index][
|
64
|
+
return self.get_buffer(buffer_index)[
|
63
65
|
index.start % self.buffer_length : index.stop % self.buffer_length
|
64
66
|
]
|
65
67
|
else:
|
66
|
-
start_slice = self.buffers[buffer_index][
|
68
|
+
start_slice = self.get_buffer(buffer_index)[
|
67
69
|
index.start % self.buffer_length :
|
68
70
|
]
|
69
|
-
end_slice = self.buffers[buffer_end][: index.stop % self.buffer_length]
|
71
|
+
end_slice = self.get_buffer(buffer_end)[
|
72
|
+
: index.stop % self.buffer_length
|
73
|
+
]
|
70
74
|
middle_slices = [
|
71
|
-
self.buffers[i] for i in range(buffer_index + 1, buffer_end)
|
75
|
+
self.get_buffer(i) for i in range(buffer_index + 1, buffer_end)
|
72
76
|
]
|
73
77
|
return start_slice + "".join(middle_slices) + end_slice
|
74
78
|
else:
|
75
79
|
buffer_index = index // self.buffer_length
|
76
|
-
self.fill_buffer(buffer_index)
|
77
|
-
return self.buffers[buffer_index][index % self.buffer_length]
|
80
|
+
return self.get_buffer(buffer_index)[index % self.buffer_length]
|
78
81
|
|
79
82
|
def __len__(self) -> int:
|
80
83
|
if self.length < 1:
|
@@ -755,3 +758,47 @@ def from_file(
|
|
755
758
|
fd.close()
|
756
759
|
|
757
760
|
return jsonobj
|
761
|
+
|
762
|
+
|
763
|
+
def cli(): # pragma: no cover
|
764
|
+
parser = argparse.ArgumentParser(description="Repair and parse JSON files.")
|
765
|
+
parser.add_argument("filename", help="The JSON file to repair")
|
766
|
+
parser.add_argument(
|
767
|
+
"-i",
|
768
|
+
"--inline",
|
769
|
+
action="store_true",
|
770
|
+
help="Replace the file inline instead of returning the output to stdout",
|
771
|
+
)
|
772
|
+
parser.add_argument(
|
773
|
+
"--ensure_ascii",
|
774
|
+
action="store_true",
|
775
|
+
help="Pass the ensure_ascii parameter to json.dumps()",
|
776
|
+
)
|
777
|
+
parser.add_argument(
|
778
|
+
"--indent",
|
779
|
+
type=int,
|
780
|
+
default=2,
|
781
|
+
help="Number of spaces for indentation (Default 2)",
|
782
|
+
)
|
783
|
+
|
784
|
+
args = parser.parse_args()
|
785
|
+
|
786
|
+
ensure_ascii = False
|
787
|
+
if args.ensure_ascii:
|
788
|
+
ensure_ascii = True
|
789
|
+
try:
|
790
|
+
result = from_file(args.filename)
|
791
|
+
|
792
|
+
if args.inline:
|
793
|
+
fd = open(args.filename, mode="w")
|
794
|
+
json.dump(result, fd, indent=args.indent, ensure_ascii=ensure_ascii)
|
795
|
+
fd.close()
|
796
|
+
else:
|
797
|
+
print(json.dumps(result, indent=args.indent, ensure_ascii=ensure_ascii))
|
798
|
+
except Exception as e:
|
799
|
+
print(f"Error: {str(e)}", file=sys.stderr)
|
800
|
+
sys.exit(1)
|
801
|
+
|
802
|
+
|
803
|
+
if __name__ == "__main__": # pragma: no cover
|
804
|
+
cli()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.28.4
|
3
|
+
Version: 0.29.0
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -150,6 +150,31 @@ Some rules of thumb to use:
|
|
150
150
|
- `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
|
151
151
|
- If you are having issues with escaping pass the string as **raw** string like: `r"string with escaping\""`
|
152
152
|
|
153
|
+
### Use json_repair from CLI
|
154
|
+
|
155
|
+
Install the library for command-line use with:
|
156
|
+
```
|
157
|
+
pipx install json-repair
|
158
|
+
```
|
159
|
+
then run
|
160
|
+
```
|
161
|
+
$ json_repair -h
|
162
|
+
|
163
|
+
usage: json_repair [-h] [-i] [--ensure_ascii] [--indent INDENT] filename
|
164
|
+
|
165
|
+
Repair and parse JSON files.
|
166
|
+
|
167
|
+
positional arguments:
|
168
|
+
filename The JSON file to repair
|
169
|
+
|
170
|
+
options:
|
171
|
+
-h, --help show this help message and exit
|
172
|
+
-i, --inline Replace the file inline instead of returning the output to stdout
|
173
|
+
--ensure_ascii Pass the ensure_ascii parameter to json.dumps()
|
174
|
+
--indent INDENT Number of spaces for indentation (Default 2)
|
175
|
+
```
|
176
|
+
to learn how to use it
|
177
|
+
|
153
178
|
## Adding to requirements
|
154
179
|
**Please pin this library only on the major version!**
|
155
180
|
|
@@ -0,0 +1,10 @@
|
|
1
|
+
json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
|
2
|
+
json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
|
3
|
+
json_repair/json_repair.py,sha256=hltJ3Qa4qFbUD3mVKkYvFWksnCcIZqx8zamKfBpjeNs,33538
|
4
|
+
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
json_repair-0.29.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
6
|
+
json_repair-0.29.0.dist-info/METADATA,sha256=yh0EJo-I1u0R6X-Gq9ETz0WbgmuGIhzR7Icw9W4Kee0,9630
|
7
|
+
json_repair-0.29.0.dist-info/WHEEL,sha256=uCRv0ZEik_232NlR4YDw4Pv3Ajt5bKvMH13NUU7hFuI,91
|
8
|
+
json_repair-0.29.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
|
9
|
+
json_repair-0.29.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
10
|
+
json_repair-0.29.0.dist-info/RECORD,,
|
@@ -1,8 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
|
2
|
-
json_repair/json_repair.py,sha256=odtRiFJ-u8mbdw_3Djx4jADxGoBeQvot3536D6Y6K0c,32266
|
3
|
-
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
-
json_repair-0.28.4.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
5
|
-
json_repair-0.28.4.dist-info/METADATA,sha256=2JB2TM0mrFC7OejTtgFrpyr2qQOw8xwX0KmLTSzephk,9019
|
6
|
-
json_repair-0.28.4.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
|
7
|
-
json_repair-0.28.4.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
8
|
-
json_repair-0.28.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|