json-repair 0.28.4__py3-none-any.whl → 0.29.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/__main__.py +4 -0
- json_repair/json_repair.py +57 -10
- {json_repair-0.28.4.dist-info → json_repair-0.29.0.dist-info}/METADATA +26 -1
- json_repair-0.29.0.dist-info/RECORD +10 -0
- {json_repair-0.28.4.dist-info → json_repair-0.29.0.dist-info}/WHEEL +1 -1
- json_repair-0.29.0.dist-info/entry_points.txt +2 -0
- json_repair-0.28.4.dist-info/RECORD +0 -8
- {json_repair-0.28.4.dist-info → json_repair-0.29.0.dist-info}/LICENSE +0 -0
- {json_repair-0.28.4.dist-info → json_repair-0.29.0.dist-info}/top_level.txt +0 -0
json_repair/__main__.py
ADDED
json_repair/json_repair.py
CHANGED
@@ -22,7 +22,9 @@ If something is wrong (a missing parantheses or quotes for example) it will use
|
|
22
22
|
All supported use cases are in the unit tests
|
23
23
|
"""
|
24
24
|
|
25
|
+
import argparse
|
25
26
|
import os
|
27
|
+
import sys
|
26
28
|
import json
|
27
29
|
from typing import Any, Dict, List, Optional, Union, TextIO, Tuple, Literal
|
28
30
|
|
@@ -40,14 +42,16 @@ class StringFileWrapper:
|
|
40
42
|
CHUNK_LENGTH = 1_000_000
|
41
43
|
self.buffer_length = CHUNK_LENGTH
|
42
44
|
|
43
|
-
def
|
45
|
+
def get_buffer(self, index: int) -> str:
    """Return the chunk stored at position ``index``, loading it on first use.

    On a cache miss the file object ``self.fd`` is positioned at
    ``index * self.buffer_length`` and up to ``self.buffer_length`` characters
    are read and remembered in ``self.buffers``.

    Args:
        index: Zero-based chunk number (not a character offset).

    Returns:
        The cached text of the requested chunk.
    """
    if self.buffers.get(index) is None:
        offset = index * self.buffer_length
        self.fd.seek(offset)
        self.buffers[index] = self.fd.read(self.buffer_length)
        # Bound memory use: keep roughly 2MB worth of chunks, but never
        # fewer than 2 chunks.
        chunk_limit = max(2, 2_000_000 / self.buffer_length)
        if len(self.buffers) > chunk_limit:
            # Dicts iterate in insertion order, so the first key is the
            # chunk that was loaded earliest.
            first_key = next(iter(self.buffers))
            # Never evict the chunk we are about to return.
            if first_key != index:
                del self.buffers[first_key]
    return self.buffers[index]
|
51
55
|
|
52
56
|
def __getitem__(self, index: Union[int, slice]) -> str:
|
53
57
|
# The buffer is an array that is seek like a RAM:
|
@@ -56,25 +60,24 @@ class StringFileWrapper:
|
|
56
60
|
if isinstance(index, slice):
|
57
61
|
buffer_index = index.start // self.buffer_length
|
58
62
|
buffer_end = index.stop // self.buffer_length
|
59
|
-
for i in range(buffer_index, buffer_end + 1):
|
60
|
-
self.fill_buffer(i)
|
61
63
|
if buffer_index == buffer_end:
|
62
|
-
return self.
|
64
|
+
return self.get_buffer(buffer_index)[
|
63
65
|
index.start % self.buffer_length : index.stop % self.buffer_length
|
64
66
|
]
|
65
67
|
else:
|
66
|
-
start_slice = self.
|
68
|
+
start_slice = self.get_buffer(buffer_index)[
|
67
69
|
index.start % self.buffer_length :
|
68
70
|
]
|
69
|
-
end_slice = self.
|
71
|
+
end_slice = self.get_buffer(buffer_end)[
|
72
|
+
: index.stop % self.buffer_length
|
73
|
+
]
|
70
74
|
middle_slices = [
|
71
|
-
self.
|
75
|
+
self.get_buffer(i) for i in range(buffer_index + 1, buffer_end)
|
72
76
|
]
|
73
77
|
return start_slice + "".join(middle_slices) + end_slice
|
74
78
|
else:
|
75
79
|
buffer_index = index // self.buffer_length
|
76
|
-
self.
|
77
|
-
return self.buffers[buffer_index][index % self.buffer_length]
|
80
|
+
return self.get_buffer(buffer_index)[index % self.buffer_length]
|
78
81
|
|
79
82
|
def __len__(self) -> int:
|
80
83
|
if self.length < 1:
|
@@ -755,3 +758,47 @@ def from_file(
|
|
755
758
|
fd.close()
|
756
759
|
|
757
760
|
return jsonobj
|
761
|
+
|
762
|
+
|
763
|
+
def cli():  # pragma: no cover
    """Command-line entry point: repair a JSON file and emit the result.

    Parses ``sys.argv`` for:

    * ``filename`` - path of the JSON file to repair (positional).
    * ``-i`` / ``--inline`` - overwrite ``filename`` with the repaired JSON
      instead of printing it to stdout.
    * ``--ensure_ascii`` - forwarded as ``ensure_ascii`` to the JSON
      serializer.
    * ``--indent`` - number of spaces used for indentation (default 2).

    The file is loaded through ``from_file``. Any exception raised while
    loading, serializing or writing is reported on stderr as
    ``Error: <message>`` and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(description="Repair and parse JSON files.")
    parser.add_argument("filename", help="The JSON file to repair")
    parser.add_argument(
        "-i",
        "--inline",
        action="store_true",
        help="Replace the file inline instead of returning the output to stdout",
    )
    parser.add_argument(
        "--ensure_ascii",
        action="store_true",
        help="Pass the ensure_ascii parameter to json.dumps()",
    )
    parser.add_argument(
        "--indent",
        type=int,
        default=2,
        help="Number of spaces for indentation (Default 2)",
    )

    args = parser.parse_args()

    try:
        result = from_file(args.filename)

        # Serialize *before* opening the target for writing: opening with
        # mode "w" truncates the file, so a serialization failure after that
        # point would destroy the user's original input.
        output = json.dumps(
            result, indent=args.indent, ensure_ascii=args.ensure_ascii
        )

        if args.inline:
            # The context manager guarantees the handle is closed even if
            # the write fails.
            with open(args.filename, mode="w") as fd:
                fd.write(output)
        else:
            print(output)
    except Exception as e:
        # Top-level CLI boundary: report any failure and exit non-zero
        # instead of showing a traceback.
        print(f"Error: {str(e)}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":  # pragma: no cover
    cli()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.29.0
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -150,6 +150,31 @@ Some rules of thumb to use:
|
|
150
150
|
- `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
|
151
151
|
- If you are having issues with escaping pass the string as **raw** string like: `r"string with escaping\""`
|
152
152
|
|
153
|
+
### Use json_repair from CLI
|
154
|
+
|
155
|
+
Install the library for command-line use with:
|
156
|
+
```
|
157
|
+
pipx install json-repair
|
158
|
+
```
|
159
|
+
then run
|
160
|
+
```
|
161
|
+
$ json_repair -h
|
162
|
+
|
163
|
+
usage: json_repair [-h] [-i] [--ensure_ascii] [--indent INDENT] filename
|
164
|
+
|
165
|
+
Repair and parse JSON files.
|
166
|
+
|
167
|
+
positional arguments:
|
168
|
+
filename The JSON file to repair
|
169
|
+
|
170
|
+
options:
|
171
|
+
-h, --help show this help message and exit
|
172
|
+
-i, --inline Replace the file inline instead of returning the output to stdout
|
173
|
+
--ensure_ascii Pass the ensure_ascii parameter to json.dumps()
|
174
|
+
--indent INDENT Number of spaces for indentation (Default 2)
|
175
|
+
```
|
176
|
+
to learn how to use it
|
177
|
+
|
153
178
|
## Adding to requirements
|
154
179
|
**Please pin this library only on the major version!**
|
155
180
|
|
@@ -0,0 +1,10 @@
|
|
1
|
+
json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
|
2
|
+
json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
|
3
|
+
json_repair/json_repair.py,sha256=hltJ3Qa4qFbUD3mVKkYvFWksnCcIZqx8zamKfBpjeNs,33538
|
4
|
+
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
json_repair-0.29.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
6
|
+
json_repair-0.29.0.dist-info/METADATA,sha256=yh0EJo-I1u0R6X-Gq9ETz0WbgmuGIhzR7Icw9W4Kee0,9630
|
7
|
+
json_repair-0.29.0.dist-info/WHEEL,sha256=uCRv0ZEik_232NlR4YDw4Pv3Ajt5bKvMH13NUU7hFuI,91
|
8
|
+
json_repair-0.29.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
|
9
|
+
json_repair-0.29.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
10
|
+
json_repair-0.29.0.dist-info/RECORD,,
|
@@ -1,8 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
|
2
|
-
json_repair/json_repair.py,sha256=odtRiFJ-u8mbdw_3Djx4jADxGoBeQvot3536D6Y6K0c,32266
|
3
|
-
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
-
json_repair-0.28.4.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
5
|
-
json_repair-0.28.4.dist-info/METADATA,sha256=2JB2TM0mrFC7OejTtgFrpyr2qQOw8xwX0KmLTSzephk,9019
|
6
|
-
json_repair-0.28.4.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
|
7
|
-
json_repair-0.28.4.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
8
|
-
json_repair-0.28.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|