json-repair 0.28.4__py3-none-any.whl → 0.29.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ from .json_repair import cli
2
+
3
+ if __name__ == "__main__":
4
+ cli()
@@ -22,7 +22,9 @@ If something is wrong (a missing parenthesis or quotes for example) it will use
22
22
  All supported use cases are in the unit tests
23
23
  """
24
24
 
25
+ import argparse
25
26
  import os
27
+ import sys
26
28
  import json
27
29
  from typing import Any, Dict, List, Optional, Union, TextIO, Tuple, Literal
28
30
 
@@ -40,14 +42,16 @@ class StringFileWrapper:
40
42
  CHUNK_LENGTH = 1_000_000
41
43
  self.buffer_length = CHUNK_LENGTH
42
44
 
43
- def fill_buffer(self, index: int) -> None:
45
+ def get_buffer(self, index: int) -> str:
44
46
  if self.buffers.get(index) is None:
45
47
  self.fd.seek(index * self.buffer_length)
46
48
  self.buffers[index] = self.fd.read(self.buffer_length)
47
49
  # Save memory by keeping max 2MB buffer chunks and min 2 chunks
48
50
  if len(self.buffers) > max(2, 2_000_000 / self.buffer_length):
49
51
  oldest_key = next(iter(self.buffers))
50
- self.buffers.pop(oldest_key)
52
+ if oldest_key != index:
53
+ self.buffers.pop(oldest_key)
54
+ return self.buffers[index]
51
55
 
52
56
  def __getitem__(self, index: Union[int, slice]) -> str:
53
57
  # The buffer is an array that is addressed by seeking, like RAM:
@@ -56,25 +60,24 @@ class StringFileWrapper:
56
60
  if isinstance(index, slice):
57
61
  buffer_index = index.start // self.buffer_length
58
62
  buffer_end = index.stop // self.buffer_length
59
- for i in range(buffer_index, buffer_end + 1):
60
- self.fill_buffer(i)
61
63
  if buffer_index == buffer_end:
62
- return self.buffers[buffer_index][
64
+ return self.get_buffer(buffer_index)[
63
65
  index.start % self.buffer_length : index.stop % self.buffer_length
64
66
  ]
65
67
  else:
66
- start_slice = self.buffers[buffer_index][
68
+ start_slice = self.get_buffer(buffer_index)[
67
69
  index.start % self.buffer_length :
68
70
  ]
69
- end_slice = self.buffers[buffer_end][: index.stop % self.buffer_length]
71
+ end_slice = self.get_buffer(buffer_end)[
72
+ : index.stop % self.buffer_length
73
+ ]
70
74
  middle_slices = [
71
- self.buffers[i] for i in range(buffer_index + 1, buffer_end)
75
+ self.get_buffer(i) for i in range(buffer_index + 1, buffer_end)
72
76
  ]
73
77
  return start_slice + "".join(middle_slices) + end_slice
74
78
  else:
75
79
  buffer_index = index // self.buffer_length
76
- self.fill_buffer(buffer_index)
77
- return self.buffers[buffer_index][index % self.buffer_length]
80
+ return self.get_buffer(buffer_index)[index % self.buffer_length]
78
81
 
79
82
  def __len__(self) -> int:
80
83
  if self.length < 1:
@@ -755,3 +758,47 @@ def from_file(
755
758
  fd.close()
756
759
 
757
760
  return jsonobj
761
+
762
+
763
+ def cli(): # pragma: no cover
764
+ parser = argparse.ArgumentParser(description="Repair and parse JSON files.")
765
+ parser.add_argument("filename", help="The JSON file to repair")
766
+ parser.add_argument(
767
+ "-i",
768
+ "--inline",
769
+ action="store_true",
770
+ help="Replace the file inline instead of returning the output to stdout",
771
+ )
772
+ parser.add_argument(
773
+ "--ensure_ascii",
774
+ action="store_true",
775
+ help="Pass the ensure_ascii parameter to json.dumps()",
776
+ )
777
+ parser.add_argument(
778
+ "--indent",
779
+ type=int,
780
+ default=2,
781
+ help="Number of spaces for indentation (Default 2)",
782
+ )
783
+
784
+ args = parser.parse_args()
785
+
786
+ ensure_ascii = False
787
+ if args.ensure_ascii:
788
+ ensure_ascii = True
789
+ try:
790
+ result = from_file(args.filename)
791
+
792
+ if args.inline:
793
+ fd = open(args.filename, mode="w")
794
+ json.dump(result, fd, indent=args.indent, ensure_ascii=ensure_ascii)
795
+ fd.close()
796
+ else:
797
+ print(json.dumps(result, indent=args.indent, ensure_ascii=ensure_ascii))
798
+ except Exception as e:
799
+ print(f"Error: {str(e)}", file=sys.stderr)
800
+ sys.exit(1)
801
+
802
+
803
+ if __name__ == "__main__": # pragma: no cover
804
+ cli()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.28.4
3
+ Version: 0.29.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -150,6 +150,31 @@ Some rules of thumb to use:
150
150
  - `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
151
151
  - If you are having issues with escaping pass the string as **raw** string like: `r"string with escaping\""`
152
152
 
153
+ ### Use json_repair from CLI
154
+
155
+ Install the library for command-line use with:
156
+ ```
157
+ pipx install json-repair
158
+ ```
159
+ then run
160
+ ```
161
+ $ json_repair -h
162
+
163
+ usage: json_repair [-h] [-i] [--ensure_ascii] [--indent INDENT] filename
164
+
165
+ Repair and parse JSON files.
166
+
167
+ positional arguments:
168
+ filename The JSON file to repair
169
+
170
+ options:
171
+ -h, --help show this help message and exit
172
+ -i, --inline Replace the file inline instead of returning the output to stdout
173
+ --ensure_ascii Pass the ensure_ascii parameter to json.dumps()
174
+ --indent INDENT Number of spaces for indentation (Default 2)
175
+ ```
176
+ to see the available options and learn how to use it
177
+
153
178
  ## Adding to requirements
154
179
  **Please pin this library only on the major version!**
155
180
 
@@ -0,0 +1,10 @@
1
+ json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
+ json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
+ json_repair/json_repair.py,sha256=hltJ3Qa4qFbUD3mVKkYvFWksnCcIZqx8zamKfBpjeNs,33538
4
+ json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ json_repair-0.29.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
6
+ json_repair-0.29.0.dist-info/METADATA,sha256=yh0EJo-I1u0R6X-Gq9ETz0WbgmuGIhzR7Icw9W4Kee0,9630
7
+ json_repair-0.29.0.dist-info/WHEEL,sha256=uCRv0ZEik_232NlR4YDw4Pv3Ajt5bKvMH13NUU7hFuI,91
8
+ json_repair-0.29.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
9
+ json_repair-0.29.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
10
+ json_repair-0.29.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (74.0.0)
2
+ Generator: setuptools (74.1.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ json_repair = json_repair.__main__:cli
@@ -1,8 +0,0 @@
1
- json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
- json_repair/json_repair.py,sha256=odtRiFJ-u8mbdw_3Djx4jADxGoBeQvot3536D6Y6K0c,32266
3
- json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- json_repair-0.28.4.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
5
- json_repair-0.28.4.dist-info/METADATA,sha256=2JB2TM0mrFC7OejTtgFrpyr2qQOw8xwX0KmLTSzephk,9019
6
- json_repair-0.28.4.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
7
- json_repair-0.28.4.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
8
- json_repair-0.28.4.dist-info/RECORD,,