json-repair 0.28.4__py3-none-any.whl → 0.29.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4 @@
1
+ from .json_repair import cli
2
+
3
+ if __name__ == "__main__":
4
+ cli()
@@ -22,7 +22,9 @@ If something is wrong (a missing parenthesis or quote, for example) it will use
22
22
  All supported use cases are in the unit tests
23
23
  """
24
24
 
25
+ import argparse
25
26
  import os
27
+ import sys
26
28
  import json
27
29
  from typing import Any, Dict, List, Optional, Union, TextIO, Tuple, Literal
28
30
 
@@ -40,14 +42,16 @@ class StringFileWrapper:
40
42
  CHUNK_LENGTH = 1_000_000
41
43
  self.buffer_length = CHUNK_LENGTH
42
44
 
43
- def fill_buffer(self, index: int) -> None:
45
+ def get_buffer(self, index: int) -> str:
44
46
  if self.buffers.get(index) is None:
45
47
  self.fd.seek(index * self.buffer_length)
46
48
  self.buffers[index] = self.fd.read(self.buffer_length)
47
49
  # Save memory by keeping at most 2MB worth of buffered chunks, but never fewer than 2 chunks
48
50
  if len(self.buffers) > max(2, 2_000_000 / self.buffer_length):
49
51
  oldest_key = next(iter(self.buffers))
50
- self.buffers.pop(oldest_key)
52
+ if oldest_key != index:
53
+ self.buffers.pop(oldest_key)
54
+ return self.buffers[index]
51
55
 
52
56
  def __getitem__(self, index: Union[int, slice]) -> str:
53
57
  # The buffer is an array that is addressed like RAM:
@@ -56,25 +60,24 @@ class StringFileWrapper:
56
60
  if isinstance(index, slice):
57
61
  buffer_index = index.start // self.buffer_length
58
62
  buffer_end = index.stop // self.buffer_length
59
- for i in range(buffer_index, buffer_end + 1):
60
- self.fill_buffer(i)
61
63
  if buffer_index == buffer_end:
62
- return self.buffers[buffer_index][
64
+ return self.get_buffer(buffer_index)[
63
65
  index.start % self.buffer_length : index.stop % self.buffer_length
64
66
  ]
65
67
  else:
66
- start_slice = self.buffers[buffer_index][
68
+ start_slice = self.get_buffer(buffer_index)[
67
69
  index.start % self.buffer_length :
68
70
  ]
69
- end_slice = self.buffers[buffer_end][: index.stop % self.buffer_length]
71
+ end_slice = self.get_buffer(buffer_end)[
72
+ : index.stop % self.buffer_length
73
+ ]
70
74
  middle_slices = [
71
- self.buffers[i] for i in range(buffer_index + 1, buffer_end)
75
+ self.get_buffer(i) for i in range(buffer_index + 1, buffer_end)
72
76
  ]
73
77
  return start_slice + "".join(middle_slices) + end_slice
74
78
  else:
75
79
  buffer_index = index // self.buffer_length
76
- self.fill_buffer(buffer_index)
77
- return self.buffers[buffer_index][index % self.buffer_length]
80
+ return self.get_buffer(buffer_index)[index % self.buffer_length]
78
81
 
79
82
  def __len__(self) -> int:
80
83
  if self.length < 1:
@@ -755,3 +758,65 @@ def from_file(
755
758
  fd.close()
756
759
 
757
760
  return jsonobj
761
+
762
+
763
+ def cli(inline_args: Optional[List[str]] = None) -> int:
764
+ parser = argparse.ArgumentParser(description="Repair and parse JSON files.")
765
+ parser.add_argument("filename", help="The JSON file to repair")
766
+ parser.add_argument(
767
+ "-i",
768
+ "--inline",
769
+ action="store_true",
770
+ help="Replace the file inline instead of returning the output to stdout",
771
+ )
772
+ parser.add_argument(
773
+ "-o",
774
+ "--output",
775
+ metavar="TARGET",
776
+ help="If specified, the output will be written to TARGET filename instead of stdout",
777
+ )
778
+ parser.add_argument(
779
+ "--ensure_ascii",
780
+ action="store_true",
781
+ help="Pass ensure_ascii=True to json.dumps()",
782
+ )
783
+ parser.add_argument(
784
+ "--indent",
785
+ type=int,
786
+ default=2,
787
+ help="Number of spaces for indentation (Default 2)",
788
+ )
789
+
790
+ if inline_args is None: # pragma: no cover
791
+ args = parser.parse_args()
792
+ else:
793
+ args = parser.parse_args(
794
+ inline_args
795
+ ) # This is needed so this function is testable
796
+
797
+ if args.inline and args.output: # pragma: no cover
798
+ print("Error: You cannot pass both --inline and --output", file=sys.stderr)
799
+ sys.exit(1)
800
+
801
+ ensure_ascii = False
802
+ if args.ensure_ascii:
803
+ ensure_ascii = True
804
+
805
+ try:
806
+ result = from_file(args.filename)
807
+
808
+ if args.inline or args.output:
809
+ fd = open(args.output or args.filename, mode="w")
810
+ json.dump(result, fd, indent=args.indent, ensure_ascii=ensure_ascii)
811
+ fd.close()
812
+ else:
813
+ print(json.dumps(result, indent=args.indent, ensure_ascii=ensure_ascii))
814
+ except Exception as e: # pragma: no cover
815
+ print(f"Error: {str(e)}", file=sys.stderr)
816
+ sys.exit(1)
817
+
818
+ return 0 # Success
819
+
820
+
821
+ if __name__ == "__main__": # pragma: no cover
822
+ sys.exit(cli())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.28.4
3
+ Version: 0.29.1
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -150,6 +150,31 @@ Some rules of thumb to use:
150
150
  - `skip_json_loads` is faster only if you know for certain that the string is not valid JSON
151
151
  - If you are having issues with escaping, pass the string as a **raw** string like: `r"string with escaping\""`
152
152
 
153
+ ### Use json_repair from CLI
154
+
155
+ Install the library for command-line use with:
156
+ ```
157
+ pipx install json-repair
158
+ ```
159
+ To see all available options:
160
+ ```
161
+ $ json_repair -h
162
+ usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
163
+
164
+ Repair and parse JSON files.
165
+
166
+ positional arguments:
167
+ filename The JSON file to repair
168
+
169
+ options:
170
+ -h, --help show this help message and exit
171
+ -i, --inline Replace the file inline instead of returning the output to stdout
172
+ -o TARGET, --output TARGET
173
+ If specified, the output will be written to TARGET filename instead of stdout
174
+ --ensure_ascii Pass ensure_ascii=True to json.dumps()
175
+ --indent INDENT Number of spaces for indentation (Default 2)
176
+ ```
177
+
153
178
  ## Adding to requirements
154
179
  **Please pin this library only on the major version!**
155
180
 
@@ -0,0 +1,10 @@
1
+ json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
+ json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
+ json_repair/json_repair.py,sha256=amzSIOX_wR22QCheozEzsPLA09RRc8AybBUaiIIJagI,34164
4
+ json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ json_repair-0.29.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
6
+ json_repair-0.29.1.dist-info/METADATA,sha256=q2kI12fNuayrEkqqDtVWKmagimcSgAKPHdanuQwMAtI,9787
7
+ json_repair-0.29.1.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
8
+ json_repair-0.29.1.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
9
+ json_repair-0.29.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
10
+ json_repair-0.29.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (74.0.0)
2
+ Generator: setuptools (74.1.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ json_repair = json_repair.__main__:cli
@@ -1,8 +0,0 @@
1
- json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
- json_repair/json_repair.py,sha256=odtRiFJ-u8mbdw_3Djx4jADxGoBeQvot3536D6Y6K0c,32266
3
- json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- json_repair-0.28.4.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
5
- json_repair-0.28.4.dist-info/METADATA,sha256=2JB2TM0mrFC7OejTtgFrpyr2qQOw8xwX0KmLTSzephk,9019
6
- json_repair-0.28.4.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
7
- json_repair-0.28.4.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
8
- json_repair-0.28.4.dist-info/RECORD,,