json-repair 0.28.4__py3-none-any.whl → 0.29.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
"""Module entry point so the package can be run as ``python -m json_repair``."""

import sys

from .json_repair import cli

if __name__ == "__main__":
    # Hand cli()'s integer return value to the interpreter as the exit status,
    # matching the ``sys.exit(cli())`` guard used in json_repair.py.
    sys.exit(cli())
@@ -22,7 +22,9 @@ If something is wrong (a missing parenthesis or quotes for example) it will use
22
22
  All supported use cases are in the unit tests
23
23
  """
24
24
 
25
+ import argparse
25
26
  import os
27
+ import sys
26
28
  import json
27
29
  from typing import Any, Dict, List, Optional, Union, TextIO, Tuple, Literal
28
30
 
@@ -40,14 +42,16 @@ class StringFileWrapper:
40
42
  CHUNK_LENGTH = 1_000_000
41
43
  self.buffer_length = CHUNK_LENGTH
42
44
 
43
def get_buffer(self, index: int) -> str:
    """Return chunk number ``index`` of the wrapped file, reading it on demand.

    A chunk already held in ``self.buffers`` is returned directly. Otherwise
    the file is positioned at ``index * self.buffer_length``, up to
    ``self.buffer_length`` items are read, and the result is cached under
    ``index`` before being returned.

    Args:
        index: Zero-based chunk number.

    Returns:
        The text of the requested chunk (whatever ``self.fd.read`` returned).
    """
    cached = self.buffers.get(index)
    if cached is not None:
        return cached

    chunk_size = self.buffer_length
    # NOTE(review): assumes seeking to a computed offset is valid for self.fd;
    # confirm for text-mode files with multi-byte encodings.
    self.fd.seek(index * chunk_size)
    self.buffers[index] = self.fd.read(chunk_size)

    # Save memory: keep at most ~2MB worth of chunks, but never fewer than 2
    max_chunks = max(2, 2_000_000 / chunk_size)
    if len(self.buffers) > max_chunks:
        # First key in iteration order (the oldest entry for a plain dict)
        first_key = next(iter(self.buffers))
        # Never evict the chunk that is about to be returned
        if first_key != index:
            del self.buffers[first_key]

    return self.buffers[index]
51
55
 
52
56
  def __getitem__(self, index: Union[int, slice]) -> str:
53
57
  # The buffer is an array that is seek like a RAM:
@@ -56,25 +60,24 @@ class StringFileWrapper:
56
60
  if isinstance(index, slice):
57
61
  buffer_index = index.start // self.buffer_length
58
62
  buffer_end = index.stop // self.buffer_length
59
- for i in range(buffer_index, buffer_end + 1):
60
- self.fill_buffer(i)
61
63
  if buffer_index == buffer_end:
62
- return self.buffers[buffer_index][
64
+ return self.get_buffer(buffer_index)[
63
65
  index.start % self.buffer_length : index.stop % self.buffer_length
64
66
  ]
65
67
  else:
66
- start_slice = self.buffers[buffer_index][
68
+ start_slice = self.get_buffer(buffer_index)[
67
69
  index.start % self.buffer_length :
68
70
  ]
69
- end_slice = self.buffers[buffer_end][: index.stop % self.buffer_length]
71
+ end_slice = self.get_buffer(buffer_end)[
72
+ : index.stop % self.buffer_length
73
+ ]
70
74
  middle_slices = [
71
- self.buffers[i] for i in range(buffer_index + 1, buffer_end)
75
+ self.get_buffer(i) for i in range(buffer_index + 1, buffer_end)
72
76
  ]
73
77
  return start_slice + "".join(middle_slices) + end_slice
74
78
  else:
75
79
  buffer_index = index // self.buffer_length
76
- self.fill_buffer(buffer_index)
77
- return self.buffers[buffer_index][index % self.buffer_length]
80
+ return self.get_buffer(buffer_index)[index % self.buffer_length]
78
81
 
79
82
  def __len__(self) -> int:
80
83
  if self.length < 1:
@@ -755,3 +758,65 @@ def from_file(
755
758
  fd.close()
756
759
 
757
760
  return jsonobj
761
+
762
+
763
def cli(inline_args: Optional[List[str]] = None) -> int:
    """Command-line entry point: repair a JSON file and emit the result.

    The file named by the positional ``filename`` argument is loaded with
    ``from_file`` and re-serialized as JSON. The output goes to stdout by
    default, back into the same file with ``-i/--inline``, or to another file
    with ``-o/--output TARGET``.

    Args:
        inline_args: Argument list to parse instead of ``sys.argv``. This
            exists so the function can be called from tests. When ``None``,
            the real command line is parsed.

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: With status 1 when both ``--inline`` and ``--output`` are
            given, or when loading, serializing or writing fails. argparse
            itself exits on invalid arguments.
    """
    parser = argparse.ArgumentParser(description="Repair and parse JSON files.")
    parser.add_argument("filename", help="The JSON file to repair")
    parser.add_argument(
        "-i",
        "--inline",
        action="store_true",
        help="Replace the file inline instead of returning the output to stdout",
    )
    parser.add_argument(
        "-o",
        "--output",
        metavar="TARGET",
        help="If specified, the output will be written to TARGET filename instead of stdout",
    )
    parser.add_argument(
        "--ensure_ascii",
        action="store_true",
        help="Pass ensure_ascii=True to json.dumps()",
    )
    parser.add_argument(
        "--indent",
        type=int,
        default=2,
        help="Number of spaces for indentation (Default 2)",
    )

    if inline_args is None:  # pragma: no cover
        args = parser.parse_args()
    else:
        # An explicit argument list keeps this function testable
        args = parser.parse_args(inline_args)

    if args.inline and args.output:  # pragma: no cover
        print("Error: You cannot pass both --inline and --output", file=sys.stderr)
        sys.exit(1)

    try:
        result = from_file(args.filename)

        # Serialize BEFORE opening the target. Opening with mode="w" truncates
        # the file, so a serialization failure in --inline mode would
        # otherwise destroy the input file.
        serialized = json.dumps(
            result, indent=args.indent, ensure_ascii=args.ensure_ascii
        )

        if args.inline or args.output:
            # The context manager closes the file even if the write fails
            with open(args.output or args.filename, mode="w") as fd:
                fd.write(serialized)
        else:
            print(serialized)
    except Exception as e:  # pragma: no cover
        print(f"Error: {str(e)}", file=sys.stderr)
        sys.exit(1)

    return 0  # Success


if __name__ == "__main__":  # pragma: no cover
    sys.exit(cli())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.28.4
3
+ Version: 0.29.1
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -150,6 +150,31 @@ Some rules of thumb to use:
150
150
  - `skip_json_loads` is faster only if you are 100% sure that the string is not valid JSON
151
151
  - If you are having issues with escaping pass the string as **raw** string like: `r"string with escaping\""`
152
152
 
153
+ ### Use json_repair from CLI
154
+
155
+ Install the library for command-line use with:
156
+ ```
157
+ pipx install json-repair
158
+ ```
159
+ To see all available options:
160
+ ```
161
+ $ json_repair -h
162
+ usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
163
+
164
+ Repair and parse JSON files.
165
+
166
+ positional arguments:
167
+ filename The JSON file to repair
168
+
169
+ options:
170
+ -h, --help show this help message and exit
171
+ -i, --inline Replace the file inline instead of returning the output to stdout
172
+ -o TARGET, --output TARGET
173
+ If specified, the output will be written to TARGET filename instead of stdout
174
+ --ensure_ascii Pass ensure_ascii=True to json.dumps()
175
+ --indent INDENT Number of spaces for indentation (Default 2)
176
+ ```
177
+
153
178
  ## Adding to requirements
154
179
  **Please pin this library only on the major version!**
155
180
 
@@ -0,0 +1,10 @@
1
+ json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
+ json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
+ json_repair/json_repair.py,sha256=amzSIOX_wR22QCheozEzsPLA09RRc8AybBUaiIIJagI,34164
4
+ json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ json_repair-0.29.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
6
+ json_repair-0.29.1.dist-info/METADATA,sha256=q2kI12fNuayrEkqqDtVWKmagimcSgAKPHdanuQwMAtI,9787
7
+ json_repair-0.29.1.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
8
+ json_repair-0.29.1.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
9
+ json_repair-0.29.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
10
+ json_repair-0.29.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (74.0.0)
2
+ Generator: setuptools (74.1.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ json_repair = json_repair.__main__:cli
@@ -1,8 +0,0 @@
1
- json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
- json_repair/json_repair.py,sha256=odtRiFJ-u8mbdw_3Djx4jADxGoBeQvot3536D6Y6K0c,32266
3
- json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- json_repair-0.28.4.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
5
- json_repair-0.28.4.dist-info/METADATA,sha256=2JB2TM0mrFC7OejTtgFrpyr2qQOw8xwX0KmLTSzephk,9019
6
- json_repair-0.28.4.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
7
- json_repair-0.28.4.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
8
- json_repair-0.28.4.dist-info/RECORD,,