json-repair 0.37.0__py3-none-any.whl → 0.39.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -101,6 +101,8 @@ class JSONParser:
101
101
  char.isdigit() or char == "-" or char == "."
102
102
  ):
103
103
  return self.parse_number()
104
+ elif char in ["#", "/"]:
105
+ return self.parse_comment()
104
106
  # If everything else fails, we just ignore and move on
105
107
  else:
106
108
  self.index += 1
@@ -138,8 +140,9 @@ class JSONParser:
138
140
  # The rollback index needs to be updated here in case the key is empty
139
141
  rollback_index = self.index
140
142
  key = str(self.parse_string())
141
-
142
- if key != "" or (key == "" and self.get_char_at() == ":"):
143
+ if key == "":
144
+ self.skip_whitespaces_at()
145
+ if key != "" or (key == "" and self.get_char_at() in [":", "}"]):
143
146
  # If the string is empty but there is an object divider, we are done here
144
147
  break
145
148
  if ContextValues.ARRAY in self.context.context and key in obj:
@@ -199,11 +202,10 @@ class JSONParser:
199
202
  self.skip_whitespaces_at()
200
203
  value = self.parse_json()
201
204
 
202
- # It is possible that parse_json() returns nothing valid, so we stop
205
+ # It is possible that parse_json() returns nothing valid, so we advance the index by 1 to skip the offending character
203
206
  if value == "":
204
- break
205
-
206
- if value == "..." and self.get_char_at(-1) == ".":
207
+ self.index += 1
208
+ elif value == "..." and self.get_char_at(-1) == ".":
207
209
  self.log(
208
210
  "While parsing an array, found a stray '...'; ignoring it",
209
211
  )
@@ -243,6 +245,8 @@ class JSONParser:
243
245
  lstring_delimiter = rstring_delimiter = '"'
244
246
 
245
247
  char = self.get_char_at()
248
+ if char in ["#", "/"]:
249
+ return self.parse_comment()
246
250
  # A valid string can only start with a valid quote or, in our case, with a literal
247
251
  while char and char not in self.STRING_DELIMITERS and not char.isalnum():
248
252
  self.index += 1
@@ -753,6 +757,76 @@ class JSONParser:
753
757
  return self.skip_to_character(character=character, idx=idx + 1)
754
758
  return idx
755
759
 
760
+ def parse_comment(self) -> str:
761
+ """
762
+ Parse code-like comments:
763
+
764
+ - "# comment": A line comment that continues until a newline.
765
+ - "// comment": A line comment that continues until a newline.
766
+ - "/* comment */": A block comment that continues until the closing delimiter "*/".
767
+
768
+ The comment is skipped over and an empty string is returned so that comments do not interfere
769
+ with the actual JSON elements.
770
+ """
771
+ char = self.get_char_at()
772
+ termination_characters = ["\n", "\r"]
773
+ if ContextValues.ARRAY in self.context.context:
774
+ termination_characters.append("]")
775
+ if ContextValues.OBJECT_VALUE in self.context.context:
776
+ termination_characters.append("}")
777
+ if ContextValues.OBJECT_KEY in self.context.context:
778
+ termination_characters.append(":")
779
+ # Line comment starting with #
780
+ if char == "#":
781
+ comment = ""
782
+ while char and char not in termination_characters:
783
+ comment += char
784
+ self.index += 1
785
+ char = self.get_char_at()
786
+ self.log(f"Found line comment: {comment}")
787
+ return ""
788
+
789
+ # Comments starting with '/'
790
+ elif char == "/":
791
+ next_char = self.get_char_at(1)
792
+ # Handle line comment starting with //
793
+ if next_char == "/":
794
+ comment = "//"
795
+ self.index += 2 # Skip both slashes.
796
+ char = self.get_char_at()
797
+ while char and char not in termination_characters:
798
+ comment += char
799
+ self.index += 1
800
+ char = self.get_char_at()
801
+ self.log(f"Found line comment: {comment}")
802
+ return ""
803
+ # Handle block comment starting with /*
804
+ elif next_char == "*":
805
+ comment = "/*"
806
+ self.index += 2 # Skip '/*'
807
+ while True:
808
+ char = self.get_char_at()
809
+ if not char:
810
+ self.log(
811
+ "Reached end-of-string while parsing block comment; unclosed block comment."
812
+ )
813
+ break
814
+ comment += char
815
+ self.index += 1
816
+ if comment.endswith("*/"):
817
+ break
818
+ self.log(f"Found block comment: {comment}")
819
+ return ""
820
+ else:
821
+ # Not a recognized comment pattern, skip the slash.
822
+ self.index += 1
823
+ return ""
824
+
825
+ else:
826
+ # Should not be reached: if for some reason the current character does not start a comment, skip it.
827
+ self.index += 1
828
+ return ""
829
+
756
830
  def _log(self, text: str) -> None:
757
831
  window: int = 10
758
832
  start: int = max(self.index - window, 0)
@@ -160,7 +160,7 @@ def cli(inline_args: Optional[List[str]] = None) -> int:
160
160
 
161
161
  Args:
162
162
  inline_args (Optional[List[str]]): List of command-line arguments for testing purposes. Defaults to None.
163
- - filename (str): The JSON file to repair
163
+ - filename (str): The JSON file to repair. If omitted, the JSON is read from stdin.
164
164
  - -i, --inline (bool): Replace the file inline instead of returning the output to stdout.
165
165
  - -o, --output TARGET (str): If specified, the output will be written to TARGET filename instead of stdout.
166
166
  - --ensure_ascii (bool): Pass ensure_ascii=True to json.dumps(). Will pass False otherwise.
@@ -174,9 +174,15 @@ def cli(inline_args: Optional[List[str]] = None) -> int:
174
174
 
175
175
  Example:
176
176
  >>> cli(['example.json', '--indent', '4'])
177
+ $ cat json.txt | json_repair
177
178
  """
178
179
  parser = argparse.ArgumentParser(description="Repair and parse JSON files.")
179
- parser.add_argument("filename", help="The JSON file to repair")
180
+ # Make the filename argument optional; if omitted, we will read from stdin.
181
+ parser.add_argument(
182
+ "filename",
183
+ nargs="?",
184
+ help="The JSON file to repair (if omitted, reads from stdin)",
185
+ )
180
186
  parser.add_argument(
181
187
  "-i",
182
188
  "--inline",
@@ -204,9 +210,12 @@ def cli(inline_args: Optional[List[str]] = None) -> int:
204
210
  if inline_args is None: # pragma: no cover
205
211
  args = parser.parse_args()
206
212
  else:
207
- args = parser.parse_args(
208
- inline_args
209
- ) # This is needed so this function is testable
213
+ args = parser.parse_args(inline_args)
214
+
215
+ # Inline mode requires a filename, so error out if none was provided.
216
+ if args.inline and not args.filename: # pragma: no cover
217
+ print("Error: Inline mode requires a filename", file=sys.stderr)
218
+ sys.exit(1)
210
219
 
211
220
  if args.inline and args.output: # pragma: no cover
212
221
  print("Error: You cannot pass both --inline and --output", file=sys.stderr)
@@ -217,8 +226,12 @@ def cli(inline_args: Optional[List[str]] = None) -> int:
217
226
  ensure_ascii = True
218
227
 
219
228
  try:
220
- result = from_file(args.filename)
221
-
229
+ # Use from_file if a filename is provided; otherwise read from stdin.
230
+ if args.filename:
231
+ result = from_file(args.filename)
232
+ else:
233
+ data = sys.stdin.read()
234
+ result = loads(data)
222
235
  if args.inline or args.output:
223
236
  with open(args.output or args.filename, mode="w") as fd:
224
237
  json.dump(result, fd, indent=args.indent, ensure_ascii=ensure_ascii)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: json_repair
3
- Version: 0.37.0
3
+ Version: 0.39.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -0,0 +1,13 @@
1
+ json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
2
+ json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
+ json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
4
+ json_repair/json_parser.py,sha256=BQsH8CRy59C2176bMwVerfqbHDXfLoEC1v5frmCiv7M,39020
5
+ json_repair/json_repair.py,sha256=k-5HRRlCqrxNmJi0u1KE3IUeL4HXqi1XZ7oAL-NFDLo,10314
6
+ json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ json_repair/string_file_wrapper.py,sha256=koZmdq2-Z5K7XF1bDqX6dEbNaVMJYcMTjq-aGe6NQvA,4526
8
+ json_repair-0.39.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
9
+ json_repair-0.39.0.dist-info/METADATA,sha256=cArvqcMBL9FVCwnJGtsaeF7lXWjOFWG3_1OueGjOiRs,11794
10
+ json_repair-0.39.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
11
+ json_repair-0.39.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
12
+ json_repair-0.39.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
13
+ json_repair-0.39.0.dist-info/RECORD,,
@@ -1,13 +0,0 @@
1
- json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
2
- json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
- json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
4
- json_repair/json_parser.py,sha256=XASjndsU8Q2NNXENoQJKCwZUxf0ezlsq538x7Y33T3A,35948
5
- json_repair/json_repair.py,sha256=LINLSJBs3cJMfs1YRDaIpfWR5PJLs87Oe06G5yQjY18,9729
6
- json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- json_repair/string_file_wrapper.py,sha256=koZmdq2-Z5K7XF1bDqX6dEbNaVMJYcMTjq-aGe6NQvA,4526
8
- json_repair-0.37.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
9
- json_repair-0.37.0.dist-info/METADATA,sha256=GN2yJwIv5nrKgjKjocEdCbPioWmyghYqUPxj6ogvxfo,11794
10
- json_repair-0.37.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
11
- json_repair-0.37.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
12
- json_repair-0.37.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
13
- json_repair-0.37.0.dist-info/RECORD,,