json-repair 0.37.0__py3-none-any.whl → 0.39.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -101,6 +101,8 @@ class JSONParser:
101
101
  char.isdigit() or char == "-" or char == "."
102
102
  ):
103
103
  return self.parse_number()
104
+ elif char in ["#", "/"]:
105
+ return self.parse_comment()
104
106
  # If everything else fails, we just ignore and move on
105
107
  else:
106
108
  self.index += 1
@@ -138,8 +140,9 @@ class JSONParser:
138
140
  # The rollback index needs to be updated here in case the key is empty
139
141
  rollback_index = self.index
140
142
  key = str(self.parse_string())
141
-
142
- if key != "" or (key == "" and self.get_char_at() == ":"):
143
+ if key == "":
144
+ self.skip_whitespaces_at()
145
+ if key != "" or (key == "" and self.get_char_at() in [":", "}"]):
143
146
  # If the string is empty but there is a object divider, we are done here
144
147
  break
145
148
  if ContextValues.ARRAY in self.context.context and key in obj:
@@ -199,11 +202,10 @@ class JSONParser:
199
202
  self.skip_whitespaces_at()
200
203
  value = self.parse_json()
201
204
 
202
- # It is possible that parse_json() returns nothing valid, so we stop
205
+ # It is possible that parse_json() returns nothing valid, so we increase by 1
203
206
  if value == "":
204
- break
205
-
206
- if value == "..." and self.get_char_at(-1) == ".":
207
+ self.index += 1
208
+ elif value == "..." and self.get_char_at(-1) == ".":
207
209
  self.log(
208
210
  "While parsing an array, found a stray '...'; ignoring it",
209
211
  )
@@ -243,6 +245,8 @@ class JSONParser:
243
245
  lstring_delimiter = rstring_delimiter = '"'
244
246
 
245
247
  char = self.get_char_at()
248
+ if char in ["#", "/"]:
249
+ return self.parse_comment()
246
250
  # A valid string can only start with a valid quote or, in our case, with a literal
247
251
  while char and char not in self.STRING_DELIMITERS and not char.isalnum():
248
252
  self.index += 1
@@ -753,6 +757,76 @@ class JSONParser:
753
757
  return self.skip_to_character(character=character, idx=idx + 1)
754
758
  return idx
755
759
 
760
+ def parse_comment(self) -> str:
761
+ """
762
+ Parse code-like comments:
763
+
764
+ - "# comment": A line comment that continues until a newline.
765
+ - "// comment": A line comment that continues until a newline.
766
+ - "/* comment */": A block comment that continues until the closing delimiter "*/".
767
+
768
+ The comment is skipped over and an empty string is returned so that comments do not interfere
769
+ with the actual JSON elements.
770
+ """
771
+ char = self.get_char_at()
772
+ termination_characters = ["\n", "\r"]
773
+ if ContextValues.ARRAY in self.context.context:
774
+ termination_characters.append("]")
775
+ if ContextValues.OBJECT_VALUE in self.context.context:
776
+ termination_characters.append("}")
777
+ if ContextValues.OBJECT_KEY in self.context.context:
778
+ termination_characters.append(":")
779
+ # Line comment starting with #
780
+ if char == "#":
781
+ comment = ""
782
+ while char and char not in termination_characters:
783
+ comment += char
784
+ self.index += 1
785
+ char = self.get_char_at()
786
+ self.log(f"Found line comment: {comment}")
787
+ return ""
788
+
789
+ # Comments starting with '/'
790
+ elif char == "/":
791
+ next_char = self.get_char_at(1)
792
+ # Handle line comment starting with //
793
+ if next_char == "/":
794
+ comment = "//"
795
+ self.index += 2 # Skip both slashes.
796
+ char = self.get_char_at()
797
+ while char and char not in termination_characters:
798
+ comment += char
799
+ self.index += 1
800
+ char = self.get_char_at()
801
+ self.log(f"Found line comment: {comment}")
802
+ return ""
803
+ # Handle block comment starting with /*
804
+ elif next_char == "*":
805
+ comment = "/*"
806
+ self.index += 2 # Skip '/*'
807
+ while True:
808
+ char = self.get_char_at()
809
+ if not char:
810
+ self.log(
811
+ "Reached end-of-string while parsing block comment; unclosed block comment."
812
+ )
813
+ break
814
+ comment += char
815
+ self.index += 1
816
+ if comment.endswith("*/"):
817
+ break
818
+ self.log(f"Found block comment: {comment}")
819
+ return ""
820
+ else:
821
+ # Not a recognized comment pattern, skip the slash.
822
+ self.index += 1
823
+ return ""
824
+
825
+ else:
826
+ # Should not be reached: if for some reason the current character does not start a comment, skip it.
827
+ self.index += 1
828
+ return ""
829
+
756
830
  def _log(self, text: str) -> None:
757
831
  window: int = 10
758
832
  start: int = max(self.index - window, 0)
@@ -160,7 +160,7 @@ def cli(inline_args: Optional[List[str]] = None) -> int:
160
160
 
161
161
  Args:
162
162
  inline_args (Optional[List[str]]): List of command-line arguments for testing purposes. Defaults to None.
163
- - filename (str): The JSON file to repair
163
+ - filename (str): The JSON file to repair. If omitted, the JSON is read from stdin.
164
164
  - -i, --inline (bool): Replace the file inline instead of returning the output to stdout.
165
165
  - -o, --output TARGET (str): If specified, the output will be written to TARGET filename instead of stdout.
166
166
  - --ensure_ascii (bool): Pass ensure_ascii=True to json.dumps(). Will pass False otherwise.
@@ -174,9 +174,15 @@ def cli(inline_args: Optional[List[str]] = None) -> int:
174
174
 
175
175
  Example:
176
176
  >>> cli(['example.json', '--indent', '4'])
177
+ >>> cat json.txt | json_repair
177
178
  """
178
179
  parser = argparse.ArgumentParser(description="Repair and parse JSON files.")
179
- parser.add_argument("filename", help="The JSON file to repair")
180
+ # Make the filename argument optional; if omitted, we will read from stdin.
181
+ parser.add_argument(
182
+ "filename",
183
+ nargs="?",
184
+ help="The JSON file to repair (if omitted, reads from stdin)",
185
+ )
180
186
  parser.add_argument(
181
187
  "-i",
182
188
  "--inline",
@@ -204,9 +210,12 @@ def cli(inline_args: Optional[List[str]] = None) -> int:
204
210
  if inline_args is None: # pragma: no cover
205
211
  args = parser.parse_args()
206
212
  else:
207
- args = parser.parse_args(
208
- inline_args
209
- ) # This is needed so this function is testable
213
+ args = parser.parse_args(inline_args)
214
+
215
+ # Inline mode requires a filename, so error out if none was provided.
216
+ if args.inline and not args.filename: # pragma: no cover
217
+ print("Error: Inline mode requires a filename", file=sys.stderr)
218
+ sys.exit(1)
210
219
 
211
220
  if args.inline and args.output: # pragma: no cover
212
221
  print("Error: You cannot pass both --inline and --output", file=sys.stderr)
@@ -217,8 +226,12 @@ def cli(inline_args: Optional[List[str]] = None) -> int:
217
226
  ensure_ascii = True
218
227
 
219
228
  try:
220
- result = from_file(args.filename)
221
-
229
+ # Use from_file if a filename is provided; otherwise read from stdin.
230
+ if args.filename:
231
+ result = from_file(args.filename)
232
+ else:
233
+ data = sys.stdin.read()
234
+ result = loads(data)
222
235
  if args.inline or args.output:
223
236
  with open(args.output or args.filename, mode="w") as fd:
224
237
  json.dump(result, fd, indent=args.indent, ensure_ascii=ensure_ascii)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: json_repair
3
- Version: 0.37.0
3
+ Version: 0.39.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -0,0 +1,13 @@
1
+ json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
2
+ json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
+ json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
4
+ json_repair/json_parser.py,sha256=BQsH8CRy59C2176bMwVerfqbHDXfLoEC1v5frmCiv7M,39020
5
+ json_repair/json_repair.py,sha256=k-5HRRlCqrxNmJi0u1KE3IUeL4HXqi1XZ7oAL-NFDLo,10314
6
+ json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ json_repair/string_file_wrapper.py,sha256=koZmdq2-Z5K7XF1bDqX6dEbNaVMJYcMTjq-aGe6NQvA,4526
8
+ json_repair-0.39.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
9
+ json_repair-0.39.0.dist-info/METADATA,sha256=cArvqcMBL9FVCwnJGtsaeF7lXWjOFWG3_1OueGjOiRs,11794
10
+ json_repair-0.39.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
11
+ json_repair-0.39.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
12
+ json_repair-0.39.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
13
+ json_repair-0.39.0.dist-info/RECORD,,
@@ -1,13 +0,0 @@
1
- json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
2
- json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
- json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
4
- json_repair/json_parser.py,sha256=XASjndsU8Q2NNXENoQJKCwZUxf0ezlsq538x7Y33T3A,35948
5
- json_repair/json_repair.py,sha256=LINLSJBs3cJMfs1YRDaIpfWR5PJLs87Oe06G5yQjY18,9729
6
- json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- json_repair/string_file_wrapper.py,sha256=koZmdq2-Z5K7XF1bDqX6dEbNaVMJYcMTjq-aGe6NQvA,4526
8
- json_repair-0.37.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
9
- json_repair-0.37.0.dist-info/METADATA,sha256=GN2yJwIv5nrKgjKjocEdCbPioWmyghYqUPxj6ogvxfo,11794
10
- json_repair-0.37.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
11
- json_repair-0.37.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
12
- json_repair-0.37.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
13
- json_repair-0.37.0.dist-info/RECORD,,