json-repair 0.37.0__py3-none-any.whl → 0.39.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_parser.py +80 -6
- json_repair/json_repair.py +20 -7
- {json_repair-0.37.0.dist-info → json_repair-0.39.0.dist-info}/METADATA +1 -1
- json_repair-0.39.0.dist-info/RECORD +13 -0
- json_repair-0.37.0.dist-info/RECORD +0 -13
- {json_repair-0.37.0.dist-info → json_repair-0.39.0.dist-info}/LICENSE +0 -0
- {json_repair-0.37.0.dist-info → json_repair-0.39.0.dist-info}/WHEEL +0 -0
- {json_repair-0.37.0.dist-info → json_repair-0.39.0.dist-info}/entry_points.txt +0 -0
- {json_repair-0.37.0.dist-info → json_repair-0.39.0.dist-info}/top_level.txt +0 -0
json_repair/json_parser.py
CHANGED
@@ -101,6 +101,8 @@ class JSONParser:
|
|
101
101
|
char.isdigit() or char == "-" or char == "."
|
102
102
|
):
|
103
103
|
return self.parse_number()
|
104
|
+
elif char in ["#", "/"]:
|
105
|
+
return self.parse_comment()
|
104
106
|
# If everything else fails, we just ignore and move on
|
105
107
|
else:
|
106
108
|
self.index += 1
|
@@ -138,8 +140,9 @@ class JSONParser:
|
|
138
140
|
# The rollback index needs to be updated here in case the key is empty
|
139
141
|
rollback_index = self.index
|
140
142
|
key = str(self.parse_string())
|
141
|
-
|
142
|
-
|
143
|
+
if key == "":
|
144
|
+
self.skip_whitespaces_at()
|
145
|
+
if key != "" or (key == "" and self.get_char_at() in [":", "}"]):
|
143
146
|
# If the string is empty but there is a object divider, we are done here
|
144
147
|
break
|
145
148
|
if ContextValues.ARRAY in self.context.context and key in obj:
|
@@ -199,11 +202,10 @@ class JSONParser:
|
|
199
202
|
self.skip_whitespaces_at()
|
200
203
|
value = self.parse_json()
|
201
204
|
|
202
|
-
# It is possible that parse_json() returns nothing valid, so we
|
205
|
+
# It is possible that parse_json() returns nothing valid, so we increase by 1
|
203
206
|
if value == "":
|
204
|
-
|
205
|
-
|
206
|
-
if value == "..." and self.get_char_at(-1) == ".":
|
207
|
+
self.index += 1
|
208
|
+
elif value == "..." and self.get_char_at(-1) == ".":
|
207
209
|
self.log(
|
208
210
|
"While parsing an array, found a stray '...'; ignoring it",
|
209
211
|
)
|
@@ -243,6 +245,8 @@ class JSONParser:
|
|
243
245
|
lstring_delimiter = rstring_delimiter = '"'
|
244
246
|
|
245
247
|
char = self.get_char_at()
|
248
|
+
if char in ["#", "/"]:
|
249
|
+
return self.parse_comment()
|
246
250
|
# A valid string can only start with a valid quote or, in our case, with a literal
|
247
251
|
while char and char not in self.STRING_DELIMITERS and not char.isalnum():
|
248
252
|
self.index += 1
|
@@ -753,6 +757,76 @@ class JSONParser:
|
|
753
757
|
return self.skip_to_character(character=character, idx=idx + 1)
|
754
758
|
return idx
|
755
759
|
|
760
|
+
def parse_comment(self) -> str:
    """
    Skip over a code-like comment starting at the current index.

    Recognized forms:

    - "# comment": line comment.
    - "// comment": line comment.
    - "/* comment */": block comment, closed by the first "*/" that appears
      after the opening "/*".

    A line comment ends at a newline or carriage return, or earlier at a
    structural character of the surrounding context ("]" inside an array,
    "}" inside an object value, ":" inside an object key). The terminating
    character is left unconsumed so the caller can handle it.

    A lone "/" (not followed by "/" or "*") and any other character are
    skipped one character at a time.

    Returns:
        Always the empty string, so that comments do not interfere with the
        actual JSON elements.
    """
    char = self.get_char_at()
    next_char = self.get_char_at(1) if char == "/" else None

    # Block comment: /* ... */
    if char == "/" and next_char == "*":
        pieces = ["/*"]
        self.index += 2  # Skip '/*'
        # The closing delimiter is matched only against characters read
        # after the opener. Otherwise the '*' of '/*' would pair with an
        # immediately following '/', and "/*/" would wrongly count as closed.
        previous = ""
        while True:
            char = self.get_char_at()
            if not char:
                self.log(
                    "Reached end-of-string while parsing block comment; unclosed block comment."
                )
                break
            pieces.append(char)
            self.index += 1
            if previous == "*" and char == "/":
                break
            previous = char
        comment = "".join(pieces)
        self.log(f"Found block comment: {comment}")
        return ""

    # Line comment: '#' or '//'
    if char == "#" or (char == "/" and next_char == "/"):
        termination_characters = ["\n", "\r"]
        if ContextValues.ARRAY in self.context.context:
            termination_characters.append("]")
        if ContextValues.OBJECT_VALUE in self.context.context:
            termination_characters.append("}")
        if ContextValues.OBJECT_KEY in self.context.context:
            termination_characters.append(":")

        pieces = []
        if char == "/":
            # Consume both slashes up front; '#' is picked up by the loop below.
            pieces.append("//")
            self.index += 2
            char = self.get_char_at()
        while char and char not in termination_characters:
            pieces.append(char)
            self.index += 1
            char = self.get_char_at()
        comment = "".join(pieces)
        self.log(f"Found line comment: {comment}")
        return ""

    # Either a lone '/' that does not open a comment, or (should not happen)
    # a character that is not a comment opener at all: skip it.
    self.index += 1
    return ""
|
829
|
+
|
756
830
|
def _log(self, text: str) -> None:
|
757
831
|
window: int = 10
|
758
832
|
start: int = max(self.index - window, 0)
|
json_repair/json_repair.py
CHANGED
@@ -160,7 +160,7 @@ def cli(inline_args: Optional[List[str]] = None) -> int:
|
|
160
160
|
|
161
161
|
Args:
|
162
162
|
inline_args (Optional[List[str]]): List of command-line arguments for testing purposes. Defaults to None.
|
163
|
-
- filename (str): The JSON file to repair
|
163
|
+
- filename (str): The JSON file to repair. If omitted, the JSON is read from stdin.
|
164
164
|
- -i, --inline (bool): Replace the file inline instead of returning the output to stdout.
|
165
165
|
- -o, --output TARGET (str): If specified, the output will be written to TARGET filename instead of stdout.
|
166
166
|
- --ensure_ascii (bool): Pass ensure_ascii=True to json.dumps(). Will pass False otherwise.
|
@@ -174,9 +174,15 @@ def cli(inline_args: Optional[List[str]] = None) -> int:
|
|
174
174
|
|
175
175
|
Example:
|
176
176
|
>>> cli(['example.json', '--indent', '4'])
|
177
|
+
>>> cat json.txt | json_repair
|
177
178
|
"""
|
178
179
|
parser = argparse.ArgumentParser(description="Repair and parse JSON files.")
|
179
|
-
|
180
|
+
# Make the filename argument optional; if omitted, we will read from stdin.
|
181
|
+
parser.add_argument(
|
182
|
+
"filename",
|
183
|
+
nargs="?",
|
184
|
+
help="The JSON file to repair (if omitted, reads from stdin)",
|
185
|
+
)
|
180
186
|
parser.add_argument(
|
181
187
|
"-i",
|
182
188
|
"--inline",
|
@@ -204,9 +210,12 @@ def cli(inline_args: Optional[List[str]] = None) -> int:
|
|
204
210
|
if inline_args is None: # pragma: no cover
|
205
211
|
args = parser.parse_args()
|
206
212
|
else:
|
207
|
-
args = parser.parse_args(
|
208
|
-
|
209
|
-
|
213
|
+
args = parser.parse_args(inline_args)
|
214
|
+
|
215
|
+
# Inline mode requires a filename, so error out if none was provided.
|
216
|
+
if args.inline and not args.filename: # pragma: no cover
|
217
|
+
print("Error: Inline mode requires a filename", file=sys.stderr)
|
218
|
+
sys.exit(1)
|
210
219
|
|
211
220
|
if args.inline and args.output: # pragma: no cover
|
212
221
|
print("Error: You cannot pass both --inline and --output", file=sys.stderr)
|
@@ -217,8 +226,12 @@ def cli(inline_args: Optional[List[str]] = None) -> int:
|
|
217
226
|
ensure_ascii = True
|
218
227
|
|
219
228
|
try:
|
220
|
-
|
221
|
-
|
229
|
+
# Use from_file if a filename is provided; otherwise read from stdin.
|
230
|
+
if args.filename:
|
231
|
+
result = from_file(args.filename)
|
232
|
+
else:
|
233
|
+
data = sys.stdin.read()
|
234
|
+
result = loads(data)
|
222
235
|
if args.inline or args.output:
|
223
236
|
with open(args.output or args.filename, mode="w") as fd:
|
224
237
|
json.dump(result, fd, indent=args.indent, ensure_ascii=ensure_ascii)
|
@@ -0,0 +1,13 @@
|
|
1
|
+
json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
|
2
|
+
json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
|
3
|
+
json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
|
4
|
+
json_repair/json_parser.py,sha256=BQsH8CRy59C2176bMwVerfqbHDXfLoEC1v5frmCiv7M,39020
|
5
|
+
json_repair/json_repair.py,sha256=k-5HRRlCqrxNmJi0u1KE3IUeL4HXqi1XZ7oAL-NFDLo,10314
|
6
|
+
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
+
json_repair/string_file_wrapper.py,sha256=koZmdq2-Z5K7XF1bDqX6dEbNaVMJYcMTjq-aGe6NQvA,4526
|
8
|
+
json_repair-0.39.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
9
|
+
json_repair-0.39.0.dist-info/METADATA,sha256=cArvqcMBL9FVCwnJGtsaeF7lXWjOFWG3_1OueGjOiRs,11794
|
10
|
+
json_repair-0.39.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
11
|
+
json_repair-0.39.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
|
12
|
+
json_repair-0.39.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
13
|
+
json_repair-0.39.0.dist-info/RECORD,,
|
@@ -1,13 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
|
2
|
-
json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
|
3
|
-
json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
|
4
|
-
json_repair/json_parser.py,sha256=XASjndsU8Q2NNXENoQJKCwZUxf0ezlsq538x7Y33T3A,35948
|
5
|
-
json_repair/json_repair.py,sha256=LINLSJBs3cJMfs1YRDaIpfWR5PJLs87Oe06G5yQjY18,9729
|
6
|
-
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
-
json_repair/string_file_wrapper.py,sha256=koZmdq2-Z5K7XF1bDqX6dEbNaVMJYcMTjq-aGe6NQvA,4526
|
8
|
-
json_repair-0.37.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
9
|
-
json_repair-0.37.0.dist-info/METADATA,sha256=GN2yJwIv5nrKgjKjocEdCbPioWmyghYqUPxj6ogvxfo,11794
|
10
|
-
json_repair-0.37.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
11
|
-
json_repair-0.37.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
|
12
|
-
json_repair-0.37.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
13
|
-
json_repair-0.37.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|