json-repair 0.37.0__tar.gz → 0.39.0__tar.gz
- {json_repair-0.37.0/src/json_repair.egg-info → json_repair-0.39.0}/PKG-INFO +1 -1
- {json_repair-0.37.0 → json_repair-0.39.0}/pyproject.toml +1 -1
- {json_repair-0.37.0 → json_repair-0.39.0}/src/json_repair/json_parser.py +80 -6
- {json_repair-0.37.0 → json_repair-0.39.0}/src/json_repair/json_repair.py +20 -7
- {json_repair-0.37.0 → json_repair-0.39.0/src/json_repair.egg-info}/PKG-INFO +1 -1
- {json_repair-0.37.0 → json_repair-0.39.0}/tests/test_json_repair.py +15 -15
- {json_repair-0.37.0 → json_repair-0.39.0}/LICENSE +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/README.md +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/setup.cfg +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/src/json_repair/__init__.py +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/src/json_repair/__main__.py +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/src/json_repair/json_context.py +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/src/json_repair/py.typed +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/src/json_repair/string_file_wrapper.py +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/src/json_repair.egg-info/entry_points.txt +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/src/json_repair.egg-info/top_level.txt +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/tests/test_coverage.py +0 -0
- {json_repair-0.37.0 → json_repair-0.39.0}/tests/test_performance.py +0 -0
--- json_repair-0.37.0/pyproject.toml
+++ json_repair-0.39.0/pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
 build-backend = "setuptools.build_meta"
 [project]
 name = "json_repair"
-version = "0.37.0"
+version = "0.39.0"
 license = {file = "LICENSE"}
 authors = [
   { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
--- json_repair-0.37.0/src/json_repair/json_parser.py
+++ json_repair-0.39.0/src/json_repair/json_parser.py
@@ -101,6 +101,8 @@ class JSONParser:
                 char.isdigit() or char == "-" or char == "."
             ):
                 return self.parse_number()
+            elif char in ["#", "/"]:
+                return self.parse_comment()
             # If everything else fails, we just ignore and move on
             else:
                 self.index += 1
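With this dispatch in place, a "#" or "/" seen where a JSON element is expected is handed to the new parse_comment() method instead of being skipped blindly. A small usage sketch; the expected outputs mirror the new test cases added to tests/test_json_repair.py further down:

    from json_repair import repair_json

    # '#' and '//' line comments inside an object are stripped before re-serializing.
    repair_json('{ "key": { "key2": "value2" # comment }, "key3": "value3" }')
    # -> '{"key": {"key2": "value2"}, "key3": "value3"}'

    repair_json('{ "key": { "key2": "value2" // comment }, "key3": "value3" }')
    # -> '{"key": {"key2": "value2"}, "key3": "value3"}'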
@@ -138,8 +140,9 @@ class JSONParser:
                 # The rollback index needs to be updated here in case the key is empty
                 rollback_index = self.index
                 key = str(self.parse_string())
-
-
+                if key == "":
+                    self.skip_whitespaces_at()
+                if key != "" or (key == "" and self.get_char_at() in [":", "}"]):
                    # If the string is empty but there is a object divider, we are done here
                    break
            if ContextValues.ARRAY in self.context.context and key in obj:
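The reworked empty-key branch skips whitespace and then accepts ":" or "}" as an object divider before giving up. A hedged illustration (this exact assertion is not in the test suite; the expected string assumes json-repair's usual trailing-comma handling):

    from json_repair import repair_json

    # An empty key position followed by '}' (e.g. a trailing comma) ends the object cleanly.
    repair_json('{"key": "value", }')
    # expected: '{"key": "value"}'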
@@ -199,11 +202,10 @@ class JSONParser:
             self.skip_whitespaces_at()
             value = self.parse_json()
 
-            # It is possible that parse_json() returns nothing valid, so we
+            # It is possible that parse_json() returns nothing valid, so we increase by 1
             if value == "":
-
-
-            if value == "..." and self.get_char_at(-1) == ".":
+                self.index += 1
+            elif value == "..." and self.get_char_at(-1) == ".":
                 self.log(
                     "While parsing an array, found a stray '...'; ignoring it",
                 )
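Read straightforwardly, the new branch means an empty parse result now just advances the index, while a stray ellipsis inside an array is logged and dropped. A hedged sketch, not taken from the test suite:

    from json_repair import repair_json

    # The stray '...' should be logged ("found a stray '...'; ignoring it") and omitted.
    repair_json('[1, 2, ..., 3]')
    # expected: '[1, 2, 3]'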
@@ -243,6 +245,8 @@ class JSONParser:
         lstring_delimiter = rstring_delimiter = '"'
 
         char = self.get_char_at()
+        if char in ["#", "/"]:
+            return self.parse_comment()
         # A valid string can only start with a valid quote or, in our case, with a literal
         while char and char not in self.STRING_DELIMITERS and not char.isalnum():
             self.index += 1
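parse_string() is also entered where a value is expected, so the same early dispatch lets block comments sit between array elements. This mirrors the new array test below:

    from json_repair import repair_json

    # A /* ... */ block comment between array elements is parsed as a comment, not a value.
    repair_json('[ "value", /* comment */ "value2" ]')
    # -> '["value", "value2"]'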
@@ -753,6 +757,76 @@ class JSONParser:
             return self.skip_to_character(character=character, idx=idx + 1)
         return idx
 
+    def parse_comment(self) -> str:
+        """
+        Parse code-like comments:
+
+        - "# comment": A line comment that continues until a newline.
+        - "// comment": A line comment that continues until a newline.
+        - "/* comment */": A block comment that continues until the closing delimiter "*/".
+
+        The comment is skipped over and an empty string is returned so that comments do not interfere
+        with the actual JSON elements.
+        """
+        char = self.get_char_at()
+        termination_characters = ["\n", "\r"]
+        if ContextValues.ARRAY in self.context.context:
+            termination_characters.append("]")
+        if ContextValues.OBJECT_VALUE in self.context.context:
+            termination_characters.append("}")
+        if ContextValues.OBJECT_KEY in self.context.context:
+            termination_characters.append(":")
+        # Line comment starting with #
+        if char == "#":
+            comment = ""
+            while char and char not in termination_characters:
+                comment += char
+                self.index += 1
+                char = self.get_char_at()
+            self.log(f"Found line comment: {comment}")
+            return ""
+
+        # Comments starting with '/'
+        elif char == "/":
+            next_char = self.get_char_at(1)
+            # Handle line comment starting with //
+            if next_char == "/":
+                comment = "//"
+                self.index += 2  # Skip both slashes.
+                char = self.get_char_at()
+                while char and char not in termination_characters:
+                    comment += char
+                    self.index += 1
+                    char = self.get_char_at()
+                self.log(f"Found line comment: {comment}")
+                return ""
+            # Handle block comment starting with /*
+            elif next_char == "*":
+                comment = "/*"
+                self.index += 2  # Skip '/*'
+                while True:
+                    char = self.get_char_at()
+                    if not char:
+                        self.log(
+                            "Reached end-of-string while parsing block comment; unclosed block comment."
+                        )
+                        break
+                    comment += char
+                    self.index += 1
+                    if comment.endswith("*/"):
+                        break
+                self.log(f"Found block comment: {comment}")
+                return ""
+            else:
+                # Not a recognized comment pattern, skip the slash.
+                self.index += 1
+                return ""
+
+        else:
+            # Should not be reached: if for some reason the current character does not start a comment, skip it.
+            self.index += 1
+            return ""
+
     def _log(self, text: str) -> None:
         window: int = 10
         start: int = max(self.index - window, 0)
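Because parse_comment() always returns an empty string, a comment never ends up as a key or value; it is only logged and skipped, with the termination set ("\n", "\r", plus "]", "}" or ":" depending on context) keeping the surrounding JSON intact. A hedged sketch using the object-returning API (loads() is the same helper the CLI change below relies on):

    from json_repair import loads

    # The block-comment case from the new tests, decoded to Python objects instead of a string.
    loads('{ "key": { "key2": "value2" /* comment */ }, "key3": "value3" }')
    # expected: {'key': {'key2': 'value2'}, 'key3': 'value3'}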
--- json_repair-0.37.0/src/json_repair/json_repair.py
+++ json_repair-0.39.0/src/json_repair/json_repair.py
@@ -160,7 +160,7 @@ def cli(inline_args: Optional[List[str]] = None) -> int:
 
     Args:
         inline_args (Optional[List[str]]): List of command-line arguments for testing purposes. Defaults to None.
-            - filename (str): The JSON file to repair
+            - filename (str): The JSON file to repair. If omitted, the JSON is read from stdin.
            - -i, --inline (bool): Replace the file inline instead of returning the output to stdout.
            - -o, --output TARGET (str): If specified, the output will be written to TARGET filename instead of stdout.
            - --ensure_ascii (bool): Pass ensure_ascii=True to json.dumps(). Will pass False otherwise.
@@ -174,9 +174,15 @@ def cli(inline_args: Optional[List[str]] = None) -> int:
 
     Example:
         >>> cli(['example.json', '--indent', '4'])
+        >>> cat json.txt | json_repair
     """
     parser = argparse.ArgumentParser(description="Repair and parse JSON files.")
-
+    # Make the filename argument optional; if omitted, we will read from stdin.
+    parser.add_argument(
+        "filename",
+        nargs="?",
+        help="The JSON file to repair (if omitted, reads from stdin)",
+    )
     parser.add_argument(
         "-i",
         "--inline",
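For reference, nargs="?" is what makes the positional optional: when it is omitted, argparse stores the default (None here), which the CLI later uses to decide between from_file() and stdin. A minimal standalone argparse sketch, not taken from the package:

    import argparse

    parser = argparse.ArgumentParser()
    # With nargs="?" the positional may be absent; args.filename then falls back to None.
    parser.add_argument("filename", nargs="?", help="optional input file")

    print(parser.parse_args([]).filename)             # None -> read from stdin instead
    print(parser.parse_args(["data.json"]).filename)  # 'data.json'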
@@ -204,9 +210,12 @@ def cli(inline_args: Optional[List[str]] = None) -> int:
     if inline_args is None:  # pragma: no cover
         args = parser.parse_args()
     else:
-        args = parser.parse_args(
-
-
+        args = parser.parse_args(inline_args)
+
+    # Inline mode requires a filename, so error out if none was provided.
+    if args.inline and not args.filename:  # pragma: no cover
+        print("Error: Inline mode requires a filename", file=sys.stderr)
+        sys.exit(1)
 
     if args.inline and args.output:  # pragma: no cover
         print("Error: You cannot pass both --inline and --output", file=sys.stderr)
@@ -217,8 +226,12 @@ def cli(inline_args: Optional[List[str]] = None) -> int:
         ensure_ascii = True
 
     try:
-
-
+        # Use from_file if a filename is provided; otherwise read from stdin.
+        if args.filename:
+            result = from_file(args.filename)
+        else:
+            data = sys.stdin.read()
+            result = loads(data)
         if args.inline or args.output:
             with open(args.output or args.filename, mode="w") as fd:
                 json.dump(result, fd, indent=args.indent, ensure_ascii=ensure_ascii)
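Taken together, the CLI can now be driven through a pipe: with no filename, sys.stdin.read() feeds loads(). The new test below exercises this by patching sys.stdin; a hedged sketch of the same idea outside pytest (the import path for cli is assumed from the file layout above):

    import io
    from unittest.mock import patch

    from json_repair.json_repair import cli  # assumed module path: src/json_repair/json_repair.py

    broken = '{key:value'
    # Redirect stdin so cli() takes the no-filename branch and repairs the piped text.
    with patch('sys.stdin', new=io.StringIO(broken)):
        cli(inline_args=['--indent', '4'])
    # The repaired JSON is printed to stdout, as in the file-based modes.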
--- json_repair-0.37.0/tests/test_json_repair.py
+++ json_repair-0.39.0/tests/test_json_repair.py
@@ -3,6 +3,7 @@ from unittest.mock import patch
 import os.path
 import pathlib
 import tempfile
+import io
 
 def test_basic_types_valid():
     assert repair_json("True", return_objects=True) == ""
@@ -124,6 +125,7 @@ def test_array_edge_cases():
     assert repair_json('{"key": ["value]}') == '{"key": ["value"]}'
     assert repair_json('["lorem "ipsum" sic"]') == '["lorem \\"ipsum\\" sic"]'
     assert repair_json('{"key1": ["value1", "value2"}, "key2": ["value3", "value4"]}') == '{"key1": ["value1", "value2"], "key2": ["value3", "value4"]}'
+    assert repair_json('[ "value", /* comment */ "value2" ]') == '["value", "value2"]'
 
 def test_escaping():
     assert repair_json("'\"'") == '""'
@@ -158,6 +160,9 @@ def test_object_edge_cases():
     assert repair_json('{"key:"value"}') == '{"key": "value"}'
     assert repair_json('{"key:value}') == '{"key": "value"}'
     assert repair_json('[{"lorem": {"ipsum": "sic"}, """" "lorem": {"ipsum": "sic"}]') == '[{"lorem": {"ipsum": "sic"}}, {"lorem": {"ipsum": "sic"}}]'
+    assert repair_json('{ "key": { "key2": "value2" // comment }, "key3": "value3" }') == '{"key": {"key2": "value2"}, "key3": "value3"}'
+    assert repair_json('{ "key": { "key2": "value2" # comment }, "key3": "value3" }') == '{"key": {"key2": "value2"}, "key3": "value3"}'
+    assert repair_json('{ "key": { "key2": "value2" /* comment */ }, "key3": "value3" }') == '{"key": {"key2": "value2"}, "key3": "value3"}'
 
 def test_number_edge_cases():
     assert repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
@@ -313,18 +318,13 @@ def test_cli(capsys):
     # Clean up - delete the temporary file
     os.remove(temp_path)
     os.remove(tempout_path)
-
-
-
-
-
-
-
-
-
-
-    with patch('sys.argv', ['json_repair', sample_json_file, '-o', str(output_file)]):
-        cli()
-    with open(output_file, 'r') as f:
-        assert json.load(f) == {"key": "value"}
-    """
+
+    # Prepare a JSON string that needs to be repaired.
+    test_input = "{key:value"
+    # Expected output when running cli with --indent 0.
+    expected_output = '{\n"key": "value"\n}\n'
+    # Patch sys.stdin so that cli() reads from it instead of a file.
+    with patch('sys.stdin', new=io.StringIO(test_input)):
+        cli(inline_args=['--indent', 0])
+    captured = capsys.readouterr()
+    assert captured.out == expected_output
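As a sanity check on the expected_output string: json.dumps() with indent=0 puts each element on its own line with no leading spaces, and print() appends the final newline asserted above.

    import json

    # Produces '{\n"key": "value"\n}'; print() adds the trailing '\n'.
    print(json.dumps({"key": "value"}, indent=0))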