json-repair 0.6.1__tar.gz → 0.6.2__tar.gz
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
- {json_repair-0.6.1/src/json_repair.egg-info → json_repair-0.6.2}/PKG-INFO +1 -1
- {json_repair-0.6.1 → json_repair-0.6.2}/pyproject.toml +1 -1
- {json_repair-0.6.1 → json_repair-0.6.2}/src/json_repair/json_repair.py +8 -22
- {json_repair-0.6.1 → json_repair-0.6.2/src/json_repair.egg-info}/PKG-INFO +1 -1
- {json_repair-0.6.1 → json_repair-0.6.2}/tests/test_json_repair.py +3 -2
- {json_repair-0.6.1 → json_repair-0.6.2}/tests/test_performance.py +4 -4
- {json_repair-0.6.1 → json_repair-0.6.2}/LICENSE +0 -0
- {json_repair-0.6.1 → json_repair-0.6.2}/README.md +0 -0
- {json_repair-0.6.1 → json_repair-0.6.2}/setup.cfg +0 -0
- {json_repair-0.6.1 → json_repair-0.6.2}/src/json_repair/__init__.py +0 -0
- {json_repair-0.6.1 → json_repair-0.6.2}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.6.1 → json_repair-0.6.2}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.6.1 → json_repair-0.6.2}/src/json_repair.egg-info/top_level.txt +0 -0
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.6.1"
|
6
|
+
version = "0.6.2"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -207,17 +207,18 @@ class JSONParser:
|
|
207
207
|
if fixed_quotes:
|
208
208
|
if self.context == "object_key" and (char == ":" or char.isspace()):
|
209
209
|
break
|
210
|
-
elif self.context == "object_value" and
|
210
|
+
elif self.context == "object_value" and char in [",", "}"]:
|
211
211
|
break
|
212
212
|
self.index += 1
|
213
213
|
char = self.get_char_at()
|
214
214
|
# ChatGPT sometimes forgets to quote links in markdown like: { "content": "[LINK]("https://google.com")" }
|
215
215
|
if (
|
216
216
|
char == string_terminator
|
217
|
-
|
217
|
+
# Next character is not a comma
|
218
|
+
and self.get_char_at(1) != ","
|
218
219
|
and (
|
219
220
|
fix_broken_markdown_link
|
220
|
-
or (self.
|
221
|
+
or (self.get_char_at(-2) == "]" and self.get_char_at(-1)) == "("
|
221
222
|
)
|
222
223
|
):
|
223
224
|
fix_broken_markdown_link = not fix_broken_markdown_link
|
@@ -239,7 +240,7 @@ class JSONParser:
|
|
239
240
|
|
240
241
|
return self.json_str[start:end]
|
241
242
|
|
242
|
-
def parse_number(self) -> Union[float, int]:
|
243
|
+
def parse_number(self) -> Union[float, int, str]:
|
243
244
|
# <number> is a valid real number expressed in one of a number of given formats
|
244
245
|
number_str = ""
|
245
246
|
number_chars = set("0123456789-.eE")
|
@@ -257,7 +258,7 @@ class JSONParser:
|
|
257
258
|
# This is a string then
|
258
259
|
return self.parse_string()
|
259
260
|
|
260
|
-
def parse_boolean_or_null(self) -> Union[bool, None]:
|
261
|
+
def parse_boolean_or_null(self) -> Union[bool, str, None]:
|
261
262
|
# <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
|
262
263
|
boolean_map = {"true": (True, 4), "false": (False, 5), "null": (None, 4)}
|
263
264
|
for key, (value, length) in boolean_map.items():
|
@@ -272,21 +273,7 @@ class JSONParser:
|
|
272
273
|
self.json_str = self.json_str[: self.index] + char + self.json_str[self.index :]
|
273
274
|
self.index += 1
|
274
275
|
|
275
|
-
def get_char_at(self) -> Union[str, bool]:
|
276
|
-
# Why not use something simpler? Because we might be out of bounds and doing this check all the time is annoying
|
277
|
-
try:
|
278
|
-
return self.json_str[self.index]
|
279
|
-
except IndexError:
|
280
|
-
return False
|
281
|
-
|
282
|
-
def get_prev_char(self, count=1):
|
283
|
-
# Why not use something simpler? Because we might be out of bounds and doing this check all the time is annoying
|
284
|
-
try:
|
285
|
-
return self.json_str[self.index - count]
|
286
|
-
except IndexError:
|
287
|
-
return False
|
288
|
-
|
289
|
-
def get_next_char(self, count=1):
|
276
|
+
def get_char_at(self, count: int = 0) -> Union[str, bool]:
|
290
277
|
# Why not use something simpler? Because we might be out of bounds and doing this check all the time is annoying
|
291
278
|
try:
|
292
279
|
return self.json_str[self.index + count]
|
@@ -320,8 +307,7 @@ def repair_json(
|
|
320
307
|
It will return the fixed string by default.
|
321
308
|
When `return_objects=True` is passed, it will return the decoded data structure instead.
|
322
309
|
"""
|
323
|
-
json_str =
|
324
|
-
json_str = re.sub(r"\s+$", "", json_str)
|
310
|
+
json_str = json_str.strip()
|
325
311
|
json_str = re.sub(r"/\*.*?\*/", "", json_str)
|
326
312
|
parser = JSONParser(json_str)
|
327
313
|
if skip_json_loads:
|
@@ -7,8 +7,9 @@ def test_repair_json():
|
|
7
7
|
assert repair_json("{}") == "{}"
|
8
8
|
assert repair_json("\"") == '""'
|
9
9
|
assert repair_json("\n") == '""'
|
10
|
-
assert repair_json(
|
11
|
-
assert repair_json('{"key":
|
10
|
+
assert repair_json(" /* COMMENT */ ") == '""'
|
11
|
+
assert repair_json(' {"key": true, "key2": false, "key3": null}') == '{"key": true, "key2": false, "key3": null}'
|
12
|
+
assert repair_json('{"key": TRUE, "key2": FALSE, "key3": Null} ') == '{"key": true, "key2": false, "key3": null}'
|
12
13
|
assert repair_json("{'key': 'string', 'key2': false, \"key3\": null, \"key4\": unquoted}") == '{"key": "string", "key2": false, "key3": null, "key4": "unquoted"}'
|
13
14
|
assert (
|
14
15
|
repair_json('{"name": "John", "age": 30, "city": "New York"}')
|
@@ -282,7 +282,7 @@ def test_true_true(benchmark):
|
|
282
282
|
mean_time = benchmark.stats.get("median")
|
283
283
|
|
284
284
|
# Define your time threshold in seconds (0.7 microseconds in this case)
|
285
|
-
max_time =
|
285
|
+
max_time = 0.7 / 10 ** 6 # 0.7 microsecond
|
286
286
|
|
287
287
|
# Assert that the average time is below the threshold
|
288
288
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -293,7 +293,7 @@ def test_true_false(benchmark):
|
|
293
293
|
mean_time = benchmark.stats.get("median")
|
294
294
|
|
295
295
|
# Define your time threshold in seconds (24 microseconds in this case)
|
296
|
-
max_time =
|
296
|
+
max_time = 24 * (1 / 10 ** 6) # 24 microsecond
|
297
297
|
|
298
298
|
# Assert that the average time is below the threshold
|
299
299
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -304,7 +304,7 @@ def test_false_true(benchmark):
|
|
304
304
|
mean_time = benchmark.stats.get("median")
|
305
305
|
|
306
306
|
# Define your time threshold in seconds (ms in this case)
|
307
|
-
max_time =
|
307
|
+
max_time = 900 / 10 ** 4 # 0.09 second (90 milliseconds)
|
308
308
|
|
309
309
|
# Assert that the average time is below the threshold
|
310
310
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -315,7 +315,7 @@ def test_false_false(benchmark):
|
|
315
315
|
mean_time = benchmark.stats.get("median")
|
316
316
|
|
317
317
|
# Define your time threshold in seconds (55 microseconds in this case)
|
318
|
-
max_time =
|
318
|
+
max_time = 55 / 10 ** 6 # 55 microseconds
|
319
319
|
|
320
320
|
# Assert that the average time is below the threshold
|
321
321
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|