json-repair 0.46.2__py3-none-any.whl → 0.47.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_parser.py +14 -4
- json_repair/json_repair.py +4 -4
- {json_repair-0.46.2.dist-info → json_repair-0.47.1.dist-info}/METADATA +5 -1
- {json_repair-0.46.2.dist-info → json_repair-0.47.1.dist-info}/RECORD +8 -8
- {json_repair-0.46.2.dist-info → json_repair-0.47.1.dist-info}/WHEEL +0 -0
- {json_repair-0.46.2.dist-info → json_repair-0.47.1.dist-info}/entry_points.txt +0 -0
- {json_repair-0.46.2.dist-info → json_repair-0.47.1.dist-info}/licenses/LICENSE +0 -0
- {json_repair-0.46.2.dist-info → json_repair-0.47.1.dist-info}/top_level.txt +0 -0
json_repair/json_parser.py
CHANGED
@@ -365,13 +365,14 @@ class JSONParser:
|
|
365
365
|
)
|
366
366
|
break
|
367
367
|
if (
|
368
|
-
|
368
|
+
not self.stream_stable
|
369
369
|
and self.context.current == ContextValues.OBJECT_VALUE
|
370
370
|
and char
|
371
371
|
in [
|
372
372
|
",",
|
373
373
|
"}",
|
374
374
|
]
|
375
|
+
and string_acc[-1] != rstring_delimiter
|
375
376
|
):
|
376
377
|
rstring_delimiter_missing = True
|
377
378
|
# check if this is a case in which the closing comma is NOT missing instead
|
@@ -434,9 +435,10 @@ class JSONParser:
|
|
434
435
|
)
|
435
436
|
break
|
436
437
|
if (
|
437
|
-
|
438
|
+
not self.stream_stable
|
438
439
|
and char == "]"
|
439
440
|
and ContextValues.ARRAY in self.context.context
|
441
|
+
and string_acc[-1] != rstring_delimiter
|
440
442
|
):
|
441
443
|
# We found the end of an array and we are in array context
|
442
444
|
# So let's check if we find a rstring_delimiter forward otherwise end early
|
@@ -456,9 +458,17 @@ class JSONParser:
|
|
456
458
|
if char in [rstring_delimiter, "t", "n", "r", "b", "\\"]:
|
457
459
|
string_acc = string_acc[:-1]
|
458
460
|
escape_seqs = {"t": "\t", "n": "\n", "r": "\r", "b": "\b"}
|
459
|
-
string_acc += escape_seqs.get(char, char)
|
461
|
+
string_acc += escape_seqs.get(char, char)
|
460
462
|
self.index += 1
|
461
463
|
char = self.get_char_at()
|
464
|
+
while char and string_acc[-1] == "\\" and char in [rstring_delimiter, "\\"]:
|
465
|
+
# this is a bit of a special case, if I don't do this it will close the loop or create a train of \\
|
466
|
+
# I don't love it though
|
467
|
+
string_acc = string_acc[:-1]
|
468
|
+
string_acc += char
|
469
|
+
self.index += 1
|
470
|
+
char = self.get_char_at()
|
471
|
+
continue
|
462
472
|
elif char in ["u", "x"]:
|
463
473
|
# If we find a unicode escape sequence, normalize it
|
464
474
|
num_chars = 4 if char == "u" else 2
|
@@ -499,7 +509,7 @@ class JSONParser:
|
|
499
509
|
)
|
500
510
|
break
|
501
511
|
# ChatGPT sometimes forget to quote stuff in html tags or markdown, so we do this whole thing here
|
502
|
-
if char == rstring_delimiter:
|
512
|
+
if char == rstring_delimiter and string_acc[-1] != "\\":
|
503
513
|
# Special case here, in case of double quotes one after another
|
504
514
|
if doubled_quotes and self.get_char_at(1) == rstring_delimiter:
|
505
515
|
self.log("While parsing a string, we found a doubled quote, ignoring it")
|
json_repair/json_repair.py
CHANGED
@@ -37,9 +37,9 @@ def repair_json(
|
|
37
37
|
skip_json_loads: bool = False,
|
38
38
|
logging: bool = False,
|
39
39
|
json_fd: TextIO | None = None,
|
40
|
-
ensure_ascii: bool = True,
|
41
40
|
chunk_length: int = 0,
|
42
41
|
stream_stable: bool = False,
|
42
|
+
**json_dumps_args,
|
43
43
|
) -> str: ...
|
44
44
|
|
45
45
|
|
@@ -50,9 +50,9 @@ def repair_json(
|
|
50
50
|
skip_json_loads: bool = False,
|
51
51
|
logging: bool = False,
|
52
52
|
json_fd: TextIO | None = None,
|
53
|
-
ensure_ascii: bool = True,
|
54
53
|
chunk_length: int = 0,
|
55
54
|
stream_stable: bool = False,
|
55
|
+
**json_dumps_args,
|
56
56
|
) -> JSONReturnType | tuple[JSONReturnType, list[dict[str, str]]]: ...
|
57
57
|
|
58
58
|
|
@@ -62,9 +62,9 @@ def repair_json(
|
|
62
62
|
skip_json_loads: bool = False,
|
63
63
|
logging: bool = False,
|
64
64
|
json_fd: TextIO | None = None,
|
65
|
-
ensure_ascii: bool = True,
|
66
65
|
chunk_length: int = 0,
|
67
66
|
stream_stable: bool = False,
|
67
|
+
**json_dumps_args,
|
68
68
|
) -> JSONReturnType | tuple[JSONReturnType, list[dict[str, str]]]:
|
69
69
|
"""
|
70
70
|
Given a json formatted string, it will try to decode it and, if it fails, it will try to fix it.
|
@@ -96,7 +96,7 @@ def repair_json(
|
|
96
96
|
# Avoid returning only a pair of quotes if it's an empty string
|
97
97
|
elif parsed_json == "":
|
98
98
|
return ""
|
99
|
-
return json.dumps(parsed_json, ensure_ascii=ensure_ascii)
|
99
|
+
return json.dumps(parsed_json, **json_dumps_args)
|
100
100
|
|
101
101
|
|
102
102
|
def loads(
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.46.2
|
3
|
+
Version: 0.47.1
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -175,6 +175,10 @@ will return
|
|
175
175
|
|
176
176
|
{"test_chinese_ascii": "统一码"}
|
177
177
|
|
178
|
+
### JSON dumps parameters
|
179
|
+
|
180
|
+
More generally, `repair_json` accepts all parameters that `json.dumps` accepts and passes them straight through (for example, `indent`).
|
181
|
+
|
178
182
|
### Performance considerations
|
179
183
|
If you find this library too slow because it is using `json.loads()`, you can skip that step by passing `skip_json_loads=True` to `repair_json`, like so:
|
180
184
|
|
@@ -1,14 +1,14 @@
|
|
1
1
|
json_repair/__init__.py,sha256=6FDD6dEVM5Pb5o4Zodgw4ex30Hzy-YvNRy0vts9SQ4I,118
|
2
2
|
json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
|
3
3
|
json_repair/json_context.py,sha256=WsMOjqpGSr6aaDONcrk8UFtTurzWon2Qq9AoBBYseoI,934
|
4
|
-
json_repair/json_parser.py,sha256=
|
5
|
-
json_repair/json_repair.py,sha256=
|
4
|
+
json_repair/json_parser.py,sha256=YBi07AfBGoZ54locsc6j1Y7WfdretFzmt0wXDEWwRo8,40321
|
5
|
+
json_repair/json_repair.py,sha256=0qL2LuzlNJa3VnEqYNaJyZNAL2w18oAt2YvA-TlMxmY,11211
|
6
6
|
json_repair/object_comparer.py,sha256=LlIF0MisRglzC-CiG5AxAEDCBWBHeJd-6uXYx0uRmCk,1175
|
7
7
|
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
8
|
json_repair/string_file_wrapper.py,sha256=tGkWBEUPE-CZPf4uSM5NE9oSDTpskX0myJiXsl-gbds,4333
|
9
|
-
json_repair-0.
|
10
|
-
json_repair-0.
|
11
|
-
json_repair-0.
|
12
|
-
json_repair-0.
|
13
|
-
json_repair-0.
|
14
|
-
json_repair-0.
|
9
|
+
json_repair-0.47.1.dist-info/licenses/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
10
|
+
json_repair-0.47.1.dist-info/METADATA,sha256=AgW6QikkfasSzZk-AplVvSkixKfeh4aEUx1UPFFHWmA,12368
|
11
|
+
json_repair-0.47.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
12
|
+
json_repair-0.47.1.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
|
13
|
+
json_repair-0.47.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
14
|
+
json_repair-0.47.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|