json-repair 0.43.0__py3-none-any.whl → 0.44.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_parser.py +39 -10
- json_repair/json_repair.py +5 -2
- {json_repair-0.43.0.dist-info → json_repair-0.44.0.dist-info}/METADATA +9 -1
- {json_repair-0.43.0.dist-info → json_repair-0.44.0.dist-info}/RECORD +8 -8
- {json_repair-0.43.0.dist-info → json_repair-0.44.0.dist-info}/WHEEL +0 -0
- {json_repair-0.43.0.dist-info → json_repair-0.44.0.dist-info}/entry_points.txt +0 -0
- {json_repair-0.43.0.dist-info → json_repair-0.44.0.dist-info}/licenses/LICENSE +0 -0
- {json_repair-0.43.0.dist-info → json_repair-0.44.0.dist-info}/top_level.txt +0 -0
json_repair/json_parser.py
CHANGED
@@ -17,6 +17,7 @@ class JSONParser:
|
|
17
17
|
json_fd: Optional[TextIO],
|
18
18
|
logging: Optional[bool],
|
19
19
|
json_fd_chunk_length: int = 0,
|
20
|
+
stream_stable: bool = False,
|
20
21
|
) -> None:
|
21
22
|
# The string to parse
|
22
23
|
self.json_str: Union[str, StringFileWrapper] = json_str
|
@@ -40,6 +41,14 @@ class JSONParser:
|
|
40
41
|
else:
|
41
42
|
# No-op
|
42
43
|
self.log = lambda *args, **kwargs: None
|
44
|
+
# Use this flag when the JSON to be repaired is the accumulation of streaming JSON at a certain moment,
|
45
|
+
# e.g. json obtained from llm response.
|
46
|
+
# Setting this parameter to True keeps the repair results stable. For example:
|
47
|
+
# case 1: '{"key": "val\\' => '{"key": "val"}'
|
48
|
+
# case 2: '{"key": "val\\n' => '{"key": "val\\n"}'
|
49
|
+
# case 3: '{"key": "val\\n123,`key2:value2' => '{"key": "val\\n123,`key2:value2"}'
|
50
|
+
# case 4: '{"key": "val\\n123,`key2:value2`"}' => '{"key": "val\\n123,`key2:value2`"}'
|
51
|
+
self.stream_stable = stream_stable
|
43
52
|
|
44
53
|
def parse(
|
45
54
|
self,
|
@@ -159,6 +168,10 @@ class JSONParser:
|
|
159
168
|
and isinstance(new_array[0], list)
|
160
169
|
else new_array
|
161
170
|
)
|
171
|
+
self.skip_whitespaces_at()
|
172
|
+
if self.get_char_at() == ",":
|
173
|
+
self.index += 1
|
174
|
+
self.skip_whitespaces_at()
|
162
175
|
continue
|
163
176
|
else:
|
164
177
|
self.index = rollback_index
|
@@ -374,10 +387,15 @@ class JSONParser:
|
|
374
387
|
"While parsing a string missing the left delimiter in object key context, we found a :, stopping here",
|
375
388
|
)
|
376
389
|
break
|
377
|
-
if
|
378
|
-
|
379
|
-
|
380
|
-
|
390
|
+
if (
|
391
|
+
(missing_quotes or not self.stream_stable)
|
392
|
+
and self.context.current == ContextValues.OBJECT_VALUE
|
393
|
+
and char
|
394
|
+
in [
|
395
|
+
",",
|
396
|
+
"}",
|
397
|
+
]
|
398
|
+
):
|
381
399
|
rstring_delimiter_missing = True
|
382
400
|
# check if this is a case in which the closing comma is NOT missing instead
|
383
401
|
i = self.skip_to_character(character=rstring_delimiter, idx=1)
|
@@ -446,7 +464,11 @@ class JSONParser:
|
|
446
464
|
"While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn't determine that a right delimiter was present. Stopping here",
|
447
465
|
)
|
448
466
|
break
|
449
|
-
if
|
467
|
+
if (
|
468
|
+
(missing_quotes or not self.stream_stable)
|
469
|
+
and char == "]"
|
470
|
+
and ContextValues.ARRAY in self.context.context
|
471
|
+
):
|
450
472
|
# We found the end of an array and we are in array context
|
451
473
|
# So let's check if we find a rstring_delimiter forward otherwise end early
|
452
474
|
i = self.skip_to_character(rstring_delimiter)
|
@@ -456,6 +478,9 @@ class JSONParser:
|
|
456
478
|
string_acc += char
|
457
479
|
self.index += 1
|
458
480
|
char = self.get_char_at()
|
481
|
+
# Unclosed string ends with a \ character. This character is ignored if stream_stable = True.
|
482
|
+
if self.stream_stable and not char and string_acc[-1] == "\\":
|
483
|
+
string_acc = string_acc[:-1]
|
459
484
|
if char and string_acc[-1] == "\\":
|
460
485
|
# This is a special case, if people use real strings this might happen
|
461
486
|
self.log("Found a stray escape sequence, normalizing it")
|
@@ -665,14 +690,18 @@ class JSONParser:
|
|
665
690
|
# A fallout of the previous special case in the while loop,
|
666
691
|
# we need to update the index only if we had a closing quote
|
667
692
|
if char != rstring_delimiter:
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
693
|
+
# If stream_stable = True, trailing whitespace characters of unclosed strings are not trimmed
|
694
|
+
if not self.stream_stable:
|
695
|
+
self.log(
|
696
|
+
"While parsing a string, we missed the closing quote, ignoring",
|
697
|
+
)
|
698
|
+
string_acc = string_acc.rstrip()
|
672
699
|
else:
|
673
700
|
self.index += 1
|
674
701
|
|
675
|
-
if
|
702
|
+
if not self.stream_stable and (
|
703
|
+
missing_quotes or (string_acc and string_acc[-1] == "\n")
|
704
|
+
):
|
676
705
|
# Clean the whitespaces for some corner cases
|
677
706
|
string_acc = string_acc.rstrip()
|
678
707
|
|
json_repair/json_repair.py
CHANGED
@@ -38,6 +38,7 @@ def repair_json(
|
|
38
38
|
json_fd: Optional[TextIO] = None,
|
39
39
|
ensure_ascii: bool = True,
|
40
40
|
chunk_length: int = 0,
|
41
|
+
stream_stable: bool = False,
|
41
42
|
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
|
42
43
|
"""
|
43
44
|
Given a json formatted string, it will try to decode it and, if it fails, it will try to fix it.
|
@@ -50,11 +51,11 @@ def repair_json(
|
|
50
51
|
json_fd (Optional[TextIO], optional): File descriptor for JSON input. Do not use! Use `from_file` or `load` instead. Defaults to None.
|
51
52
|
ensure_ascii (bool, optional): Set to False to avoid converting non-latin characters to ascii (for example when using chinese characters). Defaults to True. Ignored if `skip_json_loads` is True.
|
52
53
|
chunk_length (int, optional): Size in bytes of the file chunks to read at once. Ignored if `json_fd` is None. Do not use! Use `from_file` or `load` instead. Defaults to 1MB.
|
53
|
-
|
54
|
+
stream_stable (bool, optional): Set to True when the JSON to be repaired is the accumulation of streaming JSON at a certain moment (e.g. a partial LLM response); this keeps the repair results stable. Defaults to False.
|
54
55
|
Returns:
|
55
56
|
Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON or a tuple with the repaired JSON and repair log.
|
56
57
|
"""
|
57
|
-
parser = JSONParser(json_str, json_fd, logging, chunk_length)
|
58
|
+
parser = JSONParser(json_str, json_fd, logging, chunk_length, stream_stable)
|
58
59
|
if skip_json_loads:
|
59
60
|
parsed_json = parser.parse()
|
60
61
|
else:
|
@@ -76,6 +77,7 @@ def loads(
|
|
76
77
|
json_str: str,
|
77
78
|
skip_json_loads: bool = False,
|
78
79
|
logging: bool = False,
|
80
|
+
stream_stable: bool = False,
|
79
81
|
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
|
80
82
|
"""
|
81
83
|
This function works like `json.loads()` except that it will fix your JSON in the process.
|
@@ -94,6 +96,7 @@ def loads(
|
|
94
96
|
return_objects=True,
|
95
97
|
skip_json_loads=skip_json_loads,
|
96
98
|
logging=logging,
|
99
|
+
stream_stable=stream_stable,
|
97
100
|
)
|
98
101
|
|
99
102
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.43.0
|
3
|
+
Version: 0.44.0
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -188,6 +188,14 @@ Some rules of thumb to use:
|
|
188
188
|
- `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
|
189
189
|
- If you are having issues with escaping pass the string as **raw** string like: `r"string with escaping\""`
|
190
190
|
|
191
|
+
### Use json_repair with streaming
|
192
|
+
|
193
|
+
Sometimes you are streaming some data and want to repair the partial JSON received so far. Normally this won't give stable results, but you can pass `stream_stable=True` to `repair_json()` or `loads()` to make it work:
|
194
|
+
|
195
|
+
```
|
196
|
+
stream_output = repair_json(stream_input, stream_stable=True)
|
197
|
+
```
|
198
|
+
|
191
199
|
### Use json_repair from CLI
|
192
200
|
|
193
201
|
Install the library for command-line with:
|
@@ -1,14 +1,14 @@
|
|
1
1
|
json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
|
2
2
|
json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
|
3
3
|
json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
|
4
|
-
json_repair/json_parser.py,sha256=
|
5
|
-
json_repair/json_repair.py,sha256=
|
4
|
+
json_repair/json_parser.py,sha256=ID60F0RMzaCpeHPkZbuidJcsmrVBiPmQDRUOgjoeedE,41972
|
5
|
+
json_repair/json_repair.py,sha256=o84um759Alft7mlj7lXZFtPQZQPjbo5Jxraa7dTdiRg,10621
|
6
6
|
json_repair/object_comparer.py,sha256=SeicB6_N4BHAEPon7s2BELEaJc4oyR9ZhfX2RgPk6Bw,1682
|
7
7
|
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
8
|
json_repair/string_file_wrapper.py,sha256=koZmdq2-Z5K7XF1bDqX6dEbNaVMJYcMTjq-aGe6NQvA,4526
|
9
|
-
json_repair-0.
|
10
|
-
json_repair-0.
|
11
|
-
json_repair-0.
|
12
|
-
json_repair-0.
|
13
|
-
json_repair-0.
|
14
|
-
json_repair-0.
|
9
|
+
json_repair-0.44.0.dist-info/licenses/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
10
|
+
json_repair-0.44.0.dist-info/METADATA,sha256=mu_r9oiyo_35hwk745ZTFoMZrJ9PBjRPjFKgICkKSZQ,12157
|
11
|
+
json_repair-0.44.0.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
|
12
|
+
json_repair-0.44.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
|
13
|
+
json_repair-0.44.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
14
|
+
json_repair-0.44.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|