json-repair 0.42.0__py3-none-any.whl → 0.44.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_parser.py +62 -12
- json_repair/json_repair.py +5 -2
- {json_repair-0.42.0.dist-info → json_repair-0.44.0.dist-info}/METADATA +9 -1
- {json_repair-0.42.0.dist-info → json_repair-0.44.0.dist-info}/RECORD +8 -8
- {json_repair-0.42.0.dist-info → json_repair-0.44.0.dist-info}/WHEEL +1 -1
- {json_repair-0.42.0.dist-info → json_repair-0.44.0.dist-info}/entry_points.txt +0 -0
- {json_repair-0.42.0.dist-info → json_repair-0.44.0.dist-info}/licenses/LICENSE +0 -0
- {json_repair-0.42.0.dist-info → json_repair-0.44.0.dist-info}/top_level.txt +0 -0
json_repair/json_parser.py
CHANGED
@@ -17,6 +17,7 @@ class JSONParser:
|
|
17
17
|
json_fd: Optional[TextIO],
|
18
18
|
logging: Optional[bool],
|
19
19
|
json_fd_chunk_length: int = 0,
|
20
|
+
stream_stable: bool = False,
|
20
21
|
) -> None:
|
21
22
|
# The string to parse
|
22
23
|
self.json_str: Union[str, StringFileWrapper] = json_str
|
@@ -40,6 +41,14 @@ class JSONParser:
|
|
40
41
|
else:
|
41
42
|
# No-op
|
42
43
|
self.log = lambda *args, **kwargs: None
|
44
|
+
# Use this when the JSON to be repaired is the accumulation of streaming JSON at a certain moment,
|
45
|
+
# e.g. json obtained from llm response.
|
46
|
+
# Setting this parameter to True keeps the repair results stable. For example:
|
47
|
+
# case 1: '{"key": "val\\' => '{"key": "val"}'
|
48
|
+
# case 2: '{"key": "val\\n' => '{"key": "val\\n"}'
|
49
|
+
# case 3: '{"key": "val\\n123,`key2:value2' => '{"key": "val\\n123,`key2:value2"}'
|
50
|
+
# case 4: '{"key": "val\\n123,`key2:value2`"}' => '{"key": "val\\n123,`key2:value2`"}'
|
51
|
+
self.stream_stable = stream_stable
|
43
52
|
|
44
53
|
def parse(
|
45
54
|
self,
|
@@ -113,7 +122,7 @@ class JSONParser:
|
|
113
122
|
|
114
123
|
def parse_object(self) -> Dict[str, JSONReturnType]:
|
115
124
|
# <object> ::= '{' [ <member> *(', ' <member>) ] '}' ; A sequence of 'members'
|
116
|
-
obj = {}
|
125
|
+
obj: Dict[str, JSONReturnType] = {}
|
117
126
|
# Stop when you either find the closing parentheses or you have iterated over the entire string
|
118
127
|
while (self.get_char_at() or "}") != "}":
|
119
128
|
# This is what we expect to find:
|
@@ -141,6 +150,31 @@ class JSONParser:
|
|
141
150
|
while self.get_char_at():
|
142
151
|
# The rollback index needs to be updated here in case the key is empty
|
143
152
|
rollback_index = self.index
|
153
|
+
if self.get_char_at() == "[" and key == "":
|
154
|
+
# Is this an array?
|
155
|
+
# Need to check if the previous parsed value contained in obj is an array and in that case parse and merge the two
|
156
|
+
prev_key = list(obj.keys())[-1] if obj else None
|
157
|
+
if prev_key and isinstance(obj[prev_key], list):
|
158
|
+
# If the previous key's value is an array, parse the new array and merge
|
159
|
+
self.index += 1
|
160
|
+
new_array = self.parse_array()
|
161
|
+
if isinstance(new_array, list):
|
162
|
+
# Merge and flatten the arrays
|
163
|
+
prev_value = obj[prev_key]
|
164
|
+
if isinstance(prev_value, list):
|
165
|
+
prev_value.extend(
|
166
|
+
new_array[0]
|
167
|
+
if len(new_array) == 1
|
168
|
+
and isinstance(new_array[0], list)
|
169
|
+
else new_array
|
170
|
+
)
|
171
|
+
self.skip_whitespaces_at()
|
172
|
+
if self.get_char_at() == ",":
|
173
|
+
self.index += 1
|
174
|
+
self.skip_whitespaces_at()
|
175
|
+
continue
|
176
|
+
else:
|
177
|
+
self.index = rollback_index
|
144
178
|
key = str(self.parse_string())
|
145
179
|
if key == "":
|
146
180
|
self.skip_whitespaces_at()
|
@@ -216,7 +250,7 @@ class JSONParser:
|
|
216
250
|
|
217
251
|
# skip over whitespace after a value but before closing ]
|
218
252
|
char = self.get_char_at()
|
219
|
-
while char and (char.isspace() or char == ","):
|
253
|
+
while char and char != "]" and (char.isspace() or char == ","):
|
220
254
|
self.index += 1
|
221
255
|
char = self.get_char_at()
|
222
256
|
|
@@ -353,10 +387,15 @@ class JSONParser:
|
|
353
387
|
"While parsing a string missing the left delimiter in object key context, we found a :, stopping here",
|
354
388
|
)
|
355
389
|
break
|
356
|
-
if
|
357
|
-
|
358
|
-
|
359
|
-
|
390
|
+
if (
|
391
|
+
(missing_quotes or not self.stream_stable)
|
392
|
+
and self.context.current == ContextValues.OBJECT_VALUE
|
393
|
+
and char
|
394
|
+
in [
|
395
|
+
",",
|
396
|
+
"}",
|
397
|
+
]
|
398
|
+
):
|
360
399
|
rstring_delimiter_missing = True
|
361
400
|
# check if this is a case in which the closing comma is NOT missing instead
|
362
401
|
i = self.skip_to_character(character=rstring_delimiter, idx=1)
|
@@ -425,7 +464,11 @@ class JSONParser:
|
|
425
464
|
"While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn't determine that a right delimiter was present. Stopping here",
|
426
465
|
)
|
427
466
|
break
|
428
|
-
if
|
467
|
+
if (
|
468
|
+
(missing_quotes or not self.stream_stable)
|
469
|
+
and char == "]"
|
470
|
+
and ContextValues.ARRAY in self.context.context
|
471
|
+
):
|
429
472
|
# We found the end of an array and we are in array context
|
430
473
|
# So let's check if we find a rstring_delimiter forward otherwise end early
|
431
474
|
i = self.skip_to_character(rstring_delimiter)
|
@@ -435,6 +478,9 @@ class JSONParser:
|
|
435
478
|
string_acc += char
|
436
479
|
self.index += 1
|
437
480
|
char = self.get_char_at()
|
481
|
+
# Unclosed string ends with a \ character. This character is ignored if stream_stable = True.
|
482
|
+
if self.stream_stable and not char and string_acc[-1] == "\\":
|
483
|
+
string_acc = string_acc[:-1]
|
438
484
|
if char and string_acc[-1] == "\\":
|
439
485
|
# This is a special case, if people use real strings this might happen
|
440
486
|
self.log("Found a stray escape sequence, normalizing it")
|
@@ -644,14 +690,18 @@ class JSONParser:
|
|
644
690
|
# A fallout of the previous special case in the while loop,
|
645
691
|
# we need to update the index only if we had a closing quote
|
646
692
|
if char != rstring_delimiter:
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
693
|
+
# If stream_stable is True, trailing whitespace of an unclosed string is kept (not trimmed)
|
694
|
+
if not self.stream_stable:
|
695
|
+
self.log(
|
696
|
+
"While parsing a string, we missed the closing quote, ignoring",
|
697
|
+
)
|
698
|
+
string_acc = string_acc.rstrip()
|
651
699
|
else:
|
652
700
|
self.index += 1
|
653
701
|
|
654
|
-
if
|
702
|
+
if not self.stream_stable and (
|
703
|
+
missing_quotes or (string_acc and string_acc[-1] == "\n")
|
704
|
+
):
|
655
705
|
# Clean the whitespaces for some corner cases
|
656
706
|
string_acc = string_acc.rstrip()
|
657
707
|
|
json_repair/json_repair.py
CHANGED
@@ -38,6 +38,7 @@ def repair_json(
|
|
38
38
|
json_fd: Optional[TextIO] = None,
|
39
39
|
ensure_ascii: bool = True,
|
40
40
|
chunk_length: int = 0,
|
41
|
+
stream_stable: bool = False,
|
41
42
|
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
|
42
43
|
"""
|
43
44
|
Given a json formatted string, it will try to decode it and, if it fails, it will try to fix it.
|
@@ -50,11 +51,11 @@ def repair_json(
|
|
50
51
|
json_fd (Optional[TextIO], optional): File descriptor for JSON input. Do not use! Use `from_file` or `load` instead. Defaults to None.
|
51
52
|
ensure_ascii (bool, optional): Set to False to avoid converting non-latin characters to ascii (for example when using chinese characters). Defaults to True. Ignored if `skip_json_loads` is True.
|
52
53
|
chunk_length (int, optional): Size in bytes of the file chunks to read at once. Ignored if `json_fd` is None. Do not use! Use `from_file` or `load` instead. Defaults to 1MB.
|
53
|
-
|
54
|
+
stream_stable (bool, optional): Use when the JSON to be repaired is the accumulation of streaming JSON at a certain moment. Setting this parameter to True keeps the repair results stable. Defaults to False.
|
54
55
|
Returns:
|
55
56
|
Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON or a tuple with the repaired JSON and repair log.
|
56
57
|
"""
|
57
|
-
parser = JSONParser(json_str, json_fd, logging, chunk_length)
|
58
|
+
parser = JSONParser(json_str, json_fd, logging, chunk_length, stream_stable)
|
58
59
|
if skip_json_loads:
|
59
60
|
parsed_json = parser.parse()
|
60
61
|
else:
|
@@ -76,6 +77,7 @@ def loads(
|
|
76
77
|
json_str: str,
|
77
78
|
skip_json_loads: bool = False,
|
78
79
|
logging: bool = False,
|
80
|
+
stream_stable: bool = False,
|
79
81
|
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
|
80
82
|
"""
|
81
83
|
This function works like `json.loads()` except that it will fix your JSON in the process.
|
@@ -94,6 +96,7 @@ def loads(
|
|
94
96
|
return_objects=True,
|
95
97
|
skip_json_loads=skip_json_loads,
|
96
98
|
logging=logging,
|
99
|
+
stream_stable=stream_stable,
|
97
100
|
)
|
98
101
|
|
99
102
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.44.0
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -188,6 +188,14 @@ Some rules of thumb to use:
|
|
188
188
|
- `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
|
189
189
|
- If you are having issues with escaping pass the string as **raw** string like: `r"string with escaping\""`
|
190
190
|
|
191
|
+
### Use json_repair with streaming
|
192
|
+
|
193
|
+
Sometimes you are streaming some data and want to repair the JSON coming from it while it is still incomplete. Normally this won't work, but you can pass `stream_stable=True` to `repair_json()` or `loads()` to make it work:
|
194
|
+
|
195
|
+
```
|
196
|
+
stream_output = repair_json(stream_input, stream_stable=True)
|
197
|
+
```
|
198
|
+
|
191
199
|
### Use json_repair from CLI
|
192
200
|
|
193
201
|
Install the library for command-line with:
|
@@ -1,14 +1,14 @@
|
|
1
1
|
json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
|
2
2
|
json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
|
3
3
|
json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
|
4
|
-
json_repair/json_parser.py,sha256=
|
5
|
-
json_repair/json_repair.py,sha256=
|
4
|
+
json_repair/json_parser.py,sha256=ID60F0RMzaCpeHPkZbuidJcsmrVBiPmQDRUOgjoeedE,41972
|
5
|
+
json_repair/json_repair.py,sha256=o84um759Alft7mlj7lXZFtPQZQPjbo5Jxraa7dTdiRg,10621
|
6
6
|
json_repair/object_comparer.py,sha256=SeicB6_N4BHAEPon7s2BELEaJc4oyR9ZhfX2RgPk6Bw,1682
|
7
7
|
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
8
|
json_repair/string_file_wrapper.py,sha256=koZmdq2-Z5K7XF1bDqX6dEbNaVMJYcMTjq-aGe6NQvA,4526
|
9
|
-
json_repair-0.
|
10
|
-
json_repair-0.
|
11
|
-
json_repair-0.
|
12
|
-
json_repair-0.
|
13
|
-
json_repair-0.
|
14
|
-
json_repair-0.
|
9
|
+
json_repair-0.44.0.dist-info/licenses/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
10
|
+
json_repair-0.44.0.dist-info/METADATA,sha256=mu_r9oiyo_35hwk745ZTFoMZrJ9PBjRPjFKgICkKSZQ,12157
|
11
|
+
json_repair-0.44.0.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
|
12
|
+
json_repair-0.44.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
|
13
|
+
json_repair-0.44.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
14
|
+
json_repair-0.44.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|