json-repair 0.42.0__py3-none-any.whl → 0.44.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,6 +17,7 @@ class JSONParser:
17
17
  json_fd: Optional[TextIO],
18
18
  logging: Optional[bool],
19
19
  json_fd_chunk_length: int = 0,
20
+ stream_stable: bool = False,
20
21
  ) -> None:
21
22
  # The string to parse
22
23
  self.json_str: Union[str, StringFileWrapper] = json_str
@@ -40,6 +41,14 @@ class JSONParser:
40
41
  else:
41
42
  # No-op
42
43
  self.log = lambda *args, **kwargs: None
44
+ # When the json to be repaired is the accumulation of streaming json at a certain moment.
45
+ # e.g. json obtained from llm response.
46
+ # Setting this parameter to True keeps the repair results stable. For example:
47
+ # case 1: '{"key": "val\\' => '{"key": "val"}'
48
+ # case 2: '{"key": "val\\n' => '{"key": "val\\n"}'
49
+ # case 3: '{"key": "val\\n123,`key2:value2' => '{"key": "val\\n123,`key2:value2"}'
50
+ # case 4: '{"key": "val\\n123,`key2:value2`"}' => '{"key": "val\\n123,`key2:value2`"}'
51
+ self.stream_stable = stream_stable
43
52
 
44
53
  def parse(
45
54
  self,
@@ -113,7 +122,7 @@ class JSONParser:
113
122
 
114
123
  def parse_object(self) -> Dict[str, JSONReturnType]:
115
124
  # <object> ::= '{' [ <member> *(', ' <member>) ] '}' ; A sequence of 'members'
116
- obj = {}
125
+ obj: Dict[str, JSONReturnType] = {}
117
126
  # Stop when you either find the closing parentheses or you have iterated over the entire string
118
127
  while (self.get_char_at() or "}") != "}":
119
128
  # This is what we expect to find:
@@ -141,6 +150,31 @@ class JSONParser:
141
150
  while self.get_char_at():
142
151
  # The rollback index needs to be updated here in case the key is empty
143
152
  rollback_index = self.index
153
+ if self.get_char_at() == "[" and key == "":
154
+ # Is this an array?
155
+ # Need to check if the previous parsed value contained in obj is an array and in that case parse and merge the two
156
+ prev_key = list(obj.keys())[-1] if obj else None
157
+ if prev_key and isinstance(obj[prev_key], list):
158
+ # If the previous key's value is an array, parse the new array and merge
159
+ self.index += 1
160
+ new_array = self.parse_array()
161
+ if isinstance(new_array, list):
162
+ # Merge and flatten the arrays
163
+ prev_value = obj[prev_key]
164
+ if isinstance(prev_value, list):
165
+ prev_value.extend(
166
+ new_array[0]
167
+ if len(new_array) == 1
168
+ and isinstance(new_array[0], list)
169
+ else new_array
170
+ )
171
+ self.skip_whitespaces_at()
172
+ if self.get_char_at() == ",":
173
+ self.index += 1
174
+ self.skip_whitespaces_at()
175
+ continue
176
+ else:
177
+ self.index = rollback_index
144
178
  key = str(self.parse_string())
145
179
  if key == "":
146
180
  self.skip_whitespaces_at()
@@ -216,7 +250,7 @@ class JSONParser:
216
250
 
217
251
  # skip over whitespace after a value but before closing ]
218
252
  char = self.get_char_at()
219
- while char and (char.isspace() or char == ","):
253
+ while char and char != "]" and (char.isspace() or char == ","):
220
254
  self.index += 1
221
255
  char = self.get_char_at()
222
256
 
@@ -353,10 +387,15 @@ class JSONParser:
353
387
  "While parsing a string missing the left delimiter in object key context, we found a :, stopping here",
354
388
  )
355
389
  break
356
- if self.context.current == ContextValues.OBJECT_VALUE and char in [
357
- ",",
358
- "}",
359
- ]:
390
+ if (
391
+ (missing_quotes or not self.stream_stable)
392
+ and self.context.current == ContextValues.OBJECT_VALUE
393
+ and char
394
+ in [
395
+ ",",
396
+ "}",
397
+ ]
398
+ ):
360
399
  rstring_delimiter_missing = True
361
400
  # check if this is a case in which the closing comma is NOT missing instead
362
401
  i = self.skip_to_character(character=rstring_delimiter, idx=1)
@@ -425,7 +464,11 @@ class JSONParser:
425
464
  "While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn't determine that a right delimiter was present. Stopping here",
426
465
  )
427
466
  break
428
- if char == "]" and ContextValues.ARRAY in self.context.context:
467
+ if (
468
+ (missing_quotes or not self.stream_stable)
469
+ and char == "]"
470
+ and ContextValues.ARRAY in self.context.context
471
+ ):
429
472
  # We found the end of an array and we are in array context
430
473
  # So let's check if we find a rstring_delimiter forward otherwise end early
431
474
  i = self.skip_to_character(rstring_delimiter)
@@ -435,6 +478,9 @@ class JSONParser:
435
478
  string_acc += char
436
479
  self.index += 1
437
480
  char = self.get_char_at()
481
+ # Unclosed string ends with a \ character. This character is ignored if stream_stable = True.
482
+ if self.stream_stable and not char and string_acc[-1] == "\\":
483
+ string_acc = string_acc[:-1]
438
484
  if char and string_acc[-1] == "\\":
439
485
  # This is a special case, if people use real strings this might happen
440
486
  self.log("Found a stray escape sequence, normalizing it")
@@ -644,14 +690,18 @@ class JSONParser:
644
690
  # A fallout of the previous special case in the while loop,
645
691
  # we need to update the index only if we had a closing quote
646
692
  if char != rstring_delimiter:
647
- self.log(
648
- "While parsing a string, we missed the closing quote, ignoring",
649
- )
650
- string_acc = string_acc.rstrip()
693
+ # When stream_stable is True, trailing whitespace of an unclosed string is not trimmed
694
+ if not self.stream_stable:
695
+ self.log(
696
+ "While parsing a string, we missed the closing quote, ignoring",
697
+ )
698
+ string_acc = string_acc.rstrip()
651
699
  else:
652
700
  self.index += 1
653
701
 
654
- if missing_quotes or (string_acc and string_acc[-1] == "\n"):
702
+ if not self.stream_stable and (
703
+ missing_quotes or (string_acc and string_acc[-1] == "\n")
704
+ ):
655
705
  # Clean the whitespaces for some corner cases
656
706
  string_acc = string_acc.rstrip()
657
707
 
@@ -38,6 +38,7 @@ def repair_json(
38
38
  json_fd: Optional[TextIO] = None,
39
39
  ensure_ascii: bool = True,
40
40
  chunk_length: int = 0,
41
+ stream_stable: bool = False,
41
42
  ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
42
43
  """
43
44
  Given a json formatted string, it will try to decode it and, if it fails, it will try to fix it.
@@ -50,11 +51,11 @@ def repair_json(
50
51
  json_fd (Optional[TextIO], optional): File descriptor for JSON input. Do not use! Use `from_file` or `load` instead. Defaults to None.
51
52
  ensure_ascii (bool, optional): Set to False to avoid converting non-latin characters to ascii (for example when using chinese characters). Defaults to True. Ignored if `skip_json_loads` is True.
52
53
  chunk_length (int, optional): Size in bytes of the file chunks to read at once. Ignored if `json_fd` is None. Do not use! Use `from_file` or `load` instead. Defaults to 1MB.
53
-
54
+ stream_stable (bool, optional): Set to True when the JSON to be repaired is the accumulation of streaming JSON at a certain moment; this keeps the repair results stable. Defaults to False.
54
55
  Returns:
55
56
  Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON or a tuple with the repaired JSON and repair log.
56
57
  """
57
- parser = JSONParser(json_str, json_fd, logging, chunk_length)
58
+ parser = JSONParser(json_str, json_fd, logging, chunk_length, stream_stable)
58
59
  if skip_json_loads:
59
60
  parsed_json = parser.parse()
60
61
  else:
@@ -76,6 +77,7 @@ def loads(
76
77
  json_str: str,
77
78
  skip_json_loads: bool = False,
78
79
  logging: bool = False,
80
+ stream_stable: bool = False,
79
81
  ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
80
82
  """
81
83
  This function works like `json.loads()` except that it will fix your JSON in the process.
@@ -94,6 +96,7 @@ def loads(
94
96
  return_objects=True,
95
97
  skip_json_loads=skip_json_loads,
96
98
  logging=logging,
99
+ stream_stable=stream_stable,
97
100
  )
98
101
 
99
102
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: json_repair
3
- Version: 0.42.0
3
+ Version: 0.44.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -188,6 +188,14 @@ Some rules of thumb to use:
188
188
  - `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
189
189
  - If you are having issues with escaping pass the string as **raw** string like: `r"string with escaping\""`
190
190
 
191
+ ### Use json_repair with streaming
192
+
193
+ Sometimes you are streaming some data and want to repair the JSON coming from it. Normally this won't work, but you can pass `stream_stable=True` to `repair_json()` or `loads()` to make it work:
194
+
195
+ ```
196
+ stream_output = repair_json(stream_input, stream_stable=True)
197
+ ```
198
+
191
199
  ### Use json_repair from CLI
192
200
 
193
201
  Install the library for command-line with:
@@ -1,14 +1,14 @@
1
1
  json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
2
2
  json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
3
  json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
4
- json_repair/json_parser.py,sha256=bu8FBxaazJ_tRJQmdZA7Me68HD7t3JuReVgjvyGWbmQ,39174
5
- json_repair/json_repair.py,sha256=k-5HRRlCqrxNmJi0u1KE3IUeL4HXqi1XZ7oAL-NFDLo,10314
4
+ json_repair/json_parser.py,sha256=ID60F0RMzaCpeHPkZbuidJcsmrVBiPmQDRUOgjoeedE,41972
5
+ json_repair/json_repair.py,sha256=o84um759Alft7mlj7lXZFtPQZQPjbo5Jxraa7dTdiRg,10621
6
6
  json_repair/object_comparer.py,sha256=SeicB6_N4BHAEPon7s2BELEaJc4oyR9ZhfX2RgPk6Bw,1682
7
7
  json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  json_repair/string_file_wrapper.py,sha256=koZmdq2-Z5K7XF1bDqX6dEbNaVMJYcMTjq-aGe6NQvA,4526
9
- json_repair-0.42.0.dist-info/licenses/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
10
- json_repair-0.42.0.dist-info/METADATA,sha256=XJXQxqg6znbpuxK1hTY_0v3FB4KtH_8gOMPD-u_EdvY,11860
11
- json_repair-0.42.0.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
12
- json_repair-0.42.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
13
- json_repair-0.42.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
14
- json_repair-0.42.0.dist-info/RECORD,,
9
+ json_repair-0.44.0.dist-info/licenses/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
10
+ json_repair-0.44.0.dist-info/METADATA,sha256=mu_r9oiyo_35hwk745ZTFoMZrJ9PBjRPjFKgICkKSZQ,12157
11
+ json_repair-0.44.0.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
12
+ json_repair-0.44.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
13
+ json_repair-0.44.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
14
+ json_repair-0.44.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (79.0.0)
2
+ Generator: setuptools (80.0.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5