json-repair 0.43.0__py3-none-any.whl → 0.44.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,6 +17,7 @@ class JSONParser:
17
17
  json_fd: Optional[TextIO],
18
18
  logging: Optional[bool],
19
19
  json_fd_chunk_length: int = 0,
20
+ stream_stable: bool = False,
20
21
  ) -> None:
21
22
  # The string to parse
22
23
  self.json_str: Union[str, StringFileWrapper] = json_str
@@ -40,6 +41,14 @@ class JSONParser:
40
41
  else:
41
42
  # No-op
42
43
  self.log = lambda *args, **kwargs: None
44
+ # Enable this when the JSON to be repaired is the accumulation of streaming JSON at a certain moment,
45
+ # e.g. json obtained from llm response.
46
+ # If this parameter is set to True, the repair results are kept stable. For example:
47
+ # case 1: '{"key": "val\\' => '{"key": "val"}'
48
+ # case 2: '{"key": "val\\n' => '{"key": "val\\n"}'
49
+ # case 3: '{"key": "val\\n123,`key2:value2' => '{"key": "val\\n123,`key2:value2"}'
50
+ # case 4: '{"key": "val\\n123,`key2:value2`"}' => '{"key": "val\\n123,`key2:value2`"}'
51
+ self.stream_stable = stream_stable
43
52
 
44
53
  def parse(
45
54
  self,
@@ -159,6 +168,10 @@ class JSONParser:
159
168
  and isinstance(new_array[0], list)
160
169
  else new_array
161
170
  )
171
+ self.skip_whitespaces_at()
172
+ if self.get_char_at() == ",":
173
+ self.index += 1
174
+ self.skip_whitespaces_at()
162
175
  continue
163
176
  else:
164
177
  self.index = rollback_index
@@ -374,10 +387,15 @@ class JSONParser:
374
387
  "While parsing a string missing the left delimiter in object key context, we found a :, stopping here",
375
388
  )
376
389
  break
377
- if self.context.current == ContextValues.OBJECT_VALUE and char in [
378
- ",",
379
- "}",
380
- ]:
390
+ if (
391
+ (missing_quotes or not self.stream_stable)
392
+ and self.context.current == ContextValues.OBJECT_VALUE
393
+ and char
394
+ in [
395
+ ",",
396
+ "}",
397
+ ]
398
+ ):
381
399
  rstring_delimiter_missing = True
382
400
  # check if this is a case in which the closing comma is NOT missing instead
383
401
  i = self.skip_to_character(character=rstring_delimiter, idx=1)
@@ -446,7 +464,11 @@ class JSONParser:
446
464
  "While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn't determine that a right delimiter was present. Stopping here",
447
465
  )
448
466
  break
449
- if char == "]" and ContextValues.ARRAY in self.context.context:
467
+ if (
468
+ (missing_quotes or not self.stream_stable)
469
+ and char == "]"
470
+ and ContextValues.ARRAY in self.context.context
471
+ ):
450
472
  # We found the end of an array and we are in array context
451
473
  # So let's check if we find a rstring_delimiter forward otherwise end early
452
474
  i = self.skip_to_character(rstring_delimiter)
@@ -456,6 +478,9 @@ class JSONParser:
456
478
  string_acc += char
457
479
  self.index += 1
458
480
  char = self.get_char_at()
481
+ # Unclosed string ends with a \ character. This character is ignored if stream_stable = True.
482
+ if self.stream_stable and not char and string_acc[-1] == "\\":
483
+ string_acc = string_acc[:-1]
459
484
  if char and string_acc[-1] == "\\":
460
485
  # This is a special case, if people use real strings this might happen
461
486
  self.log("Found a stray escape sequence, normalizing it")
@@ -665,14 +690,18 @@ class JSONParser:
665
690
  # A fallout of the previous special case in the while loop,
666
691
  # we need to update the index only if we had a closing quote
667
692
  if char != rstring_delimiter:
668
- self.log(
669
- "While parsing a string, we missed the closing quote, ignoring",
670
- )
671
- string_acc = string_acc.rstrip()
693
+ # When stream_stable is True, an unclosed string keeps its trailing whitespace (and the missing-quote log entry is skipped)
694
+ if not self.stream_stable:
695
+ self.log(
696
+ "While parsing a string, we missed the closing quote, ignoring",
697
+ )
698
+ string_acc = string_acc.rstrip()
672
699
  else:
673
700
  self.index += 1
674
701
 
675
- if missing_quotes or (string_acc and string_acc[-1] == "\n"):
702
+ if not self.stream_stable and (
703
+ missing_quotes or (string_acc and string_acc[-1] == "\n")
704
+ ):
676
705
  # Clean the whitespaces for some corner cases
677
706
  string_acc = string_acc.rstrip()
678
707
 
@@ -38,6 +38,7 @@ def repair_json(
38
38
  json_fd: Optional[TextIO] = None,
39
39
  ensure_ascii: bool = True,
40
40
  chunk_length: int = 0,
41
+ stream_stable: bool = False,
41
42
  ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
42
43
  """
43
44
  Given a json formatted string, it will try to decode it and, if it fails, it will try to fix it.
@@ -50,11 +51,11 @@ def repair_json(
50
51
  json_fd (Optional[TextIO], optional): File descriptor for JSON input. Do not use! Use `from_file` or `load` instead. Defaults to None.
51
52
  ensure_ascii (bool, optional): Set to False to avoid converting non-latin characters to ascii (for example when using chinese characters). Defaults to True. Ignored if `skip_json_loads` is True.
52
53
  chunk_length (int, optional): Size in bytes of the file chunks to read at once. Ignored if `json_fd` is None. Do not use! Use `from_file` or `load` instead. Defaults to 1MB.
53
-
54
+ stream_stable (bool, optional): Set to True when the JSON to be repaired is the accumulation of streaming JSON at a certain moment (e.g. a partial LLM response); the repair results are then kept stable. Defaults to False.
54
55
  Returns:
55
56
  Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON or a tuple with the repaired JSON and repair log.
56
57
  """
57
- parser = JSONParser(json_str, json_fd, logging, chunk_length)
58
+ parser = JSONParser(json_str, json_fd, logging, chunk_length, stream_stable)
58
59
  if skip_json_loads:
59
60
  parsed_json = parser.parse()
60
61
  else:
@@ -76,6 +77,7 @@ def loads(
76
77
  json_str: str,
77
78
  skip_json_loads: bool = False,
78
79
  logging: bool = False,
80
+ stream_stable: bool = False,
79
81
  ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
80
82
  """
81
83
  This function works like `json.loads()` except that it will fix your JSON in the process.
@@ -94,6 +96,7 @@ def loads(
94
96
  return_objects=True,
95
97
  skip_json_loads=skip_json_loads,
96
98
  logging=logging,
99
+ stream_stable=stream_stable,
97
100
  )
98
101
 
99
102
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: json_repair
3
- Version: 0.43.0
3
+ Version: 0.44.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -188,6 +188,14 @@ Some rules of thumb to use:
188
188
  - `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
189
189
  - If you are having issues with escaping pass the string as **raw** string like: `r"string with escaping\""`
190
190
 
191
+ ### Use json_repair with streaming
192
+
193
+ Sometimes you are streaming some data and want to repair the JSON coming from it. Normally this won't work, but you can pass `stream_stable=True` to `repair_json()` or `loads()` to make it work:
194
+
195
+ ```
196
+ stream_output = repair_json(stream_input, stream_stable=True)
197
+ ```
198
+
191
199
  ### Use json_repair from CLI
192
200
 
193
201
  Install the library for command-line with:
@@ -1,14 +1,14 @@
1
1
  json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
2
2
  json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
3
  json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
4
- json_repair/json_parser.py,sha256=GFziN2KELYDWElzPda5wPfSeFIHYF8enJSr0c2YzKmQ,40451
5
- json_repair/json_repair.py,sha256=k-5HRRlCqrxNmJi0u1KE3IUeL4HXqi1XZ7oAL-NFDLo,10314
4
+ json_repair/json_parser.py,sha256=ID60F0RMzaCpeHPkZbuidJcsmrVBiPmQDRUOgjoeedE,41972
5
+ json_repair/json_repair.py,sha256=o84um759Alft7mlj7lXZFtPQZQPjbo5Jxraa7dTdiRg,10621
6
6
  json_repair/object_comparer.py,sha256=SeicB6_N4BHAEPon7s2BELEaJc4oyR9ZhfX2RgPk6Bw,1682
7
7
  json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  json_repair/string_file_wrapper.py,sha256=koZmdq2-Z5K7XF1bDqX6dEbNaVMJYcMTjq-aGe6NQvA,4526
9
- json_repair-0.43.0.dist-info/licenses/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
10
- json_repair-0.43.0.dist-info/METADATA,sha256=fR8K3LENGHxwkjG2pVRedVsh6XGBq0JWPV3weLz1KIM,11860
11
- json_repair-0.43.0.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
12
- json_repair-0.43.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
13
- json_repair-0.43.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
14
- json_repair-0.43.0.dist-info/RECORD,,
9
+ json_repair-0.44.0.dist-info/licenses/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
10
+ json_repair-0.44.0.dist-info/METADATA,sha256=mu_r9oiyo_35hwk745ZTFoMZrJ9PBjRPjFKgICkKSZQ,12157
11
+ json_repair-0.44.0.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
12
+ json_repair-0.44.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
13
+ json_repair-0.44.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
14
+ json_repair-0.44.0.dist-info/RECORD,,