json-repair 0.43.0__py3-none-any.whl → 0.44.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- from typing import Any, Dict, List, Literal, Optional, TextIO, Tuple, Union
1
+ from typing import Any, ClassVar, Dict, List, Literal, Optional, TextIO, Tuple, Union
2
2
 
3
3
  from .json_context import ContextValues, JsonContext
4
4
  from .object_comparer import ObjectComparer
@@ -9,7 +9,7 @@ JSONReturnType = Union[Dict[str, Any], List[Any], str, float, int, bool, None]
9
9
 
10
10
  class JSONParser:
11
11
  # Constants
12
- STRING_DELIMITERS = ['"', "'", "“", "”"]
12
+ STRING_DELIMITERS: ClassVar[list[str]] = ['"', "'", "“", "”"]
13
13
 
14
14
  def __init__(
15
15
  self,
@@ -17,6 +17,7 @@ class JSONParser:
17
17
  json_fd: Optional[TextIO],
18
18
  logging: Optional[bool],
19
19
  json_fd_chunk_length: int = 0,
20
+ stream_stable: bool = False,
20
21
  ) -> None:
21
22
  # The string to parse
22
23
  self.json_str: Union[str, StringFileWrapper] = json_str
@@ -40,6 +41,14 @@ class JSONParser:
40
41
  else:
41
42
  # No-op
42
43
  self.log = lambda *args, **kwargs: None
44
+ # When the json to be repaired is the accumulation of streaming json at a certain moment.
45
+ # e.g. json obtained from llm response.
46
+ # Setting this parameter to True keeps the repair results stable. For example:
47
+ # case 1: '{"key": "val\\' => '{"key": "val"}'
48
+ # case 2: '{"key": "val\\n' => '{"key": "val\\n"}'
49
+ # case 3: '{"key": "val\\n123,`key2:value2' => '{"key": "val\\n123,`key2:value2"}'
50
+ # case 4: '{"key": "val\\n123,`key2:value2`"}' => '{"key": "val\\n123,`key2:value2`"}'
51
+ self.stream_stable = stream_stable
43
52
 
44
53
  def parse(
45
54
  self,
@@ -159,6 +168,10 @@ class JSONParser:
159
168
  and isinstance(new_array[0], list)
160
169
  else new_array
161
170
  )
171
+ self.skip_whitespaces_at()
172
+ if self.get_char_at() == ",":
173
+ self.index += 1
174
+ self.skip_whitespaces_at()
162
175
  continue
163
176
  else:
164
177
  self.index = rollback_index
@@ -330,7 +343,7 @@ class JSONParser:
330
343
  # Ok this is not a doubled quote, check if this is an empty string or not
331
344
  i = self.skip_whitespaces_at(idx=1, move_main_index=False)
332
345
  next_c = self.get_char_at(i)
333
- if next_c in self.STRING_DELIMITERS + ["{", "["]:
346
+ if next_c in [*self.STRING_DELIMITERS, "{", "["]:
334
347
  # something fishy is going on here
335
348
  self.log(
336
349
  "While parsing a string, we found a doubled quote but also another quote afterwards, ignoring it",
@@ -374,10 +387,15 @@ class JSONParser:
374
387
  "While parsing a string missing the left delimiter in object key context, we found a :, stopping here",
375
388
  )
376
389
  break
377
- if self.context.current == ContextValues.OBJECT_VALUE and char in [
378
- ",",
379
- "}",
380
- ]:
390
+ if (
391
+ (missing_quotes or not self.stream_stable)
392
+ and self.context.current == ContextValues.OBJECT_VALUE
393
+ and char
394
+ in [
395
+ ",",
396
+ "}",
397
+ ]
398
+ ):
381
399
  rstring_delimiter_missing = True
382
400
  # check if this is a case in which the closing comma is NOT missing instead
383
401
  i = self.skip_to_character(character=rstring_delimiter, idx=1)
@@ -446,7 +464,11 @@ class JSONParser:
446
464
  "While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn't determine that a right delimiter was present. Stopping here",
447
465
  )
448
466
  break
449
- if char == "]" and ContextValues.ARRAY in self.context.context:
467
+ if (
468
+ (missing_quotes or not self.stream_stable)
469
+ and char == "]"
470
+ and ContextValues.ARRAY in self.context.context
471
+ ):
450
472
  # We found the end of an array and we are in array context
451
473
  # So let's check if we find a rstring_delimiter forward otherwise end early
452
474
  i = self.skip_to_character(rstring_delimiter)
@@ -456,6 +478,9 @@ class JSONParser:
456
478
  string_acc += char
457
479
  self.index += 1
458
480
  char = self.get_char_at()
481
+ # Unclosed string ends with a \ character. This character is ignored if stream_stable = True.
482
+ if self.stream_stable and not char and string_acc[-1] == "\\":
483
+ string_acc = string_acc[:-1]
459
484
  if char and string_acc[-1] == "\\":
460
485
  # This is a special case, if people use real strings this might happen
461
486
  self.log("Found a stray escape sequence, normalizing it")
@@ -665,14 +690,18 @@ class JSONParser:
665
690
  # A fallout of the previous special case in the while loop,
666
691
  # we need to update the index only if we had a closing quote
667
692
  if char != rstring_delimiter:
668
- self.log(
669
- "While parsing a string, we missed the closing quote, ignoring",
670
- )
671
- string_acc = string_acc.rstrip()
693
+ # if stream_stable = True, unclosed strings do not trim trailing whitespace characters
694
+ if not self.stream_stable:
695
+ self.log(
696
+ "While parsing a string, we missed the closing quote, ignoring",
697
+ )
698
+ string_acc = string_acc.rstrip()
672
699
  else:
673
700
  self.index += 1
674
701
 
675
- if missing_quotes or (string_acc and string_acc[-1] == "\n"):
702
+ if not self.stream_stable and (
703
+ missing_quotes or (string_acc and string_acc[-1] == "\n")
704
+ ):
676
705
  # Clean the whitespaces for some corner cases
677
706
  string_acc = string_acc.rstrip()
678
707
 
@@ -25,11 +25,37 @@ All supported use cases are in the unit tests
25
25
  import argparse
26
26
  import json
27
27
  import sys
28
- from typing import Dict, List, Optional, TextIO, Tuple, Union
28
+ from typing import Dict, List, Literal, Optional, TextIO, Tuple, Union, overload
29
29
 
30
30
  from .json_parser import JSONParser, JSONReturnType
31
31
 
32
32
 
33
+ @overload
34
+ def repair_json(
35
+ json_str: str = "",
36
+ return_objects: Literal[False] = False,
37
+ skip_json_loads: bool = False,
38
+ logging: bool = False,
39
+ json_fd: Optional[TextIO] = None,
40
+ ensure_ascii: bool = True,
41
+ chunk_length: int = 0,
42
+ stream_stable: bool = False,
43
+ ) -> str: ...
44
+
45
+
46
+ @overload
47
+ def repair_json(
48
+ json_str: str = "",
49
+ return_objects: Literal[True] = True,
50
+ skip_json_loads: bool = False,
51
+ logging: bool = False,
52
+ json_fd: Optional[TextIO] = None,
53
+ ensure_ascii: bool = True,
54
+ chunk_length: int = 0,
55
+ stream_stable: bool = False,
56
+ ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: ...
57
+
58
+
33
59
  def repair_json(
34
60
  json_str: str = "",
35
61
  return_objects: bool = False,
@@ -38,6 +64,7 @@ def repair_json(
38
64
  json_fd: Optional[TextIO] = None,
39
65
  ensure_ascii: bool = True,
40
66
  chunk_length: int = 0,
67
+ stream_stable: bool = False,
41
68
  ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
42
69
  """
43
70
  Given a json formatted string, it will try to decode it and, if it fails, it will try to fix it.
@@ -50,11 +77,11 @@ def repair_json(
50
77
  json_fd (Optional[TextIO], optional): File descriptor for JSON input. Do not use! Use `from_file` or `load` instead. Defaults to None.
51
78
  ensure_ascii (bool, optional): Set to False to avoid converting non-latin characters to ascii (for example when using chinese characters). Defaults to True. Ignored if `skip_json_loads` is True.
52
79
  chunk_length (int, optional): Size in bytes of the file chunks to read at once. Ignored if `json_fd` is None. Do not use! Use `from_file` or `load` instead. Defaults to 1MB.
53
-
80
+ stream_stable (bool, optional): When the json to be repaired is the accumulation of streaming json at a certain moment, setting this parameter to True will keep the repair results stable. Defaults to False.
54
81
  Returns:
55
82
  Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON or a tuple with the repaired JSON and repair log.
56
83
  """
57
- parser = JSONParser(json_str, json_fd, logging, chunk_length)
84
+ parser = JSONParser(json_str, json_fd, logging, chunk_length, stream_stable)
58
85
  if skip_json_loads:
59
86
  parsed_json = parser.parse()
60
87
  else:
@@ -76,7 +103,8 @@ def loads(
76
103
  json_str: str,
77
104
  skip_json_loads: bool = False,
78
105
  logging: bool = False,
79
- ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
106
+ stream_stable: bool = False,
107
+ ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]], str]:
80
108
  """
81
109
  This function works like `json.loads()` except that it will fix your JSON in the process.
82
110
  It is a wrapper around the `repair_json()` function with `return_objects=True`.
@@ -87,13 +115,14 @@ def loads(
87
115
  logging (bool, optional): If True, return a tuple with the repaired json and a log of all repair actions. Defaults to False.
88
116
 
89
117
  Returns:
90
- Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON object or a tuple with the repaired JSON object and repair log.
118
+ Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]], str]: The repaired JSON object or a tuple with the repaired JSON object and repair log.
91
119
  """
92
120
  return repair_json(
93
121
  json_str=json_str,
94
122
  return_objects=True,
95
123
  skip_json_loads=skip_json_loads,
96
124
  logging=logging,
125
+ stream_stable=stream_stable,
97
126
  )
98
127
 
99
128
 
@@ -238,7 +267,7 @@ def cli(inline_args: Optional[List[str]] = None) -> int:
238
267
  else:
239
268
  print(json.dumps(result, indent=args.indent, ensure_ascii=ensure_ascii))
240
269
  except Exception as e: # pragma: no cover
241
- print(f"Error: {str(e)}", file=sys.stderr)
270
+ print(f"Error: {e!s}", file=sys.stderr)
242
271
  return 1
243
272
 
244
273
  return 0 # Success
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: json_repair
3
- Version: 0.43.0
3
+ Version: 0.44.1
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -188,6 +188,14 @@ Some rules of thumb to use:
188
188
  - `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
189
189
  - If you are having issues with escaping pass the string as **raw** string like: `r"string with escaping\""`
190
190
 
191
+ ### Use json_repair with streaming
192
+
193
+ Sometimes you are streaming some data and want to repair the JSON coming from it. Normally this won't work but you can pass `stream_stable` to `repair_json()` or `loads()` to make it work:
194
+
195
+ ```
196
+ stream_output = repair_json(stream_input, stream_stable=True)
197
+ ```
198
+
191
199
  ### Use json_repair from CLI
192
200
 
193
201
  Install the library for command-line with:
@@ -1,14 +1,14 @@
1
1
  json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
2
2
  json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
3
  json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
4
- json_repair/json_parser.py,sha256=GFziN2KELYDWElzPda5wPfSeFIHYF8enJSr0c2YzKmQ,40451
5
- json_repair/json_repair.py,sha256=k-5HRRlCqrxNmJi0u1KE3IUeL4HXqi1XZ7oAL-NFDLo,10314
4
+ json_repair/json_parser.py,sha256=wmDgXAroQ4gYZdi4Tbdn3LKXnx2x2v_uanzSzqP0aSQ,42003
5
+ json_repair/json_repair.py,sha256=r-Mtr16U_n2wmHX_zNRZI2ZlLc0AV0fLWlLzGEWjJa0,11312
6
6
  json_repair/object_comparer.py,sha256=SeicB6_N4BHAEPon7s2BELEaJc4oyR9ZhfX2RgPk6Bw,1682
7
7
  json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  json_repair/string_file_wrapper.py,sha256=koZmdq2-Z5K7XF1bDqX6dEbNaVMJYcMTjq-aGe6NQvA,4526
9
- json_repair-0.43.0.dist-info/licenses/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
10
- json_repair-0.43.0.dist-info/METADATA,sha256=fR8K3LENGHxwkjG2pVRedVsh6XGBq0JWPV3weLz1KIM,11860
11
- json_repair-0.43.0.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
12
- json_repair-0.43.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
13
- json_repair-0.43.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
14
- json_repair-0.43.0.dist-info/RECORD,,
9
+ json_repair-0.44.1.dist-info/licenses/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
10
+ json_repair-0.44.1.dist-info/METADATA,sha256=VJv39wNseOemAA5tQe1dIW4ZPXFoLXFTPBCGLlLRwN8,12157
11
+ json_repair-0.44.1.dist-info/WHEEL,sha256=ooBFpIzZCPdw3uqIQsOo4qqbA4ZRPxHnOH7peeONza0,91
12
+ json_repair-0.44.1.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
13
+ json_repair-0.44.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
14
+ json_repair-0.44.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.0.0)
2
+ Generator: setuptools (80.0.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5