json-repair 0.48.0__py3-none-any.whl → 0.50.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -155,7 +155,7 @@ class JSONParser:
155
155
  return idx
156
156
  return idx
157
157
 
158
- def skip_to_character(self, character: str, idx: int = 0) -> int:
158
+ def skip_to_character(self, character: str | list, idx: int = 0) -> int:
159
159
  """
160
160
  This function quickly iterates to find a character, syntactic sugar to make the code more concise
161
161
  """
@@ -163,12 +163,16 @@ class JSONParser:
163
163
  char = self.json_str[self.index + idx]
164
164
  except IndexError:
165
165
  return idx
166
- while char != character:
166
+ character_list = character if isinstance(character, list) else [character]
167
+ while char not in character_list:
167
168
  idx += 1
168
169
  try:
169
170
  char = self.json_str[self.index + idx]
170
171
  except IndexError:
171
172
  return idx
173
+ if self.json_str[self.index + idx - 1] == "\\":
174
+ # Ah shoot this was actually escaped, continue
175
+ return self.skip_to_character(character, idx + 1)
172
176
  return idx
173
177
 
174
178
  def _log(self, text: str) -> None:
@@ -66,7 +66,7 @@ def repair_json(
66
66
  chunk_length: int = 0,
67
67
  stream_stable: bool = False,
68
68
  **json_dumps_args,
69
- ) -> JSONReturnType | tuple[JSONReturnType, list[dict[str, str]]]:
69
+ ) -> JSONReturnType | tuple[JSONReturnType, list[dict[str, str]]] | tuple[JSONReturnType, list]:
70
70
  """
71
71
  Given a json formatted string, it will try to decode it and, if it fails, it will try to fix it.
72
72
 
@@ -74,13 +74,13 @@ def repair_json(
74
74
  json_str (str, optional): The JSON string to repair. Defaults to an empty string.
75
75
  return_objects (bool, optional): If True, return the decoded data structure. Defaults to False.
76
76
  skip_json_loads (bool, optional): If True, skip calling the built-in json.loads() function to verify that the json is valid before attempting to repair. Defaults to False.
77
- logging (bool, optional): If True, return a tuple with the repaired json and a log of all repair actions. Defaults to False.
77
+ logging (bool, optional): If True, return a tuple with the repaired json and a log of all repair actions. Defaults to False. When no repairs were required, the repair log will be an empty list.
78
78
  json_fd (Optional[TextIO], optional): File descriptor for JSON input. Do not use! Use `from_file` or `load` instead. Defaults to None.
79
79
  ensure_ascii (bool, optional): Set to False to avoid converting non-latin characters to ascii (for example when using chinese characters). Defaults to True. Ignored if `skip_json_loads` is True.
80
80
  chunk_length (int, optional): Size in bytes of the file chunks to read at once. Ignored if `json_fd` is None. Do not use! Use `from_file` or `load` instead. Defaults to 1MB.
81
81
  stream_stable (bool, optional): Use when the json to be repaired is the accumulation of streaming json at a certain moment. If this parameter is set to True, the repair results are kept stable. Defaults to False.
82
82
  Returns:
83
- Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON or a tuple with the repaired JSON and repair log.
83
+ Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON or a tuple with the repaired JSON and repair log when logging is True.
84
84
  """
85
85
  parser = JSONParser(json_str, json_fd, logging, chunk_length, stream_stable)
86
86
  if skip_json_loads:
@@ -93,6 +93,10 @@ def repair_json(
93
93
  # It's useful to return the actual object instead of the json string,
94
94
  # it allows this lib to be a replacement of the json library
95
95
  if return_objects or logging:
96
+ # If logging is True, the user should expect a tuple.
97
+ # If json.load(s) worked, the repair log list is empty
98
+ if logging and not isinstance(parsed_json, tuple):
99
+ return parsed_json, []
96
100
  return parsed_json
97
101
  # Avoid returning only a pair of quotes if it's an empty string
98
102
  elif parsed_json == "":
@@ -104,11 +104,17 @@ def parse_string(self: "JSONParser") -> str | bool | None:
104
104
  char = self.get_char_at()
105
105
  unmatched_delimiter = False
106
106
  while char and char != rstring_delimiter:
107
- if missing_quotes and self.context.current == ContextValues.OBJECT_KEY and (char == ":" or char.isspace()):
108
- self.log(
109
- "While parsing a string missing the left delimiter in object key context, we found a :, stopping here",
110
- )
111
- break
107
+ if missing_quotes:
108
+ if self.context.current == ContextValues.OBJECT_KEY and (char == ":" or char.isspace()):
109
+ self.log(
110
+ "While parsing a string missing the left delimiter in object key context, we found a :, stopping here",
111
+ )
112
+ break
113
+ elif self.context.current == ContextValues.ARRAY and char in ["]", ","]:
114
+ self.log(
115
+ "While parsing a string missing the left delimiter in array context, we found a ] or ,, stopping here",
116
+ )
117
+ break
112
118
  if (
113
119
  not self.stream_stable
114
120
  and self.context.current == ContextValues.OBJECT_VALUE
@@ -385,16 +391,30 @@ def parse_string(self: "JSONParser") -> str | bool | None:
385
391
  self.index += 1
386
392
  char = self.get_char_at()
387
393
  elif self.context.current == ContextValues.ARRAY:
388
- # If we got up to here it means that this is a situation like this:
389
- # ["bla bla bla "puppy" bla bla bla "kitty" bla bla"]
390
- # So we need to ignore this quote
391
- self.log(
392
- "While parsing a string in Array context, we detected a quoted section that would have closed the string but has a different meaning here, ignoring it",
393
- )
394
- unmatched_delimiter = not unmatched_delimiter
395
- string_acc += str(char)
396
- self.index += 1
397
- char = self.get_char_at()
394
+ # Let's check if after this quote there are two quotes in a row followed by a comma or a closing bracket
395
+ i = self.skip_to_character(character=[rstring_delimiter, "]"], idx=i + 1)
396
+ next_c = self.get_char_at(i)
397
+ even_delimiters = next_c and next_c == rstring_delimiter
398
+ while even_delimiters and next_c and next_c == rstring_delimiter:
399
+ i = self.skip_to_character(character=[rstring_delimiter, "]"], idx=i + 1)
400
+ i = self.skip_to_character(character=[rstring_delimiter, "]"], idx=i + 1)
401
+ next_c = self.get_char_at(i)
402
+ # i = self.skip_whitespaces_at(idx=i + 1, move_main_index=False)
403
+ # next_c = self.get_char_at(i)
404
+ # if next_c in [",", "]"]:
405
+ if even_delimiters and next_c != "]":
406
+ # If we got up to here it means that this is a situation like this:
407
+ # ["bla bla bla "puppy" bla bla bla "kitty" bla bla"]
408
+ # So we need to ignore this quote
409
+ self.log(
410
+ "While parsing a string in Array context, we detected a quoted section that would have closed the string but has a different meaning here, ignoring it",
411
+ )
412
+ unmatched_delimiter = not unmatched_delimiter
413
+ string_acc += str(char)
414
+ self.index += 1
415
+ char = self.get_char_at()
416
+ else:
417
+ break
398
418
  elif self.context.current == ContextValues.OBJECT_KEY:
399
419
  # In this case we just ignore this and move on
400
420
  self.log(
@@ -1,36 +1,14 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: json_repair
3
- Version: 0.48.0
3
+ Version: 0.50.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
- License: MIT License
7
-
8
- Copyright (c) 2023 Stefano Baccianella
9
-
10
- Permission is hereby granted, free of charge, to any person obtaining a copy
11
- of this software and associated documentation files (the "Software"), to deal
12
- in the Software without restriction, including without limitation the rights
13
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
- copies of the Software, and to permit persons to whom the Software is
15
- furnished to do so, subject to the following conditions:
16
-
17
- The above copyright notice and this permission notice shall be included in all
18
- copies or substantial portions of the Software.
19
-
20
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
- SOFTWARE.
27
-
6
+ License-Expression: MIT
28
7
  Project-URL: Homepage, https://github.com/mangiucugna/json_repair/
29
8
  Project-URL: Bug Tracker, https://github.com/mangiucugna/json_repair/issues
30
9
  Project-URL: Live demo, https://mangiucugna.github.io/json_repair/
31
10
  Keywords: JSON,REPAIR,LLM,PARSER
32
11
  Classifier: Programming Language :: Python :: 3
33
- Classifier: License :: OSI Approved :: MIT License
34
12
  Classifier: Operating System :: OS Independent
35
13
  Requires-Python: >=3.10
36
14
  Description-Content-Type: text/markdown
@@ -39,7 +17,8 @@ Dynamic: license-file
39
17
 
40
18
  [![PyPI](https://img.shields.io/pypi/v/json-repair)](https://pypi.org/project/json-repair/)
41
19
  ![Python version](https://img.shields.io/badge/python-3.10+-important)
42
- [![PyPI downloads](https://img.shields.io/pypi/dm/json-repair)](https://pypi.org/project/json-repair/)
20
+ [![PyPI Downloads](https://static.pepy.tech/badge/json-repair/month)](https://pepy.tech/projects/json-repair)
21
+ [![PyPI Downloads](https://static.pepy.tech/badge/json-repair)](https://pepy.tech/projects/json-repair)
43
22
  [![Github Sponsors](https://img.shields.io/github/sponsors/mangiucugna)](https://github.com/sponsors/mangiucugna)
44
23
  [![GitHub Repo stars](https://img.shields.io/github/stars/mangiucugna/json_repair?style=flat)](https://github.com/mangiucugna/json_repair/stargazers)
45
24
 
@@ -69,11 +48,6 @@ I searched for a lightweight python package that was able to reliably fix this p
69
48
 
70
49
  *So I wrote one*
71
50
 
72
- ### Wouldn't GPT-4o Structured Output make this library outdated?
73
-
74
- As part of my job we use OpenAI APIs and we noticed that even with structured output sometimes the result isn't a fully valid json.
75
- So we still use this library to cover those outliers.
76
-
77
51
  # Supported use cases
78
52
 
79
53
  ### Fixing Syntax Errors in JSON
@@ -2,20 +2,20 @@ json_repair/__init__.py,sha256=JdJIZNCKV3MfIviryqK8NH8yGssCta2-192CekcwH-o,174
2
2
  json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
3
  json_repair/constants.py,sha256=cv2gvyosuq0me0600WyTysM9avrtfXPuXYR26tawcuo,158
4
4
  json_repair/json_context.py,sha256=WsMOjqpGSr6aaDONcrk8UFtTurzWon2Qq9AoBBYseoI,934
5
- json_repair/json_parser.py,sha256=rTuL8ESslQ4XK9fkLmBIpS4e8xr6QwlZRVyJwzJFqBE,7356
6
- json_repair/json_repair.py,sha256=txblCJtcTpXcQaT15tavulkJPtyRYe2cfYpPHZcvPv0,11233
5
+ json_repair/json_parser.py,sha256=glod61Zc6HtVxwvGxBwbz8WEU0BB5LkNZXLw4Z956yI,7632
6
+ json_repair/json_repair.py,sha256=sDhXzDZxu0QmaFzICPTtf_q7yOY1A1Lf_iQG6Potsco,11572
7
7
  json_repair/object_comparer.py,sha256=XKV3MRab8H7_v4sm-wpEa5le0XX9OeycWo5S-MFm-GI,1716
8
8
  json_repair/parse_array.py,sha256=-rh65JcfT-FtXiR6s8RYlMfI-6LzVr08ytlDh6Z2CFE,2181
9
9
  json_repair/parse_boolean_or_null.py,sha256=WMSkvvxsp4wvauBcDqtt9WnLMD5SMoxeRfZFXp3FEBc,890
10
10
  json_repair/parse_comment.py,sha256=JHtQ_QlxOvPNnMh7lhUaoTjFGelqjhTNq7qn9xUE7SU,2648
11
11
  json_repair/parse_number.py,sha256=33zAtkbuVzi9Lqjxu7cXn9WlVzd3WjRx9Ln_LFzVL4o,1259
12
12
  json_repair/parse_object.py,sha256=UzkY0C5NSE2CtVnZwugMyhhtUJPgs0MwBb4kF4l2ftU,4563
13
- json_repair/parse_string.py,sha256=Ju1txvEWrOQnncigBOnlkEdVwXYGz4jaKr9QOpjAx5o,22947
13
+ json_repair/parse_string.py,sha256=FTTlgfjXcR2N4Et_f2qBhqXvfEps_L0oznp_ORup1_A,24323
14
14
  json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  json_repair/string_file_wrapper.py,sha256=tGkWBEUPE-CZPf4uSM5NE9oSDTpskX0myJiXsl-gbds,4333
16
- json_repair-0.48.0.dist-info/licenses/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
17
- json_repair-0.48.0.dist-info/METADATA,sha256=--ltEVHVgM9Jh9wxa--Ad22rM34kXNO5faj1oHtBIO8,12411
18
- json_repair-0.48.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
19
- json_repair-0.48.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
20
- json_repair-0.48.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
21
- json_repair-0.48.0.dist-info/RECORD,,
16
+ json_repair-0.50.0.dist-info/licenses/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
17
+ json_repair-0.50.0.dist-info/METADATA,sha256=_90s7Q5c2SSf--pfiykcfjLXdDZqBTjGU269JXj1uPI,10987
18
+ json_repair-0.50.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
19
+ json_repair-0.50.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
20
+ json_repair-0.50.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
21
+ json_repair-0.50.0.dist-info/RECORD,,