json-repair 0.13.0__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -34,6 +34,7 @@ class JSONParser:
34
34
  self.index = 0
35
35
  # This is used in the object member parsing to manage the special cases of missing quotes in key or value
36
36
  self.context = ""
37
+ self.context_stack = []
37
38
 
38
39
  def parse(self) -> Union[Dict[str, Any], List[Any], str, float, int, bool, None]:
39
40
  return self.parse_json()
@@ -97,7 +98,7 @@ class JSONParser:
97
98
 
98
99
  # We are now searching for the string key
99
100
  # Context is used in the string parser to manage the lack of quotes
100
- self.context = "object_key"
101
+ self.update_context("object_key")
101
102
 
102
103
  self.skip_whitespaces_at()
103
104
 
@@ -119,12 +120,13 @@ class JSONParser:
119
120
  if (self.get_char_at() or "") != ":":
120
121
  self.insert_char_at(":")
121
122
  self.index += 1
122
- self.context = "object_value"
123
+ self.update_context("")
124
+ self.update_context("object_value")
123
125
  # The value can be any valid json
124
126
  value = self.parse_json()
125
127
 
126
128
  # Reset context since our job is done
127
- self.context = ""
129
+ self.update_context("")
128
130
  obj[key] = value
129
131
 
130
132
  if (self.get_char_at() or "") in [",", "'", '"']:
@@ -157,6 +159,9 @@ class JSONParser:
157
159
  while char and (char.isspace() or char == ","):
158
160
  self.index += 1
159
161
  char = self.get_char_at()
162
+ # If this is the right value of an object and we are closing the object, it means the array is over
163
+ if self.context == "object_value" and char == "}":
164
+ break
160
165
 
161
166
  # Especially at the end of an LLM generated json you might miss the last "]"
162
167
  char = self.get_char_at()
@@ -166,6 +171,7 @@ class JSONParser:
166
171
  # Remove trailing "," before adding the "]"
167
172
  self.remove_char_at()
168
173
  self.insert_char_at("]")
174
+ self.index -= 1
169
175
 
170
176
  self.index += 1
171
177
  return arr
@@ -177,15 +183,14 @@ class JSONParser:
177
183
 
178
184
  # Flag to manage corner cases related to missing starting quote
179
185
  fixed_quotes = False
180
- double_delimiter = False
181
186
  lstring_delimiter = rstring_delimiter = '"'
182
187
  if isinstance(string_quotes, list):
183
188
  lstring_delimiter = string_quotes[0]
184
189
  rstring_delimiter = string_quotes[1]
185
190
  elif isinstance(string_quotes, str):
186
191
  lstring_delimiter = rstring_delimiter = string_quotes
192
+ # There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
187
193
  if self.get_char_at(1) == lstring_delimiter:
188
- double_delimiter = True
189
194
  self.index += 1
190
195
  char = self.get_char_at()
191
196
  if char != lstring_delimiter:
@@ -204,7 +209,6 @@ class JSONParser:
204
209
  # * It iterated over the entire sequence
205
210
  # * If we are fixing missing quotes in an object, when it finds the special terminators
206
211
  char = self.get_char_at()
207
- fix_broken_markdown_link = False
208
212
  while char and char != rstring_delimiter:
209
213
  if fixed_quotes:
210
214
  if self.context == "object_key" and (char == ":" or char.isspace()):
@@ -221,19 +225,27 @@ class JSONParser:
221
225
  else:
222
226
  self.remove_char_at(-1)
223
227
  self.index -= 1
224
- # ChatGPT sometimes forget to quote links in markdown like: { "content": "[LINK]("https://google.com")" }
228
+ # ChatGPT sometimes forgets to quote content inside HTML tags or markdown, so we handle those stray quotes here
225
229
  if (
226
230
  char == rstring_delimiter
227
- # Next character is not a comma
228
- and self.get_char_at(1) != ","
229
- and (
230
- fix_broken_markdown_link
231
- or (self.get_char_at(-2) == "]" and self.get_char_at(-1)) == "("
232
- )
231
+ # Next character is not a delimiter
232
+ and self.get_char_at(1) not in [",", ":", "]", "}"]
233
233
  ):
234
- fix_broken_markdown_link = not fix_broken_markdown_link
235
- self.index += 1
236
- char = self.get_char_at()
234
+ # Special case here, in case of double quotes one after another
235
+ if self.get_char_at(1) == rstring_delimiter:
236
+ # self destruct this character
237
+ self.remove_char_at()
238
+ else:
239
+ # Check if eventually there is a rstring delimiter, otherwise we bail
240
+ i = 2
241
+ next_c = self.get_char_at(i)
242
+ while next_c and next_c != rstring_delimiter:
243
+ i += 1
244
+ next_c = self.get_char_at(i)
245
+ # In that case we ignore this rstring delimiter
246
+ if next_c:
247
+ self.index += 1
248
+ char = self.get_char_at()
237
249
 
238
250
  if char and fixed_quotes and self.context == "object_key" and char.isspace():
239
251
  self.skip_whitespaces_at()
@@ -247,10 +259,8 @@ class JSONParser:
247
259
  self.insert_char_at(rstring_delimiter)
248
260
  else:
249
261
  self.index += 1
250
- if double_delimiter and self.get_char_at() == rstring_delimiter:
251
- self.index += 1
252
262
 
253
- return self.json_str[start:end]
263
+ return self.json_str[start:end].rstrip()
254
264
 
255
265
  def parse_number(self) -> Union[float, int, str]:
256
266
  # <number> is a valid real number expressed in one of a number of given formats
@@ -319,6 +329,19 @@ class JSONParser:
319
329
  except IndexError:
320
330
  return
321
331
 
332
+ def update_context(self, value: str) -> None:
333
+ # If a value is provided update the context variable and save in stack
334
+ if value:
335
+ if self.context:
336
+ self.context_stack.append(self.context)
337
+ self.context = value
338
+ # Otherwise pop and update the context, or empty if the stack is empty
339
+ else:
340
+ try:
341
+ self.context = self.context_stack.pop()
342
+ except Exception:
343
+ self.context = ""
344
+
322
345
 
323
346
  def repair_json(
324
347
  json_str: str, return_objects: bool = False, skip_json_loads: bool = False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.13.0
3
+ Version: 0.14.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -150,7 +150,10 @@ You will need owner access to this repository
150
150
  - Run `python -m build`
151
151
  - Create a new release in GitHub, making sure to reference all the issues solved and contributors. Create the new tag, same as the one in the build configuration
152
152
  - Once the release is created, a new GitHub Actions workflow will start to publish on PyPI; make sure it didn't fail
153
-
153
+ ---
154
+ # Repair JSON in other programming languages
155
+ - TypeScript: https://github.com/josdejong/jsonrepair
156
+ - Go: https://github.com/RealAlexandreAI/json-repair
154
157
  ---
155
158
  # Bonus Content
156
159
  If you need some good Custom Instructions (System Message) to improve your chatbot responses try https://gist.github.com/mangiucugna/7ec015c4266df11be8aa510be0110fe4
@@ -0,0 +1,7 @@
1
+ json_repair/__init__.py,sha256=AlNie5y6BZBioGi5fzTAUvum_y0U5aL5aNsuQ_68LQc,175
2
+ json_repair/json_repair.py,sha256=8B5HfWoLlUUtRYq1cnbajOxWiMSD9nxNW2cRFPjFfVE,15817
3
+ json_repair-0.14.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
+ json_repair-0.14.0.dist-info/METADATA,sha256=82KldmuVFLXbNy6SXar9MsulkcBUM1K8RX13pNysHQU,7355
5
+ json_repair-0.14.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
+ json_repair-0.14.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
+ json_repair-0.14.0.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- json_repair/__init__.py,sha256=AlNie5y6BZBioGi5fzTAUvum_y0U5aL5aNsuQ_68LQc,175
2
- json_repair/json_repair.py,sha256=BHDcvo8C-vc3QO4dhgD5cP7IZVkGWGDbv8hNmQH1B-I,14653
3
- json_repair-0.13.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
- json_repair-0.13.0.dist-info/METADATA,sha256=E6v_JcBwXe4oMJhtxgF-9-24kG2j5LiH-ivqufNHFpA,7200
5
- json_repair-0.13.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
- json_repair-0.13.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
- json_repair-0.13.0.dist-info/RECORD,,