json-repair 0.33.0__py3-none-any.whl → 0.35.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -7,6 +7,9 @@ JSONReturnType = Union[Dict[str, Any], List[Any], str, float, int, bool, None]
7
7
 
8
8
 
9
9
  class JSONParser:
10
+ # Constants
11
+ STRING_DELIMITERS = ['"', "'", "“", "”"]
12
+
10
13
  def __init__(
11
14
  self,
12
15
  json_str: Union[str, StringFileWrapper],
@@ -89,7 +92,9 @@ class JSONParser:
89
92
  )
90
93
  return ""
91
94
  # <string> starts with a quote
92
- elif not self.context.empty and (char in ['"', "'", "“"] or char.isalpha()):
95
+ elif not self.context.empty and (
96
+ char in self.STRING_DELIMITERS or char.isalpha()
97
+ ):
93
98
  return self.parse_string()
94
99
  # <number> starts with [0-9] or minus
95
100
  elif not self.context.empty and (
@@ -130,6 +135,8 @@ class JSONParser:
130
135
  # <member> starts with a <string>
131
136
  key = ""
132
137
  while self.get_char_at():
138
+ # The rollback index needs to be updated here in case the key is empty
139
+ rollback_index = self.index
133
140
  key = str(self.parse_string())
134
141
 
135
142
  if key != "" or (key == "" and self.get_char_at() == ":"):
@@ -140,6 +147,12 @@ class JSONParser:
140
147
  "While parsing an object we found a duplicate key, closing the object here and rolling back the index",
141
148
  )
142
149
  self.index = rollback_index - 1
150
+ # add an opening curly brace to make this work
151
+ self.json_str = (
152
+ self.json_str[: self.index + 1]
153
+ + "{"
154
+ + self.json_str[self.index + 1 :]
155
+ )
143
156
  break
144
157
 
145
158
  # Skip filler whitespaces
@@ -227,7 +240,7 @@ class JSONParser:
227
240
 
228
241
  char = self.get_char_at()
229
242
  # A valid string can only start with a valid quote or, in our case, with a literal
230
- while char and char not in ['"', "'", "“"] and not char.isalnum():
243
+ while char and char not in self.STRING_DELIMITERS and not char.isalnum():
231
244
  self.index += 1
232
245
  char = self.get_char_at()
233
246
 
@@ -262,35 +275,61 @@ class JSONParser:
262
275
  if not missing_quotes:
263
276
  self.index += 1
264
277
 
278
+ self.skip_whitespaces_at()
265
279
  # There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
266
- if self.get_char_at() == lstring_delimiter:
267
- # If it's an empty key, this was easy
268
- if (
269
- self.context.current == ContextValues.OBJECT_KEY
270
- and self.get_char_at(1) == ":"
271
- ):
272
- self.index += 1
273
- return ""
274
- # Find the next delimiter
275
- i = self.skip_to_character(character=rstring_delimiter, idx=1)
276
- next_c = self.get_char_at(i)
277
- # Now check that the next character is also a delimiter to ensure that we have "".....""
278
- # In that case we ignore this rstring delimiter
279
- if next_c and (self.get_char_at(i + 1) or "") == rstring_delimiter:
280
- self.log(
281
- "While parsing a string, we found a valid starting doubled quote, ignoring it",
282
- )
283
- doubled_quotes = True
284
- self.index += 1
285
- else:
286
- # Ok this is not a doubled quote, check if this is an empty string or not
287
- i = self.skip_whitespaces_at(idx=1, move_main_index=False)
280
+ if self.get_char_at() in self.STRING_DELIMITERS:
281
+ # If the next character is the same type of quote, then we manage it as double quotes
282
+ if self.get_char_at() == lstring_delimiter:
283
+ # If it's an empty key, this was easy
284
+ if (
285
+ self.context.current == ContextValues.OBJECT_KEY
286
+ and self.get_char_at(1) == ":"
287
+ ):
288
+ self.index += 1
289
+ return ""
290
+ if self.get_char_at(1) == lstring_delimiter:
291
+ # There's something fishy about this, we found doubled quotes and then again quotes
292
+ self.log(
293
+ "While parsing a string, we found a doubled quote and then a quote again, ignoring it",
294
+ )
295
+ return ""
296
+ # Find the next delimiter
297
+ i = self.skip_to_character(character=rstring_delimiter, idx=1)
288
298
  next_c = self.get_char_at(i)
289
- if next_c not in [",", "]", "}"]:
299
+ # Now check that the next character is also a delimiter to ensure that we have "".....""
300
+ # In that case we ignore this rstring delimiter
301
+ if next_c and (self.get_char_at(i + 1) or "") == rstring_delimiter:
290
302
  self.log(
291
- "While parsing a string, we found a doubled quote but it was a mistake, removing one quote",
303
+ "While parsing a string, we found a valid starting doubled quote",
292
304
  )
305
+ doubled_quotes = True
293
306
  self.index += 1
307
+ else:
308
+ # Ok this is not a doubled quote, check if this is an empty string or not
309
+ i = self.skip_whitespaces_at(idx=1, move_main_index=False)
310
+ next_c = self.get_char_at(i)
311
+ if next_c in self.STRING_DELIMITERS + ["{", "["]:
312
+ # something fishy is going on here
313
+ self.log(
314
+ "While parsing a string, we found a doubled quote but also another quote afterwards, ignoring it",
315
+ )
316
+ self.index += 1
317
+ return ""
318
+ elif next_c not in [",", "]", "}"]:
319
+ self.log(
320
+ "While parsing a string, we found a doubled quote but it was a mistake, removing one quote",
321
+ )
322
+ self.index += 1
323
+ else:
324
+ # Otherwise we need to do another check before continuing
325
+ i = self.skip_to_character(character=rstring_delimiter, idx=1)
326
+ next_c = self.get_char_at(i)
327
+ if not next_c:
328
+ # mmmm that delimiter never appears again, this is a mistake
329
+ self.log(
330
+ "While parsing a string, we found a quote but it was a mistake, ignoring it",
331
+ )
332
+ return ""
294
333
 
295
334
  # Initialize our return value
296
335
  string_acc = ""
@@ -404,6 +443,38 @@ class JSONParser:
404
443
  string_acc += escape_seqs.get(char, char) or char
405
444
  self.index += 1
406
445
  char = self.get_char_at()
446
+ # If we are in object key context and we find a colon, it could be a missing right quote
447
+ if (
448
+ char == ":"
449
+ and not missing_quotes
450
+ and self.context.current == ContextValues.OBJECT_KEY
451
+ ):
452
+ # Ok now we need to check if this is followed by a value like "..."
453
+ i = self.skip_to_character(character=lstring_delimiter, idx=1)
454
+ next_c = self.get_char_at(i)
455
+ if next_c:
456
+ i += 1
457
+ # found the first delimiter
458
+ i = self.skip_to_character(character=rstring_delimiter, idx=i)
459
+ next_c = self.get_char_at(i)
460
+ if next_c:
461
+ # found a second delimiter
462
+ i += 1
463
+ # Skip spaces
464
+ i = self.skip_whitespaces_at(idx=i, move_main_index=False)
465
+ next_c = self.get_char_at(i)
466
+ if next_c and next_c in [",", "}"]:
467
+ # Ok then this is a missing right quote
468
+ self.log(
469
+ "While parsing a string missing the right delimiter in object key context, we found a :, stopping here",
470
+ )
471
+ break
472
+ else:
473
+ # The string ended without finding a lstring_delimiter, I will assume this is a missing right quote
474
+ self.log(
475
+ "While parsing a string missing the right delimiter in object key context, we found a :, stopping here",
476
+ )
477
+ break
407
478
  # ChatGPT sometimes forget to quote stuff in html tags or markdown, so we do this whole thing here
408
479
  if char == rstring_delimiter:
409
480
  # Special case here, in case of double quotes one after another
@@ -96,3 +96,24 @@ class StringFileWrapper:
96
96
  self.length = self.fd.tell()
97
97
  self.fd.seek(current_position)
98
98
  return self.length
99
+
100
+ def __setitem__(self, index: Union[int, slice], value: str) -> None:
101
+ """
102
+ Set a character or a slice of characters in the file.
103
+
104
+ Args:
105
+ index (slice): The slice of characters to set.
106
+ value (str): The value to set at the specified index or slice.
107
+ """
108
+ if isinstance(index, slice):
109
+ start = index.start or 0
110
+ else:
111
+ start = index or 0
112
+
113
+ if start < 0:
114
+ start += len(self)
115
+
116
+ current_position = self.fd.tell()
117
+ self.fd.seek(start)
118
+ self.fd.write(value)
119
+ self.fd.seek(current_position)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.33.0
3
+ Version: 0.35.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -0,0 +1,13 @@
1
+ json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
2
+ json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
+ json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
4
+ json_repair/json_parser.py,sha256=9ZHHQmfHPYQxSy93yjYPYtIHL415HiDo26hSuJCX4MA,35581
5
+ json_repair/json_repair.py,sha256=LINLSJBs3cJMfs1YRDaIpfWR5PJLs87Oe06G5yQjY18,9729
6
+ json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ json_repair/string_file_wrapper.py,sha256=koZmdq2-Z5K7XF1bDqX6dEbNaVMJYcMTjq-aGe6NQvA,4526
8
+ json_repair-0.35.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
9
+ json_repair-0.35.0.dist-info/METADATA,sha256=hUDqZJmhcKN7_8UDQk6FfHjvBvPE-jbzkjiEBqP-26I,11794
10
+ json_repair-0.35.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
11
+ json_repair-0.35.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
12
+ json_repair-0.35.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
13
+ json_repair-0.35.0.dist-info/RECORD,,
@@ -1,13 +0,0 @@
1
- json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
2
- json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
- json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
4
- json_repair/json_parser.py,sha256=qLMNN6xJm-8CM4446WizbENCBnFKzEoSM4VLZXTGXaQ,31831
5
- json_repair/json_repair.py,sha256=LINLSJBs3cJMfs1YRDaIpfWR5PJLs87Oe06G5yQjY18,9729
6
- json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- json_repair/string_file_wrapper.py,sha256=EHLhNBWoyUitzT08thytYJiNZh_klEFwfT8zutPSdb4,3905
8
- json_repair-0.33.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
9
- json_repair-0.33.0.dist-info/METADATA,sha256=-cTTyNlIlv3xouXyqxRuHDhSXT-2_QuB5AdeKpDbxs4,11794
10
- json_repair-0.33.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
11
- json_repair-0.33.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
12
- json_repair-0.33.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
13
- json_repair-0.33.0.dist-info/RECORD,,