json-repair 0.33.0__py3-none-any.whl → 0.35.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,9 @@ JSONReturnType = Union[Dict[str, Any], List[Any], str, float, int, bool, None]
7
7
 
8
8
 
9
9
  class JSONParser:
10
+ # Constants
11
+ STRING_DELIMITERS = ['"', "'", "“", "”"]
12
+
10
13
  def __init__(
11
14
  self,
12
15
  json_str: Union[str, StringFileWrapper],
@@ -89,7 +92,9 @@ class JSONParser:
89
92
  )
90
93
  return ""
91
94
  # <string> starts with a quote
92
- elif not self.context.empty and (char in ['"', "'", "“"] or char.isalpha()):
95
+ elif not self.context.empty and (
96
+ char in self.STRING_DELIMITERS or char.isalpha()
97
+ ):
93
98
  return self.parse_string()
94
99
  # <number> starts with [0-9] or minus
95
100
  elif not self.context.empty and (
@@ -130,6 +135,8 @@ class JSONParser:
130
135
  # <member> starts with a <string>
131
136
  key = ""
132
137
  while self.get_char_at():
138
+ # The rollback index needs to be updated here in case the key is empty
139
+ rollback_index = self.index
133
140
  key = str(self.parse_string())
134
141
 
135
142
  if key != "" or (key == "" and self.get_char_at() == ":"):
@@ -140,6 +147,12 @@ class JSONParser:
140
147
  "While parsing an object we found a duplicate key, closing the object here and rolling back the index",
141
148
  )
142
149
  self.index = rollback_index - 1
150
+ # Insert an opening curly brace at the rollback position (right after the new index) so the rolled-back text starts with "{"
151
+ self.json_str = (
152
+ self.json_str[: self.index + 1]
153
+ + "{"
154
+ + self.json_str[self.index + 1 :]
155
+ )
143
156
  break
144
157
 
145
158
  # Skip filler whitespaces
@@ -227,7 +240,7 @@ class JSONParser:
227
240
 
228
241
  char = self.get_char_at()
229
242
  # A valid string can only start with a valid quote or, in our case, with a literal
230
- while char and char not in ['"', "'", "“"] and not char.isalnum():
243
+ while char and char not in self.STRING_DELIMITERS and not char.isalnum():
231
244
  self.index += 1
232
245
  char = self.get_char_at()
233
246
 
@@ -262,35 +275,61 @@ class JSONParser:
262
275
  if not missing_quotes:
263
276
  self.index += 1
264
277
 
278
+ self.skip_whitespaces_at()
265
279
  # Sometimes the string starts with doubled quotes; this case is also handled later in the while loop
266
- if self.get_char_at() == lstring_delimiter:
267
- # If it's an empty key, this was easy
268
- if (
269
- self.context.current == ContextValues.OBJECT_KEY
270
- and self.get_char_at(1) == ":"
271
- ):
272
- self.index += 1
273
- return ""
274
- # Find the next delimiter
275
- i = self.skip_to_character(character=rstring_delimiter, idx=1)
276
- next_c = self.get_char_at(i)
277
- # Now check that the next character is also a delimiter to ensure that we have "".....""
278
- # In that case we ignore this rstring delimiter
279
- if next_c and (self.get_char_at(i + 1) or "") == rstring_delimiter:
280
- self.log(
281
- "While parsing a string, we found a valid starting doubled quote, ignoring it",
282
- )
283
- doubled_quotes = True
284
- self.index += 1
285
- else:
286
- # Ok this is not a doubled quote, check if this is an empty string or not
287
- i = self.skip_whitespaces_at(idx=1, move_main_index=False)
280
+ if self.get_char_at() in self.STRING_DELIMITERS:
281
+ # If the next character is the same type of quote, we handle it as doubled quotes
282
+ if self.get_char_at() == lstring_delimiter:
283
+ # If it's an empty key, this was easy
284
+ if (
285
+ self.context.current == ContextValues.OBJECT_KEY
286
+ and self.get_char_at(1) == ":"
287
+ ):
288
+ self.index += 1
289
+ return ""
290
+ if self.get_char_at(1) == lstring_delimiter:
291
+ # There's something fishy about this, we found doubled quotes and then again quotes
292
+ self.log(
293
+ "While parsing a string, we found a doubled quote and then a quote again, ignoring it",
294
+ )
295
+ return ""
296
+ # Find the next delimiter
297
+ i = self.skip_to_character(character=rstring_delimiter, idx=1)
288
298
  next_c = self.get_char_at(i)
289
- if next_c not in [",", "]", "}"]:
299
+ # Now check that the next character is also a delimiter to ensure that we have "".....""
300
+ # In that case we ignore this rstring delimiter
301
+ if next_c and (self.get_char_at(i + 1) or "") == rstring_delimiter:
290
302
  self.log(
291
- "While parsing a string, we found a doubled quote but it was a mistake, removing one quote",
303
+ "While parsing a string, we found a valid starting doubled quote",
292
304
  )
305
+ doubled_quotes = True
293
306
  self.index += 1
307
+ else:
308
+ # Ok this is not a doubled quote, check if this is an empty string or not
309
+ i = self.skip_whitespaces_at(idx=1, move_main_index=False)
310
+ next_c = self.get_char_at(i)
311
+ if next_c in self.STRING_DELIMITERS + ["{", "["]:
312
+ # something fishy is going on here
313
+ self.log(
314
+ "While parsing a string, we found a doubled quote but also another quote afterwards, ignoring it",
315
+ )
316
+ self.index += 1
317
+ return ""
318
+ elif next_c not in [",", "]", "}"]:
319
+ self.log(
320
+ "While parsing a string, we found a doubled quote but it was a mistake, removing one quote",
321
+ )
322
+ self.index += 1
323
+ else:
324
+ # Otherwise we need to do another check before continuing
325
+ i = self.skip_to_character(character=rstring_delimiter, idx=1)
326
+ next_c = self.get_char_at(i)
327
+ if not next_c:
328
+ # That delimiter never appears again, so the quote we found was a mistake
329
+ self.log(
330
+ "While parsing a string, we found a quote but it was a mistake, ignoring it",
331
+ )
332
+ return ""
294
333
 
295
334
  # Initialize our return value
296
335
  string_acc = ""
@@ -404,6 +443,38 @@ class JSONParser:
404
443
  string_acc += escape_seqs.get(char, char) or char
405
444
  self.index += 1
406
445
  char = self.get_char_at()
446
+ # If we are in object key context and we find a colon, it could be a missing right quote
447
+ if (
448
+ char == ":"
449
+ and not missing_quotes
450
+ and self.context.current == ContextValues.OBJECT_KEY
451
+ ):
452
+ # Ok now we need to check if this is followed by a value like "..."
453
+ i = self.skip_to_character(character=lstring_delimiter, idx=1)
454
+ next_c = self.get_char_at(i)
455
+ if next_c:
456
+ i += 1
457
+ # found the first delimiter
458
+ i = self.skip_to_character(character=rstring_delimiter, idx=i)
459
+ next_c = self.get_char_at(i)
460
+ if next_c:
461
+ # found a second delimiter
462
+ i += 1
463
+ # Skip spaces
464
+ i = self.skip_whitespaces_at(idx=i, move_main_index=False)
465
+ next_c = self.get_char_at(i)
466
+ if next_c and next_c in [",", "}"]:
467
+ # Ok then this is a missing right quote
468
+ self.log(
469
+ "While parsing a string missing the right delimiter in object key context, we found a :, stopping here",
470
+ )
471
+ break
472
+ else:
473
+ # The string ended without finding a lstring_delimiter, I will assume this is a missing right quote
474
+ self.log(
475
+ "While parsing a string missing the right delimiter in object key context, we found a :, stopping here",
476
+ )
477
+ break
407
478
  # ChatGPT sometimes forgets to quote stuff in HTML tags or markdown, so we do this whole thing here
408
479
  if char == rstring_delimiter:
409
480
  # Special case here, in case of double quotes one after another
@@ -96,3 +96,24 @@ class StringFileWrapper:
96
96
  self.length = self.fd.tell()
97
97
  self.fd.seek(current_position)
98
98
  return self.length
99
+
100
+ def __setitem__(self, index: Union[int, slice], value: str) -> None:
101
+ """
102
+ Write `value` into the file starting at the given index, or at the start of the given slice (the slice's stop and step are not used).
103
+
104
+ Args:
105
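+ index (Union[int, slice]): The position, or a slice whose start is the position, at which to write. Negative values are offset by the length of the file.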
106
+ value (str): The value to set at the specified index or slice.
107
+ """
108
+ if isinstance(index, slice):
109
+ start = index.start or 0
110
+ else:
111
+ start = index or 0
112
+
113
+ if start < 0:
114
+ start += len(self)
115
+
116
+ current_position = self.fd.tell()
117
+ self.fd.seek(start)
118
+ self.fd.write(value)
119
+ self.fd.seek(current_position)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.33.0
3
+ Version: 0.35.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -0,0 +1,13 @@
1
+ json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
2
+ json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
+ json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
4
+ json_repair/json_parser.py,sha256=9ZHHQmfHPYQxSy93yjYPYtIHL415HiDo26hSuJCX4MA,35581
5
+ json_repair/json_repair.py,sha256=LINLSJBs3cJMfs1YRDaIpfWR5PJLs87Oe06G5yQjY18,9729
6
+ json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ json_repair/string_file_wrapper.py,sha256=koZmdq2-Z5K7XF1bDqX6dEbNaVMJYcMTjq-aGe6NQvA,4526
8
+ json_repair-0.35.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
9
+ json_repair-0.35.0.dist-info/METADATA,sha256=hUDqZJmhcKN7_8UDQk6FfHjvBvPE-jbzkjiEBqP-26I,11794
10
+ json_repair-0.35.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
11
+ json_repair-0.35.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
12
+ json_repair-0.35.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
13
+ json_repair-0.35.0.dist-info/RECORD,,
@@ -1,13 +0,0 @@
1
- json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
2
- json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
- json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
4
- json_repair/json_parser.py,sha256=qLMNN6xJm-8CM4446WizbENCBnFKzEoSM4VLZXTGXaQ,31831
5
- json_repair/json_repair.py,sha256=LINLSJBs3cJMfs1YRDaIpfWR5PJLs87Oe06G5yQjY18,9729
6
- json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- json_repair/string_file_wrapper.py,sha256=EHLhNBWoyUitzT08thytYJiNZh_klEFwfT8zutPSdb4,3905
8
- json_repair-0.33.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
9
- json_repair-0.33.0.dist-info/METADATA,sha256=-cTTyNlIlv3xouXyqxRuHDhSXT-2_QuB5AdeKpDbxs4,11794
10
- json_repair-0.33.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
11
- json_repair-0.33.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
12
- json_repair-0.33.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
13
- json_repair-0.33.0.dist-info/RECORD,,