json-repair 0.46.1__py3-none-any.whl → 0.46.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
json_repair/__init__.py CHANGED
@@ -1,4 +1,3 @@
1
- from .json_repair import from_file as from_file
2
- from .json_repair import load as load
3
- from .json_repair import loads as loads
4
- from .json_repair import repair_json as repair_json
1
+ from .json_repair import from_file, load, loads, repair_json
2
+
3
+ __all__ = ["from_file", "load", "loads", "repair_json"]
@@ -105,14 +105,10 @@ class JSONParser:
105
105
  )
106
106
  return ""
107
107
  # <string> starts with a quote
108
- elif not self.context.empty and (
109
- char in self.STRING_DELIMITERS or char.isalpha()
110
- ):
108
+ elif not self.context.empty and (char in self.STRING_DELIMITERS or char.isalpha()):
111
109
  return self.parse_string()
112
110
  # <number> starts with [0-9] or minus
113
- elif not self.context.empty and (
114
- char.isdigit() or char == "-" or char == "."
115
- ):
111
+ elif not self.context.empty and (char.isdigit() or char == "-" or char == "."):
116
112
  return self.parse_number()
117
113
  elif char in ["#", "/"]:
118
114
  return self.parse_comment()
@@ -164,8 +160,7 @@ class JSONParser:
164
160
  if isinstance(prev_value, list):
165
161
  prev_value.extend(
166
162
  new_array[0]
167
- if len(new_array) == 1
168
- and isinstance(new_array[0], list)
163
+ if len(new_array) == 1 and isinstance(new_array[0], list)
169
164
  else new_array
170
165
  )
171
166
  self.skip_whitespaces_at()
@@ -185,11 +180,7 @@ class JSONParser:
185
180
  )
186
181
  self.index = rollback_index - 1
187
182
  # add an opening curly brace to make this work
188
- self.json_str = (
189
- self.json_str[: self.index + 1]
190
- + "{"
191
- + self.json_str[self.index + 1 :]
192
- )
183
+ self.json_str = self.json_str[: self.index + 1] + "{" + self.json_str[self.index + 1 :]
193
184
  break
194
185
 
195
186
  # Skip filler whitespaces
@@ -242,10 +233,7 @@ class JSONParser:
242
233
  i = 1
243
234
  i = self.skip_to_character(char, i)
244
235
  i = self.skip_whitespaces_at(idx=i + 1, move_main_index=False)
245
- if self.get_char_at(i) == ":":
246
- value = self.parse_object()
247
- else:
248
- value = self.parse_string()
236
+ value = self.parse_object() if self.get_char_at(i) == ":" else self.parse_string()
249
237
  else:
250
238
  value = self.parse_json()
251
239
 
@@ -307,10 +295,7 @@ class JSONParser:
307
295
  elif char.isalnum():
308
296
  # This could be a <boolean> and not a string. Because (T)rue or (F)alse or (N)ull are valid
309
297
  # But remember, object keys are only of type string
310
- if (
311
- char.lower() in ["t", "f", "n"]
312
- and self.context.current != ContextValues.OBJECT_KEY
313
- ):
298
+ if char.lower() in ["t", "f", "n"] and self.context.current != ContextValues.OBJECT_KEY:
314
299
  value = self.parse_boolean_or_null()
315
300
  if value != "":
316
301
  return value
@@ -323,15 +308,9 @@ class JSONParser:
323
308
  self.index += 1
324
309
 
325
310
  # There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
326
- if (
327
- self.get_char_at() in self.STRING_DELIMITERS
328
- and self.get_char_at() == lstring_delimiter
329
- ):
311
+ if self.get_char_at() in self.STRING_DELIMITERS and self.get_char_at() == lstring_delimiter:
330
312
  # If it's an empty key, this was easy
331
- if (
332
- self.context.current == ContextValues.OBJECT_KEY
333
- and self.get_char_at(1) == ":"
334
- ):
313
+ if self.context.current == ContextValues.OBJECT_KEY and self.get_char_at(1) == ":":
335
314
  self.index += 1
336
315
  return ""
337
316
  if self.get_char_at(1) == lstring_delimiter:
@@ -380,11 +359,7 @@ class JSONParser:
380
359
  char = self.get_char_at()
381
360
  unmatched_delimiter = False
382
361
  while char and char != rstring_delimiter:
383
- if (
384
- missing_quotes
385
- and self.context.current == ContextValues.OBJECT_KEY
386
- and (char == ":" or char.isspace())
387
- ):
362
+ if missing_quotes and self.context.current == ContextValues.OBJECT_KEY and (char == ":" or char.isspace()):
388
363
  self.log(
389
364
  "While parsing a string missing the left delimiter in object key context, we found a :, stopping here",
390
365
  )
@@ -421,9 +396,7 @@ class JSONParser:
421
396
  else:
422
397
  # But again, this could just be something a bit stupid like "lorem, "ipsum" sic"
423
398
  # Check if we find a : afterwards (skipping space)
424
- i = self.skip_whitespaces_at(
425
- idx=i + 1, move_main_index=False
426
- )
399
+ i = self.skip_whitespaces_at(idx=i + 1, move_main_index=False)
427
400
  next_c = self.get_char_at(i)
428
401
  if next_c and next_c != ":":
429
402
  rstring_delimiter_missing = False
@@ -486,12 +459,19 @@ class JSONParser:
486
459
  string_acc += escape_seqs.get(char, char) or char
487
460
  self.index += 1
488
461
  char = self.get_char_at()
462
+ elif char in ["u", "x"]:
463
+ # If we find a unicode escape sequence, normalize it
464
+ num_chars = 4 if char == "u" else 2
465
+ next_chars = self.json_str[self.index + 1 : self.index + 1 + num_chars]
466
+ if len(next_chars) == num_chars and all(c in "0123456789abcdefABCDEF" for c in next_chars):
467
+ self.log("Found a unicode escape sequence, normalizing it")
468
+ string_acc = string_acc[:-1]
469
+ string_acc += chr(int(next_chars, 16))
470
+ self.index += 1 + num_chars
471
+ char = self.get_char_at()
472
+ continue
489
473
  # If we are in object key context and we find a colon, it could be a missing right quote
490
- if (
491
- char == ":"
492
- and not missing_quotes
493
- and self.context.current == ContextValues.OBJECT_KEY
494
- ):
474
+ if char == ":" and not missing_quotes and self.context.current == ContextValues.OBJECT_KEY:
495
475
  # Ok now we need to check if this is followed by a value like "..."
496
476
  i = self.skip_to_character(character=lstring_delimiter, idx=1)
497
477
  next_c = self.get_char_at(i)
@@ -522,14 +502,9 @@ class JSONParser:
522
502
  if char == rstring_delimiter:
523
503
  # Special case here, in case of double quotes one after another
524
504
  if doubled_quotes and self.get_char_at(1) == rstring_delimiter:
525
- self.log(
526
- "While parsing a string, we found a doubled quote, ignoring it"
527
- )
505
+ self.log("While parsing a string, we found a doubled quote, ignoring it")
528
506
  self.index += 1
529
- elif (
530
- missing_quotes
531
- and self.context.current == ContextValues.OBJECT_VALUE
532
- ):
507
+ elif missing_quotes and self.context.current == ContextValues.OBJECT_VALUE:
533
508
  # In case of missing starting quote I need to check if the delimeter is the end or the beginning of a key
534
509
  i = 1
535
510
  next_c = self.get_char_at(i)
@@ -573,18 +548,9 @@ class JSONParser:
573
548
  check_comma_in_object_value = False
574
549
  # If we are in an object context, let's check for the right delimiters
575
550
  if (
576
- (
577
- ContextValues.OBJECT_KEY in self.context.context
578
- and next_c in [":", "}"]
579
- )
580
- or (
581
- ContextValues.OBJECT_VALUE in self.context.context
582
- and next_c == "}"
583
- )
584
- or (
585
- ContextValues.ARRAY in self.context.context
586
- and next_c in ["]", ","]
587
- )
551
+ (ContextValues.OBJECT_KEY in self.context.context and next_c in [":", "}"])
552
+ or (ContextValues.OBJECT_VALUE in self.context.context and next_c == "}")
553
+ or (ContextValues.ARRAY in self.context.context and next_c in ["]", ","])
588
554
  or (
589
555
  check_comma_in_object_value
590
556
  and self.context.current == ContextValues.OBJECT_VALUE
@@ -595,10 +561,7 @@ class JSONParser:
595
561
  i += 1
596
562
  next_c = self.get_char_at(i)
597
563
  # If we stopped for a comma in object_value context, let's check if find a "} at the end of the string
598
- if (
599
- next_c == ","
600
- and self.context.current == ContextValues.OBJECT_VALUE
601
- ):
564
+ if next_c == "," and self.context.current == ContextValues.OBJECT_VALUE:
602
565
  i += 1
603
566
  i = self.skip_to_character(character=rstring_delimiter, idx=i)
604
567
  next_c = self.get_char_at(i)
@@ -606,29 +569,20 @@ class JSONParser:
606
569
  i += 1
607
570
  i = self.skip_whitespaces_at(idx=i, move_main_index=False)
608
571
  next_c = self.get_char_at(i)
609
- elif (
610
- next_c == rstring_delimiter and self.get_char_at(i - 1) != "\\"
611
- ):
572
+ elif next_c == rstring_delimiter and self.get_char_at(i - 1) != "\\":
612
573
  # Check if self.index:self.index+i is only whitespaces, break if that's the case
613
- if all(
614
- str(self.get_char_at(j)).isspace()
615
- for j in range(1, i)
616
- if self.get_char_at(j)
617
- ):
574
+ if all(str(self.get_char_at(j)).isspace() for j in range(1, i) if self.get_char_at(j)):
618
575
  break
619
576
  if self.context.current == ContextValues.OBJECT_VALUE:
620
577
  # But this might not be it! This could be just a missing comma
621
578
  # We found a delimiter and we need to check if this is a key
622
579
  # so find a rstring_delimiter and a colon after
623
- i = self.skip_to_character(
624
- character=rstring_delimiter, idx=i + 1
625
- )
580
+ i = self.skip_to_character(character=rstring_delimiter, idx=i + 1)
626
581
  i += 1
627
582
  next_c = self.get_char_at(i)
628
583
  while next_c and next_c != ":":
629
584
  if next_c in [",", "]", "}"] or (
630
- next_c == rstring_delimiter
631
- and self.get_char_at(i - 1) != "\\"
585
+ next_c == rstring_delimiter and self.get_char_at(i - 1) != "\\"
632
586
  ):
633
587
  break
634
588
  i += 1
@@ -661,12 +615,7 @@ class JSONParser:
661
615
  string_acc += str(char)
662
616
  self.index += 1
663
617
  char = self.get_char_at()
664
- if (
665
- char
666
- and missing_quotes
667
- and self.context.current == ContextValues.OBJECT_KEY
668
- and char.isspace()
669
- ):
618
+ if char and missing_quotes and self.context.current == ContextValues.OBJECT_KEY and char.isspace():
670
619
  self.log(
671
620
  "While parsing a string, handling an extreme corner case in which the LLM added a comment instead of valid string, invalidate the string and return an empty value",
672
621
  )
@@ -686,9 +635,7 @@ class JSONParser:
686
635
  else:
687
636
  self.index += 1
688
637
 
689
- if not self.stream_stable and (
690
- missing_quotes or (string_acc and string_acc[-1] == "\n")
691
- ):
638
+ if not self.stream_stable and (missing_quotes or (string_acc and string_acc[-1] == "\n")):
692
639
  # Clean the whitespaces for some corner cases
693
640
  string_acc = string_acc.rstrip()
694
641
 
@@ -796,9 +743,7 @@ class JSONParser:
796
743
  while True:
797
744
  char = self.get_char_at()
798
745
  if not char:
799
- self.log(
800
- "Reached end-of-string while parsing block comment; unclosed block comment."
801
- )
746
+ self.log("Reached end-of-string while parsing block comment; unclosed block comment.")
802
747
  break
803
748
  comment += char
804
749
  self.index += 1
@@ -236,10 +236,7 @@ def cli(inline_args: list[str] | None = None) -> int:
236
236
  help="Number of spaces for indentation (Default 2)",
237
237
  )
238
238
 
239
- if inline_args is None: # pragma: no cover
240
- args = parser.parse_args()
241
- else:
242
- args = parser.parse_args(inline_args)
239
+ args = parser.parse_args() if inline_args is None else parser.parse_args(inline_args)
243
240
 
244
241
  # Inline mode requires a filename, so error out if none was provided.
245
242
  if args.inline and not args.filename: # pragma: no cover
@@ -30,10 +30,7 @@ class ObjectComparer: # pragma: no cover
30
30
  elif isinstance(obj1, list):
31
31
  if len(obj1) != len(obj2):
32
32
  return False
33
- for i in range(len(obj1)):
34
- if not ObjectComparer.is_same_object(obj1[i], obj2[i]):
35
- return False
36
- return True
33
+ return all(ObjectComparer.is_same_object(obj1[i], obj2[i]) for i in range(len(obj1)))
37
34
 
38
35
  # For atoms: types already match, so just return True
39
36
  return True
@@ -4,7 +4,7 @@ from typing import TextIO
4
4
 
5
5
  class StringFileWrapper:
6
6
  # This is a trick to simplify the code, transform the filedescriptor handling into a string handling
7
- def __init__(self, fd: TextIO, CHUNK_LENGTH: int) -> None:
7
+ def __init__(self, fd: TextIO, chunk_length: int) -> None:
8
8
  """
9
9
  Initialize the StringFileWrapper with a file descriptor and chunk length.
10
10
 
@@ -23,10 +23,10 @@ class StringFileWrapper:
23
23
  # Buffers are 1MB strings that are read from the file
24
24
  # and kept in memory to keep reads low
25
25
  self.buffers: dict[int, str] = {}
26
- # CHUNK_LENGTH is in bytes
27
- if not CHUNK_LENGTH or CHUNK_LENGTH < 2:
28
- CHUNK_LENGTH = 1_000_000
29
- self.buffer_length = CHUNK_LENGTH
26
+ # chunk_length is in bytes
27
+ if not chunk_length or chunk_length < 2:
28
+ chunk_length = 1_000_000
29
+ self.buffer_length = chunk_length
30
30
 
31
31
  def get_buffer(self, index: int) -> str:
32
32
  """
@@ -65,19 +65,11 @@ class StringFileWrapper:
65
65
  buffer_index = index.start // self.buffer_length
66
66
  buffer_end = index.stop // self.buffer_length
67
67
  if buffer_index == buffer_end:
68
- return self.get_buffer(buffer_index)[
69
- index.start % self.buffer_length : index.stop % self.buffer_length
70
- ]
68
+ return self.get_buffer(buffer_index)[index.start % self.buffer_length : index.stop % self.buffer_length]
71
69
  else:
72
- start_slice = self.get_buffer(buffer_index)[
73
- index.start % self.buffer_length :
74
- ]
75
- end_slice = self.get_buffer(buffer_end)[
76
- : index.stop % self.buffer_length
77
- ]
78
- middle_slices = [
79
- self.get_buffer(i) for i in range(buffer_index + 1, buffer_end)
80
- ]
70
+ start_slice = self.get_buffer(buffer_index)[index.start % self.buffer_length :]
71
+ end_slice = self.get_buffer(buffer_end)[: index.stop % self.buffer_length]
72
+ middle_slices = [self.get_buffer(i) for i in range(buffer_index + 1, buffer_end)]
81
73
  return start_slice + "".join(middle_slices) + end_slice
82
74
  else:
83
75
  buffer_index = index // self.buffer_length
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: json_repair
3
- Version: 0.46.1
3
+ Version: 0.46.2
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -0,0 +1,14 @@
1
+ json_repair/__init__.py,sha256=6FDD6dEVM5Pb5o4Zodgw4ex30Hzy-YvNRy0vts9SQ4I,118
2
+ json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
+ json_repair/json_context.py,sha256=WsMOjqpGSr6aaDONcrk8UFtTurzWon2Qq9AoBBYseoI,934
4
+ json_repair/json_parser.py,sha256=B-DgJfyQOMHQ3F0RIBnltUGnGw0DFM-J7xOcLmCylVs,39744
5
+ json_repair/json_repair.py,sha256=pyH5fCkS1lyNPVjkqXerQ91lBz3eTHDPgV1QtnvJm-Y,11243
6
+ json_repair/object_comparer.py,sha256=LlIF0MisRglzC-CiG5AxAEDCBWBHeJd-6uXYx0uRmCk,1175
7
+ json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ json_repair/string_file_wrapper.py,sha256=tGkWBEUPE-CZPf4uSM5NE9oSDTpskX0myJiXsl-gbds,4333
9
+ json_repair-0.46.2.dist-info/licenses/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
10
+ json_repair-0.46.2.dist-info/METADATA,sha256=-EKRFk4rzF6I4EqFqEVfXJn7aPFrgFzdf1oCZfWgYLE,12208
11
+ json_repair-0.46.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
12
+ json_repair-0.46.2.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
13
+ json_repair-0.46.2.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
14
+ json_repair-0.46.2.dist-info/RECORD,,
@@ -1,14 +0,0 @@
1
- json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
2
- json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
- json_repair/json_context.py,sha256=WsMOjqpGSr6aaDONcrk8UFtTurzWon2Qq9AoBBYseoI,934
4
- json_repair/json_parser.py,sha256=7IPu-tin9jLX_y1F9tn3UVpqILARhZYFaTTvq9xrLnU,40451
5
- json_repair/json_repair.py,sha256=9wxf0vVNfr_RNQI1rbVPvxQ9feEwwvgnvkiYXwGEBX8,11292
6
- json_repair/object_comparer.py,sha256=5-LK-s_2MAHddTxqXSzSkaIFvPXKGLh6swC1gyN74Lk,1245
7
- json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- json_repair/string_file_wrapper.py,sha256=uwW4B1s9Cf-iF3ANsCz-RPu2ddCqDETrt8bdojh8ufA,4485
9
- json_repair-0.46.1.dist-info/licenses/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
10
- json_repair-0.46.1.dist-info/METADATA,sha256=y-p_aOKtX4eu7p-JNj6IO3s8svB06IityZRnRKEN_xE,12208
11
- json_repair-0.46.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
12
- json_repair-0.46.1.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
13
- json_repair-0.46.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
14
- json_repair-0.46.1.dist-info/RECORD,,