json-repair 0.46.0__py3-none-any.whl → 0.46.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
json_repair/__init__.py CHANGED
@@ -1,4 +1,3 @@
1
- from .json_repair import from_file as from_file
2
- from .json_repair import load as load
3
- from .json_repair import loads as loads
4
- from .json_repair import repair_json as repair_json
1
+ from .json_repair import from_file, load, loads, repair_json
2
+
3
+ __all__ = ["from_file", "load", "loads", "repair_json"]
@@ -41,7 +41,7 @@ class JSONParser:
41
41
  self.log = self._log
42
42
  else:
43
43
  # No-op
44
- self.log = lambda *args, **kwargs: None
44
+ self.log = lambda *args, **kwargs: None # noqa: ARG005
45
45
  # When the json to be repaired is the accumulation of streaming json at a certain moment.
46
46
  # e.g. json obtained from llm response.
47
47
  # If this parameter is set to True, the repair results are kept stable. For example:
@@ -67,6 +67,9 @@ class JSONParser:
67
67
  # replace the last entry with the new one since the new one seems an update
68
68
  json.pop()
69
69
  json.append(j)
70
+ else:
71
+ # this was a bust, move the index
72
+ self.index += 1
70
73
  # If nothing extra was found, don't return an array
71
74
  if len(json) == 1:
72
75
  self.log(
@@ -102,14 +105,10 @@ class JSONParser:
102
105
  )
103
106
  return ""
104
107
  # <string> starts with a quote
105
- elif not self.context.empty and (
106
- char in self.STRING_DELIMITERS or char.isalpha()
107
- ):
108
+ elif not self.context.empty and (char in self.STRING_DELIMITERS or char.isalpha()):
108
109
  return self.parse_string()
109
110
  # <number> starts with [0-9] or minus
110
- elif not self.context.empty and (
111
- char.isdigit() or char == "-" or char == "."
112
- ):
111
+ elif not self.context.empty and (char.isdigit() or char == "-" or char == "."):
113
112
  return self.parse_number()
114
113
  elif char in ["#", "/"]:
115
114
  return self.parse_comment()
@@ -161,8 +160,7 @@ class JSONParser:
161
160
  if isinstance(prev_value, list):
162
161
  prev_value.extend(
163
162
  new_array[0]
164
- if len(new_array) == 1
165
- and isinstance(new_array[0], list)
163
+ if len(new_array) == 1 and isinstance(new_array[0], list)
166
164
  else new_array
167
165
  )
168
166
  self.skip_whitespaces_at()
@@ -182,11 +180,7 @@ class JSONParser:
182
180
  )
183
181
  self.index = rollback_index - 1
184
182
  # add an opening curly brace to make this work
185
- self.json_str = (
186
- self.json_str[: self.index + 1]
187
- + "{"
188
- + self.json_str[self.index + 1 :]
189
- )
183
+ self.json_str = self.json_str[: self.index + 1] + "{" + self.json_str[self.index + 1 :]
190
184
  break
191
185
 
192
186
  # Skip filler whitespaces
@@ -239,10 +233,7 @@ class JSONParser:
239
233
  i = 1
240
234
  i = self.skip_to_character(char, i)
241
235
  i = self.skip_whitespaces_at(idx=i + 1, move_main_index=False)
242
- if self.get_char_at(i) == ":":
243
- value = self.parse_object()
244
- else:
245
- value = self.parse_string()
236
+ value = self.parse_object() if self.get_char_at(i) == ":" else self.parse_string()
246
237
  else:
247
238
  value = self.parse_json()
248
239
 
@@ -304,10 +295,7 @@ class JSONParser:
304
295
  elif char.isalnum():
305
296
  # This could be a <boolean> and not a string. Because (T)rue or (F)alse or (N)ull are valid
306
297
  # But remember, object keys are only of type string
307
- if (
308
- char.lower() in ["t", "f", "n"]
309
- and self.context.current != ContextValues.OBJECT_KEY
310
- ):
298
+ if char.lower() in ["t", "f", "n"] and self.context.current != ContextValues.OBJECT_KEY:
311
299
  value = self.parse_boolean_or_null()
312
300
  if value != "":
313
301
  return value
@@ -320,15 +308,9 @@ class JSONParser:
320
308
  self.index += 1
321
309
 
322
310
  # There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
323
- if (
324
- self.get_char_at() in self.STRING_DELIMITERS
325
- and self.get_char_at() == lstring_delimiter
326
- ):
311
+ if self.get_char_at() in self.STRING_DELIMITERS and self.get_char_at() == lstring_delimiter:
327
312
  # If it's an empty key, this was easy
328
- if (
329
- self.context.current == ContextValues.OBJECT_KEY
330
- and self.get_char_at(1) == ":"
331
- ):
313
+ if self.context.current == ContextValues.OBJECT_KEY and self.get_char_at(1) == ":":
332
314
  self.index += 1
333
315
  return ""
334
316
  if self.get_char_at(1) == lstring_delimiter:
@@ -377,11 +359,7 @@ class JSONParser:
377
359
  char = self.get_char_at()
378
360
  unmatched_delimiter = False
379
361
  while char and char != rstring_delimiter:
380
- if (
381
- missing_quotes
382
- and self.context.current == ContextValues.OBJECT_KEY
383
- and (char == ":" or char.isspace())
384
- ):
362
+ if missing_quotes and self.context.current == ContextValues.OBJECT_KEY and (char == ":" or char.isspace()):
385
363
  self.log(
386
364
  "While parsing a string missing the left delimiter in object key context, we found a :, stopping here",
387
365
  )
@@ -418,9 +396,7 @@ class JSONParser:
418
396
  else:
419
397
  # But again, this could just be something a bit stupid like "lorem, "ipsum" sic"
420
398
  # Check if we find a : afterwards (skipping space)
421
- i = self.skip_whitespaces_at(
422
- idx=i + 1, move_main_index=False
423
- )
399
+ i = self.skip_whitespaces_at(idx=i + 1, move_main_index=False)
424
400
  next_c = self.get_char_at(i)
425
401
  if next_c and next_c != ":":
426
402
  rstring_delimiter_missing = False
@@ -483,12 +459,19 @@ class JSONParser:
483
459
  string_acc += escape_seqs.get(char, char) or char
484
460
  self.index += 1
485
461
  char = self.get_char_at()
462
+ elif char in ["u", "x"]:
463
+ # If we find a unicode escape sequence, normalize it
464
+ num_chars = 4 if char == "u" else 2
465
+ next_chars = self.json_str[self.index + 1 : self.index + 1 + num_chars]
466
+ if len(next_chars) == num_chars and all(c in "0123456789abcdefABCDEF" for c in next_chars):
467
+ self.log("Found a unicode escape sequence, normalizing it")
468
+ string_acc = string_acc[:-1]
469
+ string_acc += chr(int(next_chars, 16))
470
+ self.index += 1 + num_chars
471
+ char = self.get_char_at()
472
+ continue
486
473
  # If we are in object key context and we find a colon, it could be a missing right quote
487
- if (
488
- char == ":"
489
- and not missing_quotes
490
- and self.context.current == ContextValues.OBJECT_KEY
491
- ):
474
+ if char == ":" and not missing_quotes and self.context.current == ContextValues.OBJECT_KEY:
492
475
  # Ok now we need to check if this is followed by a value like "..."
493
476
  i = self.skip_to_character(character=lstring_delimiter, idx=1)
494
477
  next_c = self.get_char_at(i)
@@ -519,14 +502,9 @@ class JSONParser:
519
502
  if char == rstring_delimiter:
520
503
  # Special case here, in case of double quotes one after another
521
504
  if doubled_quotes and self.get_char_at(1) == rstring_delimiter:
522
- self.log(
523
- "While parsing a string, we found a doubled quote, ignoring it"
524
- )
505
+ self.log("While parsing a string, we found a doubled quote, ignoring it")
525
506
  self.index += 1
526
- elif (
527
- missing_quotes
528
- and self.context.current == ContextValues.OBJECT_VALUE
529
- ):
507
+ elif missing_quotes and self.context.current == ContextValues.OBJECT_VALUE:
530
508
  # In case of a missing starting quote I need to check if the delimiter is the end or the beginning of a key
531
509
  i = 1
532
510
  next_c = self.get_char_at(i)
@@ -570,18 +548,9 @@ class JSONParser:
570
548
  check_comma_in_object_value = False
571
549
  # If we are in an object context, let's check for the right delimiters
572
550
  if (
573
- (
574
- ContextValues.OBJECT_KEY in self.context.context
575
- and next_c in [":", "}"]
576
- )
577
- or (
578
- ContextValues.OBJECT_VALUE in self.context.context
579
- and next_c == "}"
580
- )
581
- or (
582
- ContextValues.ARRAY in self.context.context
583
- and next_c in ["]", ","]
584
- )
551
+ (ContextValues.OBJECT_KEY in self.context.context and next_c in [":", "}"])
552
+ or (ContextValues.OBJECT_VALUE in self.context.context and next_c == "}")
553
+ or (ContextValues.ARRAY in self.context.context and next_c in ["]", ","])
585
554
  or (
586
555
  check_comma_in_object_value
587
556
  and self.context.current == ContextValues.OBJECT_VALUE
@@ -592,10 +561,7 @@ class JSONParser:
592
561
  i += 1
593
562
  next_c = self.get_char_at(i)
594
563
  # If we stopped for a comma in object_value context, let's check if we find a "} at the end of the string
595
- if (
596
- next_c == ","
597
- and self.context.current == ContextValues.OBJECT_VALUE
598
- ):
564
+ if next_c == "," and self.context.current == ContextValues.OBJECT_VALUE:
599
565
  i += 1
600
566
  i = self.skip_to_character(character=rstring_delimiter, idx=i)
601
567
  next_c = self.get_char_at(i)
@@ -603,29 +569,20 @@ class JSONParser:
603
569
  i += 1
604
570
  i = self.skip_whitespaces_at(idx=i, move_main_index=False)
605
571
  next_c = self.get_char_at(i)
606
- elif (
607
- next_c == rstring_delimiter and self.get_char_at(i - 1) != "\\"
608
- ):
572
+ elif next_c == rstring_delimiter and self.get_char_at(i - 1) != "\\":
609
573
  # Check if self.index:self.index+i is only whitespaces, break if that's the case
610
- if all(
611
- str(self.get_char_at(j)).isspace()
612
- for j in range(1, i)
613
- if self.get_char_at(j)
614
- ):
574
+ if all(str(self.get_char_at(j)).isspace() for j in range(1, i) if self.get_char_at(j)):
615
575
  break
616
576
  if self.context.current == ContextValues.OBJECT_VALUE:
617
577
  # But this might not be it! This could be just a missing comma
618
578
  # We found a delimiter and we need to check if this is a key
619
579
  # so find a rstring_delimiter and a colon after
620
- i = self.skip_to_character(
621
- character=rstring_delimiter, idx=i + 1
622
- )
580
+ i = self.skip_to_character(character=rstring_delimiter, idx=i + 1)
623
581
  i += 1
624
582
  next_c = self.get_char_at(i)
625
583
  while next_c and next_c != ":":
626
584
  if next_c in [",", "]", "}"] or (
627
- next_c == rstring_delimiter
628
- and self.get_char_at(i - 1) != "\\"
585
+ next_c == rstring_delimiter and self.get_char_at(i - 1) != "\\"
629
586
  ):
630
587
  break
631
588
  i += 1
@@ -658,12 +615,7 @@ class JSONParser:
658
615
  string_acc += str(char)
659
616
  self.index += 1
660
617
  char = self.get_char_at()
661
- if (
662
- char
663
- and missing_quotes
664
- and self.context.current == ContextValues.OBJECT_KEY
665
- and char.isspace()
666
- ):
618
+ if char and missing_quotes and self.context.current == ContextValues.OBJECT_KEY and char.isspace():
667
619
  self.log(
668
620
  "While parsing a string, handling an extreme corner case in which the LLM added a comment instead of valid string, invalidate the string and return an empty value",
669
621
  )
@@ -683,9 +635,7 @@ class JSONParser:
683
635
  else:
684
636
  self.index += 1
685
637
 
686
- if not self.stream_stable and (
687
- missing_quotes or (string_acc and string_acc[-1] == "\n")
688
- ):
638
+ if not self.stream_stable and (missing_quotes or (string_acc and string_acc[-1] == "\n")):
689
639
  # Clean the whitespaces for some corner cases
690
640
  string_acc = string_acc.rstrip()
691
641
 
@@ -793,9 +743,7 @@ class JSONParser:
793
743
  while True:
794
744
  char = self.get_char_at()
795
745
  if not char:
796
- self.log(
797
- "Reached end-of-string while parsing block comment; unclosed block comment."
798
- )
746
+ self.log("Reached end-of-string while parsing block comment; unclosed block comment.")
799
747
  break
800
748
  comment += char
801
749
  self.index += 1
@@ -236,10 +236,7 @@ def cli(inline_args: list[str] | None = None) -> int:
236
236
  help="Number of spaces for indentation (Default 2)",
237
237
  )
238
238
 
239
- if inline_args is None: # pragma: no cover
240
- args = parser.parse_args()
241
- else:
242
- args = parser.parse_args(inline_args)
239
+ args = parser.parse_args() if inline_args is None else parser.parse_args(inline_args)
243
240
 
244
241
  # Inline mode requires a filename, so error out if none was provided.
245
242
  if args.inline and not args.filename: # pragma: no cover
@@ -6,7 +6,7 @@ class ObjectComparer: # pragma: no cover
6
6
  pass # No operation performed in the constructor
7
7
 
8
8
  @staticmethod
9
- def is_same_object(obj1: Any, obj2: Any, path: str = "") -> bool:
9
+ def is_same_object(obj1: Any, obj2: Any) -> bool:
10
10
  """
11
11
  Recursively compares two objects and ensures that:
12
12
  - Their types match
@@ -30,10 +30,7 @@ class ObjectComparer: # pragma: no cover
30
30
  elif isinstance(obj1, list):
31
31
  if len(obj1) != len(obj2):
32
32
  return False
33
- for i in range(len(obj1)):
34
- if not ObjectComparer.is_same_object(obj1[i], obj2[i]):
35
- return False
36
- return True
33
+ return all(ObjectComparer.is_same_object(obj1[i], obj2[i]) for i in range(len(obj1)))
37
34
 
38
35
  # For atoms: types already match, so just return True
39
36
  return True
@@ -4,7 +4,7 @@ from typing import TextIO
4
4
 
5
5
  class StringFileWrapper:
6
6
  # This is a trick to simplify the code: transform the file descriptor handling into string handling
7
- def __init__(self, fd: TextIO, CHUNK_LENGTH: int) -> None:
7
+ def __init__(self, fd: TextIO, chunk_length: int) -> None:
8
8
  """
9
9
  Initialize the StringFileWrapper with a file descriptor and chunk length.
10
10
 
@@ -23,10 +23,10 @@ class StringFileWrapper:
23
23
  # Buffers are 1MB strings that are read from the file
24
24
  # and kept in memory to keep reads low
25
25
  self.buffers: dict[int, str] = {}
26
- # CHUNK_LENGTH is in bytes
27
- if not CHUNK_LENGTH or CHUNK_LENGTH < 2:
28
- CHUNK_LENGTH = 1_000_000
29
- self.buffer_length = CHUNK_LENGTH
26
+ # chunk_length is in bytes
27
+ if not chunk_length or chunk_length < 2:
28
+ chunk_length = 1_000_000
29
+ self.buffer_length = chunk_length
30
30
 
31
31
  def get_buffer(self, index: int) -> str:
32
32
  """
@@ -65,19 +65,11 @@ class StringFileWrapper:
65
65
  buffer_index = index.start // self.buffer_length
66
66
  buffer_end = index.stop // self.buffer_length
67
67
  if buffer_index == buffer_end:
68
- return self.get_buffer(buffer_index)[
69
- index.start % self.buffer_length : index.stop % self.buffer_length
70
- ]
68
+ return self.get_buffer(buffer_index)[index.start % self.buffer_length : index.stop % self.buffer_length]
71
69
  else:
72
- start_slice = self.get_buffer(buffer_index)[
73
- index.start % self.buffer_length :
74
- ]
75
- end_slice = self.get_buffer(buffer_end)[
76
- : index.stop % self.buffer_length
77
- ]
78
- middle_slices = [
79
- self.get_buffer(i) for i in range(buffer_index + 1, buffer_end)
80
- ]
70
+ start_slice = self.get_buffer(buffer_index)[index.start % self.buffer_length :]
71
+ end_slice = self.get_buffer(buffer_end)[: index.stop % self.buffer_length]
72
+ middle_slices = [self.get_buffer(i) for i in range(buffer_index + 1, buffer_end)]
81
73
  return start_slice + "".join(middle_slices) + end_slice
82
74
  else:
83
75
  buffer_index = index // self.buffer_length
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: json_repair
3
- Version: 0.46.0
3
+ Version: 0.46.2
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -38,7 +38,7 @@ License-File: LICENSE
38
38
  Dynamic: license-file
39
39
 
40
40
  [![PyPI](https://img.shields.io/pypi/v/json-repair)](https://pypi.org/project/json-repair/)
41
- ![Python version](https://img.shields.io/badge/python-3.9+-important)
41
+ ![Python version](https://img.shields.io/badge/python-3.10+-important)
42
42
  [![PyPI downloads](https://img.shields.io/pypi/dm/json-repair)](https://pypi.org/project/json-repair/)
43
43
  [![Github Sponsors](https://img.shields.io/github/sponsors/mangiucugna)](https://github.com/sponsors/mangiucugna)
44
44
  [![GitHub Repo stars](https://img.shields.io/github/stars/mangiucugna/json_repair?style=flat)](https://github.com/mangiucugna/json_repair/stargazers)
@@ -289,6 +289,7 @@ You will need owner access to this repository
289
289
  - Typescript: https://github.com/josdejong/jsonrepair
290
290
  - Go: https://github.com/RealAlexandreAI/json-repair
291
291
  - Ruby: https://github.com/sashazykov/json-repair-rb
292
+ - Rust: https://github.com/oramasearch/llm_json
292
293
  ---
293
294
  ## Star History
294
295
 
@@ -0,0 +1,14 @@
1
+ json_repair/__init__.py,sha256=6FDD6dEVM5Pb5o4Zodgw4ex30Hzy-YvNRy0vts9SQ4I,118
2
+ json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
+ json_repair/json_context.py,sha256=WsMOjqpGSr6aaDONcrk8UFtTurzWon2Qq9AoBBYseoI,934
4
+ json_repair/json_parser.py,sha256=B-DgJfyQOMHQ3F0RIBnltUGnGw0DFM-J7xOcLmCylVs,39744
5
+ json_repair/json_repair.py,sha256=pyH5fCkS1lyNPVjkqXerQ91lBz3eTHDPgV1QtnvJm-Y,11243
6
+ json_repair/object_comparer.py,sha256=LlIF0MisRglzC-CiG5AxAEDCBWBHeJd-6uXYx0uRmCk,1175
7
+ json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ json_repair/string_file_wrapper.py,sha256=tGkWBEUPE-CZPf4uSM5NE9oSDTpskX0myJiXsl-gbds,4333
9
+ json_repair-0.46.2.dist-info/licenses/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
10
+ json_repair-0.46.2.dist-info/METADATA,sha256=-EKRFk4rzF6I4EqFqEVfXJn7aPFrgFzdf1oCZfWgYLE,12208
11
+ json_repair-0.46.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
12
+ json_repair-0.46.2.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
13
+ json_repair-0.46.2.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
14
+ json_repair-0.46.2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.8.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,14 +0,0 @@
1
- json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
2
- json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
- json_repair/json_context.py,sha256=WsMOjqpGSr6aaDONcrk8UFtTurzWon2Qq9AoBBYseoI,934
4
- json_repair/json_parser.py,sha256=y-JZiKUjC5IdB2u4OR2SyDQcOWO2EIJz1iRKHlLRXYo,40323
5
- json_repair/json_repair.py,sha256=9wxf0vVNfr_RNQI1rbVPvxQ9feEwwvgnvkiYXwGEBX8,11292
6
- json_repair/object_comparer.py,sha256=ZjxrzepSNGrhiwzid2Dm657x1Aj-E1-h37bDygK8ByE,1261
7
- json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- json_repair/string_file_wrapper.py,sha256=uwW4B1s9Cf-iF3ANsCz-RPu2ddCqDETrt8bdojh8ufA,4485
9
- json_repair-0.46.0.dist-info/licenses/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
10
- json_repair-0.46.0.dist-info/METADATA,sha256=xp8t58AzqrRymn8hBe0KSjcM92Auek4vj56dzCdDSM8,12159
11
- json_repair-0.46.0.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
12
- json_repair-0.46.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
13
- json_repair-0.46.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
14
- json_repair-0.46.0.dist-info/RECORD,,