json-repair 0.36.1__py3-none-any.whl → 0.38.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- json_repair/json_parser.py +88 -9
- {json_repair-0.36.1.dist-info → json_repair-0.38.0.dist-info}/METADATA +1 -1
- {json_repair-0.36.1.dist-info → json_repair-0.38.0.dist-info}/RECORD +7 -7
- {json_repair-0.36.1.dist-info → json_repair-0.38.0.dist-info}/LICENSE +0 -0
- {json_repair-0.36.1.dist-info → json_repair-0.38.0.dist-info}/WHEEL +0 -0
- {json_repair-0.36.1.dist-info → json_repair-0.38.0.dist-info}/entry_points.txt +0 -0
- {json_repair-0.36.1.dist-info → json_repair-0.38.0.dist-info}/top_level.txt +0 -0
json_repair/json_parser.py
CHANGED
@@ -101,6 +101,8 @@ class JSONParser:
|
|
101
101
|
char.isdigit() or char == "-" or char == "."
|
102
102
|
):
|
103
103
|
return self.parse_number()
|
104
|
+
elif char in ["#", "/"]:
|
105
|
+
return self.parse_comment()
|
104
106
|
# If everything else fails, we just ignore and move on
|
105
107
|
else:
|
106
108
|
self.index += 1
|
@@ -138,8 +140,9 @@ class JSONParser:
|
|
138
140
|
# The rollback index needs to be updated here in case the key is empty
|
139
141
|
rollback_index = self.index
|
140
142
|
key = str(self.parse_string())
|
141
|
-
|
142
|
-
|
143
|
+
if key == "":
|
144
|
+
self.skip_whitespaces_at()
|
145
|
+
if key != "" or (key == "" and self.get_char_at() in [":", "}"]):
|
143
146
|
# If the string is empty but there is a object divider, we are done here
|
144
147
|
break
|
145
148
|
if ContextValues.ARRAY in self.context.context and key in obj:
|
@@ -199,11 +202,10 @@ class JSONParser:
|
|
199
202
|
self.skip_whitespaces_at()
|
200
203
|
value = self.parse_json()
|
201
204
|
|
202
|
-
# It is possible that parse_json() returns nothing valid, so we
|
205
|
+
# It is possible that parse_json() returns nothing valid, so we increase by 1
|
203
206
|
if value == "":
|
204
|
-
|
205
|
-
|
206
|
-
if value == "..." and self.get_char_at(-1) == ".":
|
207
|
+
self.index += 1
|
208
|
+
elif value == "..." and self.get_char_at(-1) == ".":
|
207
209
|
self.log(
|
208
210
|
"While parsing an array, found a stray '...'; ignoring it",
|
209
211
|
)
|
@@ -243,6 +245,8 @@ class JSONParser:
|
|
243
245
|
lstring_delimiter = rstring_delimiter = '"'
|
244
246
|
|
245
247
|
char = self.get_char_at()
|
248
|
+
if char in ["#", "/"]:
|
249
|
+
return self.parse_comment()
|
246
250
|
# A valid string can only start with a valid quote or, in our case, with a literal
|
247
251
|
while char and char not in self.STRING_DELIMITERS and not char.isalnum():
|
248
252
|
self.index += 1
|
@@ -438,7 +442,7 @@ class JSONParser:
|
|
438
442
|
string_acc += char
|
439
443
|
self.index += 1
|
440
444
|
char = self.get_char_at()
|
441
|
-
if char and
|
445
|
+
if char and string_acc[-1] == "\\":
|
442
446
|
# This is a special case, if people use real strings this might happen
|
443
447
|
self.log("Found a stray escape sequence, normalizing it")
|
444
448
|
if char in [rstring_delimiter, "t", "n", "r", "b", "\\"]:
|
@@ -646,10 +650,15 @@ class JSONParser:
|
|
646
650
|
self.log(
|
647
651
|
"While parsing a string, we missed the closing quote, ignoring",
|
648
652
|
)
|
653
|
+
string_acc = string_acc.rstrip()
|
649
654
|
else:
|
650
655
|
self.index += 1
|
651
656
|
|
652
|
-
|
657
|
+
if missing_quotes or (string_acc and string_acc[-1] == "\n"):
|
658
|
+
# Clean the whitespaces for some corner cases
|
659
|
+
string_acc = string_acc.rstrip()
|
660
|
+
|
661
|
+
return string_acc
|
653
662
|
|
654
663
|
def parse_number(self) -> Union[float, int, str, JSONReturnType]:
|
655
664
|
# <number> is a valid real number expressed in one of a number of given formats
|
@@ -661,7 +670,7 @@ class JSONParser:
|
|
661
670
|
number_str += char
|
662
671
|
self.index += 1
|
663
672
|
char = self.get_char_at()
|
664
|
-
if
|
673
|
+
if number_str and number_str[-1] in "-eE/,":
|
665
674
|
# The number ends with a non valid character for a number/currency, rolling back one
|
666
675
|
number_str = number_str[:-1]
|
667
676
|
self.index -= 1
|
@@ -748,6 +757,76 @@ class JSONParser:
|
|
748
757
|
return self.skip_to_character(character=character, idx=idx + 1)
|
749
758
|
return idx
|
750
759
|
|
760
|
+
def parse_comment(self) -> str:
|
761
|
+
"""
|
762
|
+
Parse code-like comments:
|
763
|
+
|
764
|
+
- "# comment": A line comment that continues until a newline.
|
765
|
+
- "// comment": A line comment that continues until a newline.
|
766
|
+
- "/* comment */": A block comment that continues until the closing delimiter "*/".
|
767
|
+
|
768
|
+
The comment is skipped over and an empty string is returned so that comments do not interfere
|
769
|
+
with the actual JSON elements.
|
770
|
+
"""
|
771
|
+
char = self.get_char_at()
|
772
|
+
termination_characters = ["\n", "\r"]
|
773
|
+
if ContextValues.ARRAY in self.context.context:
|
774
|
+
termination_characters.append("]")
|
775
|
+
if ContextValues.OBJECT_VALUE in self.context.context:
|
776
|
+
termination_characters.append("}")
|
777
|
+
if ContextValues.OBJECT_KEY in self.context.context:
|
778
|
+
termination_characters.append(":")
|
779
|
+
# Line comment starting with #
|
780
|
+
if char == "#":
|
781
|
+
comment = ""
|
782
|
+
while char and char not in termination_characters:
|
783
|
+
comment += char
|
784
|
+
self.index += 1
|
785
|
+
char = self.get_char_at()
|
786
|
+
self.log(f"Found line comment: {comment}")
|
787
|
+
return ""
|
788
|
+
|
789
|
+
# Comments starting with '/'
|
790
|
+
elif char == "/":
|
791
|
+
next_char = self.get_char_at(1)
|
792
|
+
# Handle line comment starting with //
|
793
|
+
if next_char == "/":
|
794
|
+
comment = "//"
|
795
|
+
self.index += 2 # Skip both slashes.
|
796
|
+
char = self.get_char_at()
|
797
|
+
while char and char not in termination_characters:
|
798
|
+
comment += char
|
799
|
+
self.index += 1
|
800
|
+
char = self.get_char_at()
|
801
|
+
self.log(f"Found line comment: {comment}")
|
802
|
+
return ""
|
803
|
+
# Handle block comment starting with /*
|
804
|
+
elif next_char == "*":
|
805
|
+
comment = "/*"
|
806
|
+
self.index += 2 # Skip '/*'
|
807
|
+
while True:
|
808
|
+
char = self.get_char_at()
|
809
|
+
if not char:
|
810
|
+
self.log(
|
811
|
+
"Reached end-of-string while parsing block comment; unclosed block comment."
|
812
|
+
)
|
813
|
+
break
|
814
|
+
comment += char
|
815
|
+
self.index += 1
|
816
|
+
if comment.endswith("*/"):
|
817
|
+
break
|
818
|
+
self.log(f"Found block comment: {comment}")
|
819
|
+
return ""
|
820
|
+
else:
|
821
|
+
# Not a recognized comment pattern, skip the slash.
|
822
|
+
self.index += 1
|
823
|
+
return ""
|
824
|
+
|
825
|
+
else:
|
826
|
+
# Should not be reached: if for some reason the current character does not start a comment, skip it.
|
827
|
+
self.index += 1
|
828
|
+
return ""
|
829
|
+
|
751
830
|
def _log(self, text: str) -> None:
|
752
831
|
window: int = 10
|
753
832
|
start: int = max(self.index - window, 0)
|
@@ -1,13 +1,13 @@
|
|
1
1
|
json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
|
2
2
|
json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
|
3
3
|
json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
|
4
|
-
json_repair/json_parser.py,sha256=
|
4
|
+
json_repair/json_parser.py,sha256=BQsH8CRy59C2176bMwVerfqbHDXfLoEC1v5frmCiv7M,39020
|
5
5
|
json_repair/json_repair.py,sha256=LINLSJBs3cJMfs1YRDaIpfWR5PJLs87Oe06G5yQjY18,9729
|
6
6
|
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
7
|
json_repair/string_file_wrapper.py,sha256=koZmdq2-Z5K7XF1bDqX6dEbNaVMJYcMTjq-aGe6NQvA,4526
|
8
|
-
json_repair-0.
|
9
|
-
json_repair-0.
|
10
|
-
json_repair-0.
|
11
|
-
json_repair-0.
|
12
|
-
json_repair-0.
|
13
|
-
json_repair-0.
|
8
|
+
json_repair-0.38.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
9
|
+
json_repair-0.38.0.dist-info/METADATA,sha256=GBsf-E48jHl2gfFs7kks6AluILNnN43en2QpQc_hmwE,11794
|
10
|
+
json_repair-0.38.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
11
|
+
json_repair-0.38.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
|
12
|
+
json_repair-0.38.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
13
|
+
json_repair-0.38.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|