json-repair 0.6.0__py3-none-any.whl → 0.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_repair.py +15 -14
- {json_repair-0.6.0.dist-info → json_repair-0.6.2.dist-info}/METADATA +1 -1
- json_repair-0.6.2.dist-info/RECORD +7 -0
- json_repair-0.6.0.dist-info/RECORD +0 -7
- {json_repair-0.6.0.dist-info → json_repair-0.6.2.dist-info}/LICENSE +0 -0
- {json_repair-0.6.0.dist-info → json_repair-0.6.2.dist-info}/WHEEL +0 -0
- {json_repair-0.6.0.dist-info → json_repair-0.6.2.dist-info}/top_level.txt +0 -0
json_repair/json_repair.py
CHANGED
@@ -67,7 +67,7 @@ class JSONParser:
|
|
67
67
|
elif char.isdigit() or char == "-":
|
68
68
|
return self.parse_number()
|
69
69
|
# <boolean> could be (T)rue or (F)alse or (N)ull
|
70
|
-
elif char
|
70
|
+
elif char.lower() in ["t", "f", "n"]:
|
71
71
|
return self.parse_boolean_or_null()
|
72
72
|
# This might be a <string> that is missing the starting '"'
|
73
73
|
elif char.isalpha():
|
@@ -207,16 +207,18 @@ class JSONParser:
|
|
207
207
|
if fixed_quotes:
|
208
208
|
if self.context == "object_key" and (char == ":" or char.isspace()):
|
209
209
|
break
|
210
|
-
elif self.context == "object_value" and
|
210
|
+
elif self.context == "object_value" and char in [",", "}"]:
|
211
211
|
break
|
212
212
|
self.index += 1
|
213
213
|
char = self.get_char_at()
|
214
214
|
# ChatGPT sometimes forgets to quote links in markdown like: { "content": "[LINK]("https://google.com")" }
|
215
|
-
if
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
215
|
+
if (
|
216
|
+
char == string_terminator
|
217
|
+
# Next character is not a comma
|
218
|
+
and self.get_char_at(1) != ","
|
219
|
+
and (
|
220
|
+
fix_broken_markdown_link
|
221
|
+
or (self.get_char_at(-2) == "]" and self.get_char_at(-1)) == "("
|
220
222
|
)
|
221
223
|
):
|
222
224
|
fix_broken_markdown_link = not fix_broken_markdown_link
|
@@ -238,7 +240,7 @@ class JSONParser:
|
|
238
240
|
|
239
241
|
return self.json_str[start:end]
|
240
242
|
|
241
|
-
def parse_number(self) -> Union[float, int]:
|
243
|
+
def parse_number(self) -> Union[float, int, str]:
|
242
244
|
# <number> is a valid real number expressed in one of a number of given formats
|
243
245
|
number_str = ""
|
244
246
|
number_chars = set("0123456789-.eE")
|
@@ -256,11 +258,11 @@ class JSONParser:
|
|
256
258
|
# This is a string then
|
257
259
|
return self.parse_string()
|
258
260
|
|
259
|
-
def parse_boolean_or_null(self) -> Union[bool, None]:
|
261
|
+
def parse_boolean_or_null(self) -> Union[bool, str, None]:
|
260
262
|
# <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
|
261
263
|
boolean_map = {"true": (True, 4), "false": (False, 5), "null": (None, 4)}
|
262
264
|
for key, (value, length) in boolean_map.items():
|
263
|
-
if self.json_str.startswith(key, self.index):
|
265
|
+
if self.json_str.lower().startswith(key, self.index):
|
264
266
|
self.index += length
|
265
267
|
return value
|
266
268
|
|
@@ -271,10 +273,10 @@ class JSONParser:
|
|
271
273
|
self.json_str = self.json_str[: self.index] + char + self.json_str[self.index :]
|
272
274
|
self.index += 1
|
273
275
|
|
274
|
-
def get_char_at(self) -> Union[str, bool]:
|
276
|
+
def get_char_at(self, count: int = 0) -> Union[str, bool]:
|
275
277
|
# Why not use something simpler? Because we might be out of bounds and doing this check all the time is annoying
|
276
278
|
try:
|
277
|
-
return self.json_str[self.index]
|
279
|
+
return self.json_str[self.index + count]
|
278
280
|
except IndexError:
|
279
281
|
return False
|
280
282
|
|
@@ -305,8 +307,7 @@ def repair_json(
|
|
305
307
|
It will return the fixed string by default.
|
306
308
|
When `return_objects=True` is passed, it will return the decoded data structure instead.
|
307
309
|
"""
|
308
|
-
json_str =
|
309
|
-
json_str = re.sub(r"\s+$", "", json_str)
|
310
|
+
json_str = json_str.strip()
|
310
311
|
json_str = re.sub(r"/\*.*?\*/", "", json_str)
|
311
312
|
parser = JSONParser(json_str)
|
312
313
|
if skip_json_loads:
|
@@ -0,0 +1,7 @@
|
|
1
|
+
json_repair/__init__.py,sha256=p9mZnte8Bg18NcxqgJ7vopH2gQv_XbZ0dRnk686QuRE,92
|
2
|
+
json_repair/json_repair.py,sha256=Zk5JFdO7n5SyDZZ8SUEFJp6--ey5Pa-jlpXEBzwqtJQ,13311
|
3
|
+
json_repair-0.6.2.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
+
json_repair-0.6.2.dist-info/METADATA,sha256=6zUF2tGJnxPzvog3uw_v9970mx6MgccC4X46ScJHcxQ,6011
|
5
|
+
json_repair-0.6.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
6
|
+
json_repair-0.6.2.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
+
json_repair-0.6.2.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=p9mZnte8Bg18NcxqgJ7vopH2gQv_XbZ0dRnk686QuRE,92
|
2
|
-
json_repair/json_repair.py,sha256=LsyNEo2mDgEcwyJo8A0WeH8ftl3GGxZ5rCcltXcINwI,13264
|
3
|
-
json_repair-0.6.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
-
json_repair-0.6.0.dist-info/METADATA,sha256=t9EOoV2aaymk5054TKJK5xKLUiCax0DmCDhwpLQ31Oo,6011
|
5
|
-
json_repair-0.6.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
6
|
-
json_repair-0.6.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
-
json_repair-0.6.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|