json-repair 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_repair.py +26 -18
- {json_repair-0.4.0.dist-info → json_repair-0.4.1.dist-info}/METADATA +5 -1
- json_repair-0.4.1.dist-info/RECORD +7 -0
- json_repair-0.4.0.dist-info/RECORD +0 -7
- {json_repair-0.4.0.dist-info → json_repair-0.4.1.dist-info}/LICENSE +0 -0
- {json_repair-0.4.0.dist-info → json_repair-0.4.1.dist-info}/WHEEL +0 -0
- {json_repair-0.4.0.dist-info → json_repair-0.4.1.dist-info}/top_level.txt +0 -0
json_repair/json_repair.py
CHANGED
@@ -62,7 +62,7 @@ class JSONParser:
|
|
62
62
|
# <number> starts with [0-9] or minus
|
63
63
|
elif char.isdigit() or char == "-":
|
64
64
|
return self.parse_number()
|
65
|
-
# <boolean> could (T)rue or (F)alse or (N)ull
|
65
|
+
# <boolean> could be (T)rue or (F)alse or (N)ull
|
66
66
|
elif char == "t" or char == "f" or char == "n":
|
67
67
|
return self.parse_boolean_or_null()
|
68
68
|
# This might be a <string> that is missing the starting '"'
|
@@ -70,6 +70,7 @@ class JSONParser:
|
|
70
70
|
return self.parse_string()
|
71
71
|
# Ignore whitespaces outside of strings
|
72
72
|
elif char.isspace():
|
73
|
+
self.index += 1
|
73
74
|
self.skip_whitespaces_at()
|
74
75
|
return self.parse_json()
|
75
76
|
# If everything else fails, then we give up and return an exception
|
@@ -104,6 +105,7 @@ class JSONParser:
|
|
104
105
|
self.context = "object_key"
|
105
106
|
|
106
107
|
# <member> starts with a <string>
|
108
|
+
self.skip_whitespaces_at()
|
107
109
|
key = self.parse_string()
|
108
110
|
while key == "":
|
109
111
|
key = self.parse_string()
|
@@ -176,8 +178,6 @@ class JSONParser:
|
|
176
178
|
# Flag to manage corner cases related to missing starting quote
|
177
179
|
fixed_quotes = False
|
178
180
|
# i.e. { name: "John" }
|
179
|
-
# Remove any trailing space
|
180
|
-
self.skip_whitespaces_at()
|
181
181
|
if self.get_char_at() != '"':
|
182
182
|
self.insert_char_at('"')
|
183
183
|
fixed_quotes = True
|
@@ -218,11 +218,7 @@ class JSONParser:
|
|
218
218
|
self.skip_whitespaces_at()
|
219
219
|
# This string is invalid if there's no valid termination afterwards
|
220
220
|
|
221
|
-
if (
|
222
|
-
self.get_char_at() != ":"
|
223
|
-
or self.get_char_at() != ","
|
224
|
-
or self.get_char_at() != "}"
|
225
|
-
):
|
221
|
+
if self.get_char_at() not in [":", ","]:
|
226
222
|
return ""
|
227
223
|
|
228
224
|
end = self.index
|
@@ -236,11 +232,13 @@ class JSONParser:
|
|
236
232
|
|
237
233
|
def parse_number(self) -> Union[float, int]:
|
238
234
|
# <number> is a valid real number expressed in one of a number of given formats
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
number_str
|
243
|
-
self.index +=
|
235
|
+
number_str = ""
|
236
|
+
char = self.get_char_at()
|
237
|
+
while char and (char.isdigit() or char in "-.eE"):
|
238
|
+
number_str += char
|
239
|
+
self.index += 1
|
240
|
+
char = self.get_char_at()
|
241
|
+
if number_str:
|
244
242
|
if "." in number_str or "e" in number_str or "E" in number_str:
|
245
243
|
return float(number_str)
|
246
244
|
else:
|
@@ -264,24 +262,34 @@ class JSONParser:
|
|
264
262
|
# This is a string then
|
265
263
|
return self.parse_string()
|
266
264
|
|
267
|
-
# This is a string then
|
268
|
-
return self.parse_string()
|
269
|
-
|
270
265
|
def insert_char_at(self, char: str) -> None:
|
271
266
|
self.json_str = self.json_str[: self.index] + char + self.json_str[self.index :]
|
272
267
|
self.index += 1
|
273
268
|
|
274
269
|
def get_char_at(self) -> Union[str, bool]:
|
275
270
|
# Why not use something simpler? Because we might be out of bounds and doing this check all the time is annoying
|
276
|
-
|
271
|
+
try:
|
272
|
+
return self.json_str[self.index]
|
273
|
+
except IndexError:
|
274
|
+
return False
|
277
275
|
|
278
276
|
def remove_char_at(self) -> None:
|
279
277
|
self.json_str = self.json_str[: self.index] + self.json_str[self.index + 1 :]
|
280
278
|
|
281
279
|
def skip_whitespaces_at(self) -> None:
|
282
280
|
# Remove trailing spaces
|
283
|
-
|
281
|
+
# I'd rather not do this BUT this method is called so many times that it makes sense to expand get_char_at
|
282
|
+
# At least this is what the profiler said and I believe in our lord and savior the profiler
|
283
|
+
try:
|
284
|
+
char = self.json_str[self.index]
|
285
|
+
except IndexError:
|
286
|
+
return
|
287
|
+
while char and char.isspace():
|
284
288
|
self.index += 1
|
289
|
+
try:
|
290
|
+
char = self.json_str[self.index]
|
291
|
+
except IndexError:
|
292
|
+
return
|
285
293
|
|
286
294
|
|
287
295
|
def repair_json(
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json-repair
|
3
|
-
Version: 0.4.0
|
3
|
+
Version: 0.4.1
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -85,6 +85,10 @@ If you find this library too slow because is using `json.loads()` you can skip t
|
|
85
85
|
|
86
86
|
I made a choice of not using any fast json library to avoid having any external dependency, so that anybody can use it regardless of their stack.
|
87
87
|
|
88
|
+
Some rules of thumb to use:
|
89
|
+
- Setting `return_objects=True` will always be faster because the parser already returns an object and it doesn't have to serialize that object to JSON
|
90
|
+
- `skip_json_loads` is faster only if you are 100% sure that the string is not valid JSON
|
91
|
+
|
88
92
|
# How it works
|
89
93
|
This module will parse the JSON file following the BNF definition:
|
90
94
|
|
@@ -0,0 +1,7 @@
|
|
1
|
+
json_repair/__init__.py,sha256=p9mZnte8Bg18NcxqgJ7vopH2gQv_XbZ0dRnk686QuRE,92
|
2
|
+
json_repair/json_repair.py,sha256=Qxy8eQpkm9e1qVUDvhYOuID7flLHIzAsr5cB1NWb3Y4,12974
|
3
|
+
json_repair-0.4.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
+
json_repair-0.4.1.dist-info/METADATA,sha256=MXc1lBLV9wr-DNTqqkzuH8W-VYiDNY9UVsvsHJGqMw4,6155
|
5
|
+
json_repair-0.4.1.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
|
6
|
+
json_repair-0.4.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
+
json_repair-0.4.1.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=p9mZnte8Bg18NcxqgJ7vopH2gQv_XbZ0dRnk686QuRE,92
|
2
|
-
json_repair/json_repair.py,sha256=FJf3c9FVBoI09zQXA6RE2sX0fDc0vTKWaOLW86fccf8,12709
|
3
|
-
json_repair-0.4.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
-
json_repair-0.4.0.dist-info/METADATA,sha256=55m7tDzhs_R7g19ecNGFoY4fChmfp6Ft1TzdK8MxN5Q,5889
|
5
|
-
json_repair-0.4.0.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
|
6
|
-
json_repair-0.4.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
-
json_repair-0.4.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|