json-repair 0.9.0__py3-none-any.whl → 0.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_repair.py +22 -15
- {json_repair-0.9.0.dist-info → json_repair-0.10.1.dist-info}/METADATA +6 -6
- json_repair-0.10.1.dist-info/RECORD +7 -0
- json_repair-0.9.0.dist-info/RECORD +0 -7
- {json_repair-0.9.0.dist-info → json_repair-0.10.1.dist-info}/LICENSE +0 -0
- {json_repair-0.9.0.dist-info → json_repair-0.10.1.dist-info}/WHEEL +0 -0
- {json_repair-0.9.0.dist-info → json_repair-0.10.1.dist-info}/top_level.txt +0 -0
json_repair/json_repair.py
CHANGED
@@ -61,7 +61,9 @@ class JSONParser:
|
|
61
61
|
elif char == '"':
|
62
62
|
return self.parse_string()
|
63
63
|
elif char == "'":
|
64
|
-
return self.parse_string(
|
64
|
+
return self.parse_string(string_quotes="'")
|
65
|
+
elif char == "“":
|
66
|
+
return self.parse_string(string_quotes=["“", "”"])
|
65
67
|
# <number> starts with [0-9] or minus
|
66
68
|
elif char.isdigit() or char == "-":
|
67
69
|
return self.parse_number()
|
@@ -102,9 +104,7 @@ class JSONParser:
|
|
102
104
|
# <member> starts with a <string>
|
103
105
|
key = ""
|
104
106
|
while key == "" and self.get_char_at():
|
105
|
-
key = self.
|
106
|
-
use_single_quotes=(self.json_str[self.index] == "'")
|
107
|
-
)
|
107
|
+
key = self.parse_json()
|
108
108
|
|
109
109
|
# This can happen sometimes like { "": "value" }
|
110
110
|
if key == "" and self.get_char_at() == ":":
|
@@ -112,7 +112,7 @@ class JSONParser:
|
|
112
112
|
break
|
113
113
|
|
114
114
|
# We reached the end here
|
115
|
-
if
|
115
|
+
if (self.get_char_at() or "}") == "}":
|
116
116
|
continue
|
117
117
|
|
118
118
|
# An extreme case of missing ":" after a key
|
@@ -170,19 +170,22 @@ class JSONParser:
|
|
170
170
|
self.index += 1
|
171
171
|
return arr
|
172
172
|
|
173
|
-
def parse_string(self,
|
173
|
+
def parse_string(self, string_quotes=False) -> str:
|
174
174
|
# <string> is a string of valid characters enclosed in quotes
|
175
175
|
# i.e. { name: "John" }
|
176
176
|
# Somehow all weird cases in an invalid JSON happen to be resolved in this function, so be careful here
|
177
177
|
|
178
178
|
# Flag to manage corner cases related to missing starting quote
|
179
179
|
fixed_quotes = False
|
180
|
-
|
181
|
-
if
|
182
|
-
|
180
|
+
lstring_delimiter = rstring_delimiter = '"'
|
181
|
+
if isinstance(string_quotes, list):
|
182
|
+
lstring_delimiter = string_quotes[0]
|
183
|
+
rstring_delimiter = string_quotes[1]
|
184
|
+
elif isinstance(string_quotes, str):
|
185
|
+
lstring_delimiter = rstring_delimiter = string_quotes
|
183
186
|
char = self.get_char_at()
|
184
|
-
if char !=
|
185
|
-
self.insert_char_at(
|
187
|
+
if char != lstring_delimiter:
|
188
|
+
self.insert_char_at(lstring_delimiter)
|
186
189
|
fixed_quotes = True
|
187
190
|
else:
|
188
191
|
self.index += 1
|
@@ -198,7 +201,7 @@ class JSONParser:
|
|
198
201
|
# * If we are fixing missing quotes in an object, when it finds the special terminators
|
199
202
|
char = self.get_char_at()
|
200
203
|
fix_broken_markdown_link = False
|
201
|
-
while char and char !=
|
204
|
+
while char and char != rstring_delimiter:
|
202
205
|
if fixed_quotes:
|
203
206
|
if self.context == "object_key" and (char == ":" or char.isspace()):
|
204
207
|
break
|
@@ -206,9 +209,13 @@ class JSONParser:
|
|
206
209
|
break
|
207
210
|
self.index += 1
|
208
211
|
char = self.get_char_at()
|
212
|
+
# If the string contains escaped delimiters we should respect that
|
213
|
+
if char == rstring_delimiter and self.get_char_at(-1) == "\\":
|
214
|
+
self.index += 1
|
215
|
+
char = self.get_char_at()
|
209
216
|
# ChatGPT sometimes forgets to quote links in markdown like: { "content": "[LINK]("https://google.com")" }
|
210
217
|
if (
|
211
|
-
char ==
|
218
|
+
char == rstring_delimiter
|
212
219
|
# Next character is not a comma
|
213
220
|
and self.get_char_at(1) != ","
|
214
221
|
and (
|
@@ -228,8 +235,8 @@ class JSONParser:
|
|
228
235
|
end = self.index
|
229
236
|
|
230
237
|
# A fallout of the previous special case in the while loop, we need to update the index only if we had a closing quote
|
231
|
-
if char !=
|
232
|
-
self.insert_char_at(
|
238
|
+
if char != rstring_delimiter:
|
239
|
+
self.insert_char_at(rstring_delimiter)
|
233
240
|
else:
|
234
241
|
self.index += 1
|
235
242
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.10.1
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -56,20 +56,20 @@ I searched for a lightweight python package that was able to reliably fix this p
|
|
56
56
|
You can use this library to completely replace `json.loads()`:
|
57
57
|
|
58
58
|
import json_repair
|
59
|
-
|
59
|
+
|
60
60
|
decoded_object = json_repair.loads(json_string)
|
61
61
|
|
62
62
|
or just
|
63
63
|
|
64
64
|
import json_repair
|
65
|
-
|
65
|
+
|
66
66
|
decoded_object = json_repair.repair_json(json_string, return_objects=True)
|
67
|
-
|
67
|
+
|
68
68
|
### Performance considerations
|
69
69
|
If you find this library too slow because it is using `json.loads()`, you can skip that by passing `skip_json_loads=True` to `repair_json`. Like:
|
70
70
|
|
71
71
|
from json_repair import repair_json
|
72
|
-
|
72
|
+
|
73
73
|
good_json_string = repair_json(bad_json_string, skip_json_loads=True)
|
74
74
|
|
75
75
|
I made a choice of not using any fast json library to avoid having any external dependency, so that anybody can use it regardless of their stack.
|
@@ -77,7 +77,7 @@ I made a choice of not using any fast json library to avoid having any external
|
|
77
77
|
Some rules of thumb to use:
|
78
78
|
- Setting `return_objects=True` will always be faster because the parser returns an object already and it doesn't have to serialize that object to JSON
|
79
79
|
- `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
|
80
|
-
|
80
|
+
- If you are having issues with escaping, pass the string as a **raw** string like: `r"string with escaping\""`
|
81
81
|
## Adding to requirements
|
82
82
|
**Please pin this library only on the major version!**
|
83
83
|
|
@@ -0,0 +1,7 @@
|
|
1
|
+
json_repair/__init__.py,sha256=p9mZnte8Bg18NcxqgJ7vopH2gQv_XbZ0dRnk686QuRE,92
|
2
|
+
json_repair/json_repair.py,sha256=OIjr9L0CyysGrEEdfeeEkKoiFLaX2sGSnn2MYBqGHHo,13826
|
3
|
+
json_repair-0.10.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
+
json_repair-0.10.1.dist-info/METADATA,sha256=kPI-1mCI6HuVeV0eVV98GR5BS6Tk04tkxDkxKCdKG7E,6404
|
5
|
+
json_repair-0.10.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
6
|
+
json_repair-0.10.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
+
json_repair-0.10.1.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=p9mZnte8Bg18NcxqgJ7vopH2gQv_XbZ0dRnk686QuRE,92
|
2
|
-
json_repair/json_repair.py,sha256=63d6OrbAW3dbJ0C06eGtxSB8x5EASRI3OvdsXvmotKk,13373
|
3
|
-
json_repair-0.9.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
-
json_repair-0.9.0.dist-info/METADATA,sha256=ueRJPKZhwXF66UmZXxcLNVwkOJbjGSbsRR8hhKo78MI,6311
|
5
|
-
json_repair-0.9.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
6
|
-
json_repair-0.9.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
-
json_repair-0.9.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|