json-repair 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_repair.py +47 -49
- {json_repair-0.4.4.dist-info → json_repair-0.5.0.dist-info}/METADATA +2 -2
- json_repair-0.5.0.dist-info/RECORD +7 -0
- json_repair-0.4.4.dist-info/RECORD +0 -7
- {json_repair-0.4.4.dist-info → json_repair-0.5.0.dist-info}/LICENSE +0 -0
- {json_repair-0.4.4.dist-info → json_repair-0.5.0.dist-info}/WHEEL +0 -0
- {json_repair-0.4.4.dist-info → json_repair-0.5.0.dist-info}/top_level.txt +0 -0
json_repair/json_repair.py
CHANGED
@@ -61,6 +61,8 @@ class JSONParser:
|
|
61
61
|
# <string> starts with '"'
|
62
62
|
elif char == '"':
|
63
63
|
return self.parse_string()
|
64
|
+
elif char == "'":
|
65
|
+
return self.parse_string(use_single_quotes=True)
|
64
66
|
# <number> starts with [0-9] or minus
|
65
67
|
elif char.isdigit() or char == "-":
|
66
68
|
return self.parse_number()
|
@@ -100,18 +102,19 @@ class JSONParser:
|
|
100
102
|
# Context is used in the string parser to manage the lack of quotes
|
101
103
|
self.context = "object_key"
|
102
104
|
|
103
|
-
# <member> starts with a <string>
|
104
105
|
self.skip_whitespaces_at()
|
105
|
-
|
106
|
+
|
107
|
+
# <member> starts with a <string>
|
108
|
+
key = ""
|
106
109
|
while key == "" and self.get_char_at():
|
107
|
-
key = self.parse_string(
|
110
|
+
key = self.parse_string(
|
111
|
+
use_single_quotes=(self.json_str[self.index] == "'")
|
112
|
+
)
|
108
113
|
|
109
114
|
# We reached the end here
|
110
115
|
if key == "}":
|
111
116
|
continue
|
112
117
|
|
113
|
-
# Reset context
|
114
|
-
self.context = ""
|
115
118
|
# An extreme case of missing ":" after a key
|
116
119
|
if (self.get_char_at() or "") != ":":
|
117
120
|
self.insert_char_at(":")
|
@@ -119,6 +122,8 @@ class JSONParser:
|
|
119
122
|
self.context = "object_value"
|
120
123
|
# The value can be any valid json
|
121
124
|
value = self.parse_json()
|
125
|
+
|
126
|
+
# Reset context since our job is done
|
122
127
|
self.context = ""
|
123
128
|
obj[key] = value
|
124
129
|
|
@@ -141,6 +146,7 @@ class JSONParser:
|
|
141
146
|
while (self.get_char_at() or "]") != "]":
|
142
147
|
value = self.parse_json()
|
143
148
|
|
149
|
+
# It is possible that parse_json() returns nothing valid, so we stop
|
144
150
|
if not value:
|
145
151
|
break
|
146
152
|
|
@@ -153,9 +159,10 @@ class JSONParser:
|
|
153
159
|
char = self.get_char_at()
|
154
160
|
|
155
161
|
# Especially at the end of an LLM generated json you might miss the last "]"
|
156
|
-
|
162
|
+
char = self.get_char_at()
|
163
|
+
if char and char != "]":
|
157
164
|
# Sometimes when you fix a missing "]" you'll have a trailing "," there that makes the JSON invalid
|
158
|
-
if
|
165
|
+
if char == ",":
|
159
166
|
# Remove trailing "," before adding the "]"
|
160
167
|
self.remove_char_at()
|
161
168
|
self.insert_char_at("]")
|
@@ -163,18 +170,24 @@ class JSONParser:
|
|
163
170
|
self.index += 1
|
164
171
|
return arr
|
165
172
|
|
166
|
-
def parse_string(self) -> str:
|
173
|
+
def parse_string(self, use_single_quotes=False) -> str:
|
167
174
|
# <string> is a string of valid characters enclosed in quotes
|
175
|
+
# i.e. { name: "John" }
|
168
176
|
# Somehow all weird cases in an invalid JSON happen to be resolved in this function, so be careful here
|
177
|
+
|
169
178
|
# Flag to manage corner cases related to missing starting quote
|
170
179
|
fixed_quotes = False
|
171
|
-
|
172
|
-
if
|
173
|
-
|
180
|
+
string_terminator = '"'
|
181
|
+
if use_single_quotes:
|
182
|
+
string_terminator = "'"
|
183
|
+
char = self.get_char_at()
|
184
|
+
if char != string_terminator:
|
185
|
+
self.insert_char_at(string_terminator)
|
174
186
|
fixed_quotes = True
|
175
187
|
else:
|
176
188
|
self.index += 1
|
177
|
-
|
189
|
+
|
190
|
+
# Start position of the string (to use later in the return value)
|
178
191
|
start = self.index
|
179
192
|
|
180
193
|
# Here things get a bit hairy because a string missing the final quote can also be a key or a value in an object
|
@@ -184,38 +197,26 @@ class JSONParser:
|
|
184
197
|
# * It iterated over the entire sequence
|
185
198
|
# * If we are fixing missing quotes in an object, when it finds the special terminators
|
186
199
|
char = self.get_char_at()
|
187
|
-
while
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
not fixed_quotes
|
194
|
-
or self.context != "object_value"
|
195
|
-
or (char != "," and char != "}")
|
196
|
-
)
|
197
|
-
):
|
200
|
+
while char and char != string_terminator:
|
201
|
+
if fixed_quotes:
|
202
|
+
if self.context == "object_key" and (char == ":" or char.isspace()):
|
203
|
+
break
|
204
|
+
elif self.context == "object_value" and (char == "," or char == "}"):
|
205
|
+
break
|
198
206
|
self.index += 1
|
199
207
|
char = self.get_char_at()
|
200
208
|
|
201
|
-
|
202
|
-
if (
|
203
|
-
fixed_quotes
|
204
|
-
and self.context == "object_key"
|
205
|
-
and (self.get_char_at() or "").isspace()
|
206
|
-
):
|
207
|
-
# skip whitespaces
|
209
|
+
if char and fixed_quotes and self.context == "object_key" and char.isspace():
|
208
210
|
self.skip_whitespaces_at()
|
209
|
-
|
210
|
-
|
211
|
-
if (self.get_char_at() or "") not in [":", ","]:
|
211
|
+
if self.get_char_at() not in [":", ","]:
|
212
212
|
return ""
|
213
213
|
|
214
214
|
end = self.index
|
215
|
-
|
216
|
-
self.insert_char_at('"')
|
215
|
+
|
217
216
|
# A fallout of the previous special case in the while loop, we need to update the index only if we had a closing quote
|
218
|
-
if
|
217
|
+
if char != string_terminator:
|
218
|
+
self.insert_char_at(string_terminator)
|
219
|
+
else:
|
219
220
|
self.index += 1
|
220
221
|
|
221
222
|
return self.json_str[start:end]
|
@@ -223,8 +224,9 @@ class JSONParser:
|
|
223
224
|
def parse_number(self) -> Union[float, int]:
|
224
225
|
# <number> is a valid real number expressed in one of a number of given formats
|
225
226
|
number_str = ""
|
227
|
+
number_chars = set("0123456789-.eE")
|
226
228
|
char = self.get_char_at()
|
227
|
-
while char and
|
229
|
+
while char and char in number_chars:
|
228
230
|
number_str += char
|
229
231
|
self.index += 1
|
230
232
|
char = self.get_char_at()
|
@@ -239,18 +241,14 @@ class JSONParser:
|
|
239
241
|
|
240
242
|
def parse_boolean_or_null(self) -> Union[bool, None]:
|
241
243
|
# <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
return None
|
251
|
-
else:
|
252
|
-
# This is a string then
|
253
|
-
return self.parse_string()
|
244
|
+
boolean_map = {"true": (True, 4), "false": (False, 5), "null": (None, 4)}
|
245
|
+
for key, (value, length) in boolean_map.items():
|
246
|
+
if self.json_str.startswith(key, self.index):
|
247
|
+
self.index += length
|
248
|
+
return value
|
249
|
+
|
250
|
+
# This is a string then
|
251
|
+
return self.parse_string()
|
254
252
|
|
255
253
|
def insert_char_at(self, char: str) -> None:
|
256
254
|
self.json_str = self.json_str[: self.index] + char + self.json_str[self.index :]
|
@@ -0,0 +1,7 @@
|
|
1
|
+
json_repair/__init__.py,sha256=p9mZnte8Bg18NcxqgJ7vopH2gQv_XbZ0dRnk686QuRE,92
|
2
|
+
json_repair/json_repair.py,sha256=Hdevgq1_s8JaHCALBd7gTxp8_DexKezz8g6Cm0LcJTc,12505
|
3
|
+
json_repair-0.5.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
+
json_repair-0.5.0.dist-info/METADATA,sha256=6_UiFGc3X93VQ8tFU1y02b16fbMyRybrQu1B7wwBmL8,6011
|
5
|
+
json_repair-0.5.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
6
|
+
json_repair-0.5.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
+
json_repair-0.5.0.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=p9mZnte8Bg18NcxqgJ7vopH2gQv_XbZ0dRnk686QuRE,92
|
2
|
-
json_repair/json_repair.py,sha256=b02twJkjYPlKBU8bCMud3ReIATlb2Tn_CkP1u4e4tN4,12562
|
3
|
-
json_repair-0.4.4.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
-
json_repair-0.4.4.dist-info/METADATA,sha256=JgJrJ_qMBYQS06W_O2PfJoMT-ngjlvx4aLg3AYVdQ18,6011
|
5
|
-
json_repair-0.4.4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
6
|
-
json_repair-0.4.4.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
-
json_repair-0.4.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|