json-repair 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_repair.py +38 -46
- {json_repair-0.4.4.dist-info → json_repair-0.4.5.dist-info}/METADATA +1 -1
- json_repair-0.4.5.dist-info/RECORD +7 -0
- json_repair-0.4.4.dist-info/RECORD +0 -7
- {json_repair-0.4.4.dist-info → json_repair-0.4.5.dist-info}/LICENSE +0 -0
- {json_repair-0.4.4.dist-info → json_repair-0.4.5.dist-info}/WHEEL +0 -0
- {json_repair-0.4.4.dist-info → json_repair-0.4.5.dist-info}/top_level.txt +0 -0
json_repair/json_repair.py
CHANGED
@@ -100,9 +100,10 @@ class JSONParser:
|
|
100
100
|
# Context is used in the string parser to manage the lack of quotes
|
101
101
|
self.context = "object_key"
|
102
102
|
|
103
|
-
# <member> starts with a <string>
|
104
103
|
self.skip_whitespaces_at()
|
105
|
-
|
104
|
+
|
105
|
+
# <member> starts with a <string>
|
106
|
+
key = ""
|
106
107
|
while key == "" and self.get_char_at():
|
107
108
|
key = self.parse_string()
|
108
109
|
|
@@ -110,8 +111,6 @@ class JSONParser:
|
|
110
111
|
if key == "}":
|
111
112
|
continue
|
112
113
|
|
113
|
-
# Reset context
|
114
|
-
self.context = ""
|
115
114
|
# An extreme case of missing ":" after a key
|
116
115
|
if (self.get_char_at() or "") != ":":
|
117
116
|
self.insert_char_at(":")
|
@@ -119,6 +118,8 @@ class JSONParser:
|
|
119
118
|
self.context = "object_value"
|
120
119
|
# The value can be any valid json
|
121
120
|
value = self.parse_json()
|
121
|
+
|
122
|
+
# Reset context since our job is done
|
122
123
|
self.context = ""
|
123
124
|
obj[key] = value
|
124
125
|
|
@@ -141,6 +142,7 @@ class JSONParser:
|
|
141
142
|
while (self.get_char_at() or "]") != "]":
|
142
143
|
value = self.parse_json()
|
143
144
|
|
145
|
+
# It is possible that parse_json() returns nothing valid, so we stop
|
144
146
|
if not value:
|
145
147
|
break
|
146
148
|
|
@@ -153,9 +155,10 @@ class JSONParser:
|
|
153
155
|
char = self.get_char_at()
|
154
156
|
|
155
157
|
# Especially at the end of an LLM generated json you might miss the last "]"
|
156
|
-
|
158
|
+
char = self.get_char_at()
|
159
|
+
if char and char != "]":
|
157
160
|
# Sometimes when you fix a missing "]" you'll have a trailing "," there that makes the JSON invalid
|
158
|
-
if
|
161
|
+
if char == ",":
|
159
162
|
# Remove trailing "," before adding the "]"
|
160
163
|
self.remove_char_at()
|
161
164
|
self.insert_char_at("]")
|
@@ -165,16 +168,20 @@ class JSONParser:
|
|
165
168
|
|
166
169
|
def parse_string(self) -> str:
|
167
170
|
# <string> is a string of valid characters enclosed in quotes
|
171
|
+
# i.e. { name: "John" }
|
168
172
|
# Somehow all weird cases in an invalid JSON happen to be resolved in this function, so be careful here
|
173
|
+
|
169
174
|
# Flag to manage corner cases related to missing starting quote
|
170
175
|
fixed_quotes = False
|
171
|
-
|
172
|
-
|
176
|
+
|
177
|
+
char = self.get_char_at()
|
178
|
+
if char != '"':
|
173
179
|
self.insert_char_at('"')
|
174
180
|
fixed_quotes = True
|
175
181
|
else:
|
176
182
|
self.index += 1
|
177
|
-
|
183
|
+
|
184
|
+
# Start position of the string (to use later in the return value)
|
178
185
|
start = self.index
|
179
186
|
|
180
187
|
# Here things get a bit hairy because a string missing the final quote can also be a key or a value in an object
|
@@ -184,38 +191,26 @@ class JSONParser:
|
|
184
191
|
# * It iterated over the entire sequence
|
185
192
|
# * If we are fixing missing quotes in an object, when it finds the special terminators
|
186
193
|
char = self.get_char_at()
|
187
|
-
while
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
not fixed_quotes
|
194
|
-
or self.context != "object_value"
|
195
|
-
or (char != "," and char != "}")
|
196
|
-
)
|
197
|
-
):
|
194
|
+
while char and char != '"':
|
195
|
+
if fixed_quotes:
|
196
|
+
if self.context == "object_key" and (char == ":" or char.isspace()):
|
197
|
+
break
|
198
|
+
elif self.context == "object_value" and (char == "," or char == "}"):
|
199
|
+
break
|
198
200
|
self.index += 1
|
199
201
|
char = self.get_char_at()
|
200
202
|
|
201
|
-
|
202
|
-
if (
|
203
|
-
fixed_quotes
|
204
|
-
and self.context == "object_key"
|
205
|
-
and (self.get_char_at() or "").isspace()
|
206
|
-
):
|
207
|
-
# skip whitespaces
|
203
|
+
if char and fixed_quotes and self.context == "object_key" and char.isspace():
|
208
204
|
self.skip_whitespaces_at()
|
209
|
-
|
210
|
-
|
211
|
-
if (self.get_char_at() or "") not in [":", ","]:
|
205
|
+
if self.get_char_at() not in [":", ","]:
|
212
206
|
return ""
|
213
207
|
|
214
208
|
end = self.index
|
215
|
-
|
216
|
-
self.insert_char_at('"')
|
209
|
+
|
217
210
|
# A fallout of the previous special case in the while loop, we need to update the index only if we had a closing quote
|
218
|
-
if
|
211
|
+
if char != '"':
|
212
|
+
self.insert_char_at('"')
|
213
|
+
else:
|
219
214
|
self.index += 1
|
220
215
|
|
221
216
|
return self.json_str[start:end]
|
@@ -223,8 +218,9 @@ class JSONParser:
|
|
223
218
|
def parse_number(self) -> Union[float, int]:
|
224
219
|
# <number> is a valid real number expressed in one of a number of given formats
|
225
220
|
number_str = ""
|
221
|
+
number_chars = set("0123456789-.eE")
|
226
222
|
char = self.get_char_at()
|
227
|
-
while char and
|
223
|
+
while char and char in number_chars:
|
228
224
|
number_str += char
|
229
225
|
self.index += 1
|
230
226
|
char = self.get_char_at()
|
@@ -239,18 +235,14 @@ class JSONParser:
|
|
239
235
|
|
240
236
|
def parse_boolean_or_null(self) -> Union[bool, None]:
|
241
237
|
# <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
return None
|
251
|
-
else:
|
252
|
-
# This is a string then
|
253
|
-
return self.parse_string()
|
238
|
+
boolean_map = {"true": (True, 4), "false": (False, 5), "null": (None, 4)}
|
239
|
+
for key, (value, length) in boolean_map.items():
|
240
|
+
if self.json_str.startswith(key, self.index):
|
241
|
+
self.index += length
|
242
|
+
return value
|
243
|
+
|
244
|
+
# This is a string then
|
245
|
+
return self.parse_string()
|
254
246
|
|
255
247
|
def insert_char_at(self, char: str) -> None:
|
256
248
|
self.json_str = self.json_str[: self.index] + char + self.json_str[self.index :]
|
@@ -0,0 +1,7 @@
|
|
1
|
+
json_repair/__init__.py,sha256=p9mZnte8Bg18NcxqgJ7vopH2gQv_XbZ0dRnk686QuRE,92
|
2
|
+
json_repair/json_repair.py,sha256=ezQraeyIucpwRgnh12S5m7Mn2eR5i0rKdwAkLrkAhEA,12136
|
3
|
+
json_repair-0.4.5.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
+
json_repair-0.4.5.dist-info/METADATA,sha256=C6GUgX3IMBJfr_kecH3ZD6jkjdgfgdI0S0vFnivHaK4,6011
|
5
|
+
json_repair-0.4.5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
6
|
+
json_repair-0.4.5.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
+
json_repair-0.4.5.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=p9mZnte8Bg18NcxqgJ7vopH2gQv_XbZ0dRnk686QuRE,92
|
2
|
-
json_repair/json_repair.py,sha256=b02twJkjYPlKBU8bCMud3ReIATlb2Tn_CkP1u4e4tN4,12562
|
3
|
-
json_repair-0.4.4.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
-
json_repair-0.4.4.dist-info/METADATA,sha256=JgJrJ_qMBYQS06W_O2PfJoMT-ngjlvx4aLg3AYVdQ18,6011
|
5
|
-
json_repair-0.4.4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
6
|
-
json_repair-0.4.4.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
-
json_repair-0.4.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|