json-repair 0.4.3__py3-none-any.whl → 0.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_repair.py +42 -46
- {json_repair-0.4.3.dist-info → json_repair-0.4.5.dist-info}/METADATA +1 -1
- json_repair-0.4.5.dist-info/RECORD +7 -0
- {json_repair-0.4.3.dist-info → json_repair-0.4.5.dist-info}/WHEEL +1 -1
- json_repair-0.4.3.dist-info/RECORD +0 -7
- {json_repair-0.4.3.dist-info → json_repair-0.4.5.dist-info}/LICENSE +0 -0
- {json_repair-0.4.3.dist-info → json_repair-0.4.5.dist-info}/top_level.txt +0 -0
json_repair/json_repair.py
CHANGED
@@ -100,9 +100,10 @@ class JSONParser:
|
|
100
100
|
# Context is used in the string parser to manage the lack of quotes
|
101
101
|
self.context = "object_key"
|
102
102
|
|
103
|
-
# <member> starts with a <string>
|
104
103
|
self.skip_whitespaces_at()
|
105
|
-
|
104
|
+
|
105
|
+
# <member> starts with a <string>
|
106
|
+
key = ""
|
106
107
|
while key == "" and self.get_char_at():
|
107
108
|
key = self.parse_string()
|
108
109
|
|
@@ -110,8 +111,6 @@ class JSONParser:
|
|
110
111
|
if key == "}":
|
111
112
|
continue
|
112
113
|
|
113
|
-
# Reset context
|
114
|
-
self.context = ""
|
115
114
|
# An extreme case of missing ":" after a key
|
116
115
|
if (self.get_char_at() or "") != ":":
|
117
116
|
self.insert_char_at(":")
|
@@ -119,6 +118,8 @@ class JSONParser:
|
|
119
118
|
self.context = "object_value"
|
120
119
|
# The value can be any valid json
|
121
120
|
value = self.parse_json()
|
121
|
+
|
122
|
+
# Reset context since our job is done
|
122
123
|
self.context = ""
|
123
124
|
obj[key] = value
|
124
125
|
|
@@ -140,6 +141,11 @@ class JSONParser:
|
|
140
141
|
# Stop when you either find the closing parentheses or you have iterated over the entire string
|
141
142
|
while (self.get_char_at() or "]") != "]":
|
142
143
|
value = self.parse_json()
|
144
|
+
|
145
|
+
# It is possible that parse_json() returns nothing valid, so we stop
|
146
|
+
if not value:
|
147
|
+
break
|
148
|
+
|
143
149
|
arr.append(value)
|
144
150
|
|
145
151
|
# skip over whitespace after a value but before closing ]
|
@@ -149,9 +155,10 @@ class JSONParser:
|
|
149
155
|
char = self.get_char_at()
|
150
156
|
|
151
157
|
# Especially at the end of an LLM generated json you might miss the last "]"
|
152
|
-
|
158
|
+
char = self.get_char_at()
|
159
|
+
if char and char != "]":
|
153
160
|
# Sometimes when you fix a missing "]" you'll have a trailing "," there that makes the JSON invalid
|
154
|
-
if
|
161
|
+
if char == ",":
|
155
162
|
# Remove trailing "," before adding the "]"
|
156
163
|
self.remove_char_at()
|
157
164
|
self.insert_char_at("]")
|
@@ -161,16 +168,20 @@ class JSONParser:
|
|
161
168
|
|
162
169
|
def parse_string(self) -> str:
|
163
170
|
# <string> is a string of valid characters enclosed in quotes
|
171
|
+
# i.e. { name: "John" }
|
164
172
|
# Somehow all weird cases in an invalid JSON happen to be resolved in this function, so be careful here
|
173
|
+
|
165
174
|
# Flag to manage corner cases related to missing starting quote
|
166
175
|
fixed_quotes = False
|
167
|
-
|
168
|
-
|
176
|
+
|
177
|
+
char = self.get_char_at()
|
178
|
+
if char != '"':
|
169
179
|
self.insert_char_at('"')
|
170
180
|
fixed_quotes = True
|
171
181
|
else:
|
172
182
|
self.index += 1
|
173
|
-
|
183
|
+
|
184
|
+
# Start position of the string (to use later in the return value)
|
174
185
|
start = self.index
|
175
186
|
|
176
187
|
# Here things get a bit hairy because a string missing the final quote can also be a key or a value in an object
|
@@ -180,38 +191,26 @@ class JSONParser:
|
|
180
191
|
# * It iterated over the entire sequence
|
181
192
|
# * If we are fixing missing quotes in an object, when it finds the special terminators
|
182
193
|
char = self.get_char_at()
|
183
|
-
while
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
not fixed_quotes
|
190
|
-
or self.context != "object_value"
|
191
|
-
or (char != "," and char != "}")
|
192
|
-
)
|
193
|
-
):
|
194
|
+
while char and char != '"':
|
195
|
+
if fixed_quotes:
|
196
|
+
if self.context == "object_key" and (char == ":" or char.isspace()):
|
197
|
+
break
|
198
|
+
elif self.context == "object_value" and (char == "," or char == "}"):
|
199
|
+
break
|
194
200
|
self.index += 1
|
195
201
|
char = self.get_char_at()
|
196
202
|
|
197
|
-
|
198
|
-
if (
|
199
|
-
fixed_quotes
|
200
|
-
and self.context == "object_key"
|
201
|
-
and (self.get_char_at() or "").isspace()
|
202
|
-
):
|
203
|
-
# skip whitespaces
|
203
|
+
if char and fixed_quotes and self.context == "object_key" and char.isspace():
|
204
204
|
self.skip_whitespaces_at()
|
205
|
-
|
206
|
-
|
207
|
-
if (self.get_char_at() or "") not in [":", ","]:
|
205
|
+
if self.get_char_at() not in [":", ","]:
|
208
206
|
return ""
|
209
207
|
|
210
208
|
end = self.index
|
211
|
-
|
212
|
-
self.insert_char_at('"')
|
209
|
+
|
213
210
|
# A fallout of the previous special case in the while loop, we need to update the index only if we had a closing quote
|
214
|
-
if
|
211
|
+
if char != '"':
|
212
|
+
self.insert_char_at('"')
|
213
|
+
else:
|
215
214
|
self.index += 1
|
216
215
|
|
217
216
|
return self.json_str[start:end]
|
@@ -219,8 +218,9 @@ class JSONParser:
|
|
219
218
|
def parse_number(self) -> Union[float, int]:
|
220
219
|
# <number> is a valid real number expressed in one of a number of given formats
|
221
220
|
number_str = ""
|
221
|
+
number_chars = set("0123456789-.eE")
|
222
222
|
char = self.get_char_at()
|
223
|
-
while char and
|
223
|
+
while char and char in number_chars:
|
224
224
|
number_str += char
|
225
225
|
self.index += 1
|
226
226
|
char = self.get_char_at()
|
@@ -235,18 +235,14 @@ class JSONParser:
|
|
235
235
|
|
236
236
|
def parse_boolean_or_null(self) -> Union[bool, None]:
|
237
237
|
# <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
return None
|
247
|
-
else:
|
248
|
-
# This is a string then
|
249
|
-
return self.parse_string()
|
238
|
+
boolean_map = {"true": (True, 4), "false": (False, 5), "null": (None, 4)}
|
239
|
+
for key, (value, length) in boolean_map.items():
|
240
|
+
if self.json_str.startswith(key, self.index):
|
241
|
+
self.index += length
|
242
|
+
return value
|
243
|
+
|
244
|
+
# This is a string then
|
245
|
+
return self.parse_string()
|
250
246
|
|
251
247
|
def insert_char_at(self, char: str) -> None:
|
252
248
|
self.json_str = self.json_str[: self.index] + char + self.json_str[self.index :]
|
@@ -0,0 +1,7 @@
|
|
1
|
+
json_repair/__init__.py,sha256=p9mZnte8Bg18NcxqgJ7vopH2gQv_XbZ0dRnk686QuRE,92
|
2
|
+
json_repair/json_repair.py,sha256=ezQraeyIucpwRgnh12S5m7Mn2eR5i0rKdwAkLrkAhEA,12136
|
3
|
+
json_repair-0.4.5.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
+
json_repair-0.4.5.dist-info/METADATA,sha256=C6GUgX3IMBJfr_kecH3ZD6jkjdgfgdI0S0vFnivHaK4,6011
|
5
|
+
json_repair-0.4.5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
6
|
+
json_repair-0.4.5.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
+
json_repair-0.4.5.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=p9mZnte8Bg18NcxqgJ7vopH2gQv_XbZ0dRnk686QuRE,92
|
2
|
-
json_repair/json_repair.py,sha256=duu-4stxqeJ1ZrVY02wU8I0y7W4q3lh0ocTT4Cov69U,12512
|
3
|
-
json_repair-0.4.3.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
-
json_repair-0.4.3.dist-info/METADATA,sha256=BtpWejgqXSxlO-vnnlzdAf9QWjztTexudWnKvUUiO9g,6011
|
5
|
-
json_repair-0.4.3.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
|
6
|
-
json_repair-0.4.3.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
-
json_repair-0.4.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|