json-repair 0.13.0__py3-none-any.whl → 0.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_repair.py +42 -19
- {json_repair-0.13.0.dist-info → json_repair-0.14.0.dist-info}/METADATA +5 -2
- json_repair-0.14.0.dist-info/RECORD +7 -0
- json_repair-0.13.0.dist-info/RECORD +0 -7
- {json_repair-0.13.0.dist-info → json_repair-0.14.0.dist-info}/LICENSE +0 -0
- {json_repair-0.13.0.dist-info → json_repair-0.14.0.dist-info}/WHEEL +0 -0
- {json_repair-0.13.0.dist-info → json_repair-0.14.0.dist-info}/top_level.txt +0 -0
json_repair/json_repair.py
CHANGED
@@ -34,6 +34,7 @@ class JSONParser:
|
|
34
34
|
self.index = 0
|
35
35
|
# This is used in the object member parsing to manage the special cases of missing quotes in key or value
|
36
36
|
self.context = ""
|
37
|
+
self.context_stack = []
|
37
38
|
|
38
39
|
def parse(self) -> Union[Dict[str, Any], List[Any], str, float, int, bool, None]:
|
39
40
|
return self.parse_json()
|
@@ -97,7 +98,7 @@ class JSONParser:
|
|
97
98
|
|
98
99
|
# We are now searching for the string key
|
99
100
|
# Context is used in the string parser to manage the lack of quotes
|
100
|
-
self.context = "object_key"
|
101
|
+
self.update_context("object_key")
|
101
102
|
|
102
103
|
self.skip_whitespaces_at()
|
103
104
|
|
@@ -119,12 +120,13 @@ class JSONParser:
|
|
119
120
|
if (self.get_char_at() or "") != ":":
|
120
121
|
self.insert_char_at(":")
|
121
122
|
self.index += 1
|
122
|
-
self.context = "object_value"
|
123
|
+
self.update_context("")
|
124
|
+
self.update_context("object_value")
|
123
125
|
# The value can be any valid json
|
124
126
|
value = self.parse_json()
|
125
127
|
|
126
128
|
# Reset context since our job is done
|
127
|
-
self.context = ""
|
129
|
+
self.update_context("")
|
128
130
|
obj[key] = value
|
129
131
|
|
130
132
|
if (self.get_char_at() or "") in [",", "'", '"']:
|
@@ -157,6 +159,9 @@ class JSONParser:
|
|
157
159
|
while char and (char.isspace() or char == ","):
|
158
160
|
self.index += 1
|
159
161
|
char = self.get_char_at()
|
162
|
+
# If this is the right value of an object and we are closing the object, it means the array is over
|
163
|
+
if self.context == "object_value" and char == "}":
|
164
|
+
break
|
160
165
|
|
161
166
|
# Especially at the end of an LLM generated json you might miss the last "]"
|
162
167
|
char = self.get_char_at()
|
@@ -166,6 +171,7 @@ class JSONParser:
|
|
166
171
|
# Remove trailing "," before adding the "]"
|
167
172
|
self.remove_char_at()
|
168
173
|
self.insert_char_at("]")
|
174
|
+
self.index -= 1
|
169
175
|
|
170
176
|
self.index += 1
|
171
177
|
return arr
|
@@ -177,15 +183,14 @@ class JSONParser:
|
|
177
183
|
|
178
184
|
# Flag to manage corner cases related to missing starting quote
|
179
185
|
fixed_quotes = False
|
180
|
-
double_delimiter = False
|
181
186
|
lstring_delimiter = rstring_delimiter = '"'
|
182
187
|
if isinstance(string_quotes, list):
|
183
188
|
lstring_delimiter = string_quotes[0]
|
184
189
|
rstring_delimiter = string_quotes[1]
|
185
190
|
elif isinstance(string_quotes, str):
|
186
191
|
lstring_delimiter = rstring_delimiter = string_quotes
|
192
|
+
# There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
|
187
193
|
if self.get_char_at(1) == lstring_delimiter:
|
188
|
-
double_delimiter = True
|
189
194
|
self.index += 1
|
190
195
|
char = self.get_char_at()
|
191
196
|
if char != lstring_delimiter:
|
@@ -204,7 +209,6 @@ class JSONParser:
|
|
204
209
|
# * It iterated over the entire sequence
|
205
210
|
# * If we are fixing missing quotes in an object, when it finds the special terminators
|
206
211
|
char = self.get_char_at()
|
207
|
-
fix_broken_markdown_link = False
|
208
212
|
while char and char != rstring_delimiter:
|
209
213
|
if fixed_quotes:
|
210
214
|
if self.context == "object_key" and (char == ":" or char.isspace()):
|
@@ -221,19 +225,27 @@ class JSONParser:
|
|
221
225
|
else:
|
222
226
|
self.remove_char_at(-1)
|
223
227
|
self.index -= 1
|
224
|
-
# ChatGPT sometimes forget to quote
|
228
|
+
# ChatGPT sometimes forgets to quote stuff in html tags or markdown, so we do this whole thing here
|
225
229
|
if (
|
226
230
|
char == rstring_delimiter
|
227
|
-
# Next character is not a
|
228
|
-
and self.get_char_at(1)
|
229
|
-
and (
|
230
|
-
fix_broken_markdown_link
|
231
|
-
or (self.get_char_at(-2) == "]" and self.get_char_at(-1)) == "("
|
232
|
-
)
|
231
|
+
# Next character is not a delimiter
|
232
|
+
and self.get_char_at(1) not in [",", ":", "]", "}"]
|
233
233
|
):
|
234
|
-
|
235
|
-
self.
|
236
|
-
|
234
|
+
# Special case here, in case of double quotes one after another
|
235
|
+
if self.get_char_at(1) == rstring_delimiter:
|
236
|
+
# self destruct this character
|
237
|
+
self.remove_char_at()
|
238
|
+
else:
|
239
|
+
# Check if eventually there is a rstring delimiter, otherwise we bail
|
240
|
+
i = 2
|
241
|
+
next_c = self.get_char_at(i)
|
242
|
+
while next_c and next_c != rstring_delimiter:
|
243
|
+
i += 1
|
244
|
+
next_c = self.get_char_at(i)
|
245
|
+
# In that case we ignore this rstring delimiter
|
246
|
+
if next_c:
|
247
|
+
self.index += 1
|
248
|
+
char = self.get_char_at()
|
237
249
|
|
238
250
|
if char and fixed_quotes and self.context == "object_key" and char.isspace():
|
239
251
|
self.skip_whitespaces_at()
|
@@ -247,10 +259,8 @@ class JSONParser:
|
|
247
259
|
self.insert_char_at(rstring_delimiter)
|
248
260
|
else:
|
249
261
|
self.index += 1
|
250
|
-
if double_delimiter and self.get_char_at() == rstring_delimiter:
|
251
|
-
self.index += 1
|
252
262
|
|
253
|
-
return self.json_str[start:end]
|
263
|
+
return self.json_str[start:end].rstrip()
|
254
264
|
|
255
265
|
def parse_number(self) -> Union[float, int, str]:
|
256
266
|
# <number> is a valid real number expressed in one of a number of given formats
|
@@ -319,6 +329,19 @@ class JSONParser:
|
|
319
329
|
except IndexError:
|
320
330
|
return
|
321
331
|
|
332
|
+
def update_context(self, value: str) -> None:
|
333
|
+
# If a value is provided update the context variable and save in stack
|
334
|
+
if value:
|
335
|
+
if self.context:
|
336
|
+
self.context_stack.append(self.context)
|
337
|
+
self.context = value
|
338
|
+
# Otherwise pop and update the context, or empty if the stack is empty
|
339
|
+
else:
|
340
|
+
try:
|
341
|
+
self.context = self.context_stack.pop()
|
342
|
+
except Exception:
|
343
|
+
self.context = ""
|
344
|
+
|
322
345
|
|
323
346
|
def repair_json(
|
324
347
|
json_str: str, return_objects: bool = False, skip_json_loads: bool = False
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.13.0
|
3
|
+
Version: 0.14.0
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -150,7 +150,10 @@ You will need owner access to this repository
|
|
150
150
|
- Run `python -m build`
|
151
151
|
- Create a new release in GitHub, making sure to tag all the issues solved and contributors. Create the new tag, same as the one in the build configuration
|
152
152
|
- Once the release is created, a new GitHub Actions workflow will start to publish on PyPI; make sure it didn't fail
|
153
|
-
|
153
|
+
---
|
154
|
+
# Repair JSON in other programming languages
|
155
|
+
- Typescript: https://github.com/josdejong/jsonrepair
|
156
|
+
- Go: https://github.com/RealAlexandreAI/json-repair
|
154
157
|
---
|
155
158
|
# Bonus Content
|
156
159
|
If you need some good Custom Instructions (System Message) to improve your chatbot responses try https://gist.github.com/mangiucugna/7ec015c4266df11be8aa510be0110fe4
|
@@ -0,0 +1,7 @@
|
|
1
|
+
json_repair/__init__.py,sha256=AlNie5y6BZBioGi5fzTAUvum_y0U5aL5aNsuQ_68LQc,175
|
2
|
+
json_repair/json_repair.py,sha256=8B5HfWoLlUUtRYq1cnbajOxWiMSD9nxNW2cRFPjFfVE,15817
|
3
|
+
json_repair-0.14.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
+
json_repair-0.14.0.dist-info/METADATA,sha256=82KldmuVFLXbNy6SXar9MsulkcBUM1K8RX13pNysHQU,7355
|
5
|
+
json_repair-0.14.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
+
json_repair-0.14.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
+
json_repair-0.14.0.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=AlNie5y6BZBioGi5fzTAUvum_y0U5aL5aNsuQ_68LQc,175
|
2
|
-
json_repair/json_repair.py,sha256=BHDcvo8C-vc3QO4dhgD5cP7IZVkGWGDbv8hNmQH1B-I,14653
|
3
|
-
json_repair-0.13.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
-
json_repair-0.13.0.dist-info/METADATA,sha256=E6v_JcBwXe4oMJhtxgF-9-24kG2j5LiH-ivqufNHFpA,7200
|
5
|
-
json_repair-0.13.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
-
json_repair-0.13.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
-
json_repair-0.13.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|