json-repair 0.15.5__tar.gz → 0.16.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {json_repair-0.15.5/src/json_repair.egg-info → json_repair-0.16.0}/PKG-INFO +1 -1
- {json_repair-0.15.5 → json_repair-0.16.0}/pyproject.toml +1 -1
- {json_repair-0.15.5 → json_repair-0.16.0}/src/json_repair/json_repair.py +23 -14
- {json_repair-0.15.5 → json_repair-0.16.0/src/json_repair.egg-info}/PKG-INFO +1 -1
- {json_repair-0.15.5 → json_repair-0.16.0}/tests/test_json_repair.py +15 -0
- {json_repair-0.15.5 → json_repair-0.16.0}/LICENSE +0 -0
- {json_repair-0.15.5 → json_repair-0.16.0}/README.md +0 -0
- {json_repair-0.15.5 → json_repair-0.16.0}/setup.cfg +0 -0
- {json_repair-0.15.5 → json_repair-0.16.0}/src/json_repair/__init__.py +0 -0
- {json_repair-0.15.5 → json_repair-0.16.0}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.15.5 → json_repair-0.16.0}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.15.5 → json_repair-0.16.0}/src/json_repair.egg-info/top_level.txt +0 -0
- {json_repair-0.15.5 → json_repair-0.16.0}/tests/test_performance.py +0 -0
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.15.5"
|
6
|
+
version = "0.16.0"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -64,7 +64,7 @@ class JSONParser:
|
|
64
64
|
return self.parse_array()
|
65
65
|
# there can be an edge case in which a key is empty and at the end of an object
|
66
66
|
# like "key": }. We return an empty string here to close the object properly
|
67
|
-
elif char == "}"
|
67
|
+
elif char == "}":
|
68
68
|
self.log(
|
69
69
|
"At the end of an object we found a key with missing value, skipping",
|
70
70
|
"info",
|
@@ -78,13 +78,13 @@ class JSONParser:
|
|
78
78
|
elif char == "“":
|
79
79
|
return self.parse_string(string_quotes=["“", "”"])
|
80
80
|
# <number> starts with [0-9] or minus
|
81
|
-
elif char.isdigit() or char == "-" or char == ".":
|
81
|
+
elif self.get_context() != "" and char.isdigit() or char == "-" or char == ".":
|
82
82
|
return self.parse_number()
|
83
83
|
# <boolean> could be (T)rue or (F)alse or (N)ull
|
84
|
-
elif char.lower() in ["t", "f", "n"]:
|
84
|
+
elif self.get_context() != "" and char.lower() in ["t", "f", "n"]:
|
85
85
|
return self.parse_boolean_or_null()
|
86
86
|
# This might be a <string> that is missing the starting '"'
|
87
|
-
elif char.isalpha():
|
87
|
+
elif self.get_context() != "" and char.isalpha():
|
88
88
|
return self.parse_string()
|
89
89
|
# If everything else fails, we just ignore and move on
|
90
90
|
else:
|
@@ -131,6 +131,8 @@ class JSONParser:
|
|
131
131
|
"info",
|
132
132
|
)
|
133
133
|
break
|
134
|
+
elif key == "":
|
135
|
+
self.index += 1
|
134
136
|
|
135
137
|
# We reached the end here
|
136
138
|
if (self.get_char_at() or "}") == "}":
|
@@ -172,8 +174,10 @@ class JSONParser:
|
|
172
174
|
def parse_array(self) -> List[Any]:
|
173
175
|
# <array> ::= '[' [ <json> *(', ' <json>) ] ']' ; A sequence of JSON values separated by commas
|
174
176
|
arr = []
|
177
|
+
self.set_context("array")
|
175
178
|
# Stop when you either find the closing parentheses or you have iterated over the entire string
|
176
179
|
while (self.get_char_at() or "]") != "]":
|
180
|
+
self.skip_whitespaces_at()
|
177
181
|
value = self.parse_json()
|
178
182
|
|
179
183
|
# It is possible that parse_json() returns nothing valid, so we stop
|
@@ -209,6 +213,7 @@ class JSONParser:
|
|
209
213
|
self.index -= 1
|
210
214
|
|
211
215
|
self.index += 1
|
216
|
+
self.reset_context()
|
212
217
|
return arr
|
213
218
|
|
214
219
|
def parse_string(self, string_quotes=False) -> str:
|
@@ -218,6 +223,7 @@ class JSONParser:
|
|
218
223
|
|
219
224
|
# Flag to manage corner cases related to missing starting quote
|
220
225
|
fixed_quotes = False
|
226
|
+
doubled_quotes = False
|
221
227
|
lstring_delimiter = rstring_delimiter = '"'
|
222
228
|
if isinstance(string_quotes, list):
|
223
229
|
lstring_delimiter = string_quotes[0]
|
@@ -239,6 +245,7 @@ class JSONParser:
|
|
239
245
|
"While parsing a string, we found a valid starting doubled quote, ignoring it",
|
240
246
|
"info",
|
241
247
|
)
|
248
|
+
doubled_quotes = True
|
242
249
|
self.index += 1
|
243
250
|
char = self.get_char_at()
|
244
251
|
if char != lstring_delimiter:
|
@@ -279,13 +286,9 @@ class JSONParser:
|
|
279
286
|
self.remove_char_at(-1)
|
280
287
|
self.index -= 1
|
281
288
|
# ChatGPT sometimes forget to quote stuff in html tags or markdown, so we do this whole thing here
|
282
|
-
if (
|
283
|
-
char == rstring_delimiter
|
284
|
-
# Next character is not a delimiter
|
285
|
-
and self.get_char_at(1) not in [",", ":", "]", "}"]
|
286
|
-
):
|
289
|
+
if char == rstring_delimiter:
|
287
290
|
# Special case here, in case of double quotes one after another
|
288
|
-
if self.get_char_at(1) == rstring_delimiter:
|
291
|
+
if doubled_quotes and self.get_char_at(1) == rstring_delimiter:
|
289
292
|
self.log(
|
290
293
|
"While parsing a string, we found a doubled quote, ignoring it",
|
291
294
|
"info",
|
@@ -294,13 +297,19 @@ class JSONParser:
|
|
294
297
|
self.remove_char_at()
|
295
298
|
else:
|
296
299
|
# Check if eventually there is a rstring delimiter, otherwise we bail
|
297
|
-
i =
|
300
|
+
i = 1
|
298
301
|
next_c = self.get_char_at(i)
|
299
302
|
while next_c and next_c != rstring_delimiter:
|
303
|
+
# If we are in an object context, let's check for the right delimiters
|
304
|
+
if (
|
305
|
+
("object_key" in self.context and next_c == ":")
|
306
|
+
or ("object_value" in self.context and next_c in ["}", ","])
|
307
|
+
or ("array" in self.context and next_c in ["]", ","])
|
308
|
+
):
|
309
|
+
break
|
300
310
|
i += 1
|
301
311
|
next_c = self.get_char_at(i)
|
302
|
-
|
303
|
-
if next_c:
|
312
|
+
if next_c == rstring_delimiter:
|
304
313
|
self.log(
|
305
314
|
"While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
|
306
315
|
"info",
|
@@ -416,7 +425,7 @@ class JSONParser:
|
|
416
425
|
|
417
426
|
def get_context(self) -> str:
|
418
427
|
try:
|
419
|
-
return self.context[
|
428
|
+
return self.context[-1]
|
420
429
|
except Exception:
|
421
430
|
return ""
|
422
431
|
|
@@ -50,6 +50,8 @@ def test_repair_json():
|
|
50
50
|
assert repair_json("[[1\n\n]") == "[[1]]"
|
51
51
|
assert repair_json("{") == "{}"
|
52
52
|
assert repair_json("}") == '""'
|
53
|
+
assert repair_json("string") == '""'
|
54
|
+
assert repair_json("stringbeforeobject {}") == '{}'
|
53
55
|
assert repair_json('{"') == '{}'
|
54
56
|
assert repair_json('["') == '[]'
|
55
57
|
assert repair_json("'\"'") == '"\\\""'
|
@@ -112,6 +114,7 @@ def test_repair_json():
|
|
112
114
|
assert repair_json('{"key": 1/3}') == '{"key": "1/3"}'
|
113
115
|
assert repair_json('{"key": .25}') == '{"key": 0.25}'
|
114
116
|
assert repair_json("""{ "a": "", "b": [ { "c": 1} ] \n}```""") == '{"a": "", "b": [{"c": 1}]}'
|
117
|
+
assert repair_json("Based on the information extracted, here is the filled JSON output: ```json { 'a': 'b' } ```") == '{"a": "b"}'
|
115
118
|
|
116
119
|
|
117
120
|
def test_repair_json_with_objects():
|
@@ -190,6 +193,18 @@ def test_repair_json_with_objects():
|
|
190
193
|
}
|
191
194
|
''', True) == {"resourceType": "Bundle", "id": "1", "type": "collection", "entry": [{"resource": {"resourceType": "Patient", "id": "1", "name": [{"use": "official", "family": "Corwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}, {"use": "maiden", "family": "Goodwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}]}}]}
|
192
195
|
assert repair_json('{\n"html": "<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>"}', True) == {'html': '<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>'}
|
196
|
+
assert repair_json("""
|
197
|
+
[
|
198
|
+
{
|
199
|
+
"foo": "Foo bar baz",
|
200
|
+
"tag": "#foo-bar-baz"
|
201
|
+
},
|
202
|
+
{
|
203
|
+
"foo": "foo bar "foobar" foo bar baz.",
|
204
|
+
"tag": "#foo-bar-foobar"
|
205
|
+
}
|
206
|
+
]
|
207
|
+
""", True) == [{"foo": "Foo bar baz", "tag": "#foo-bar-baz"},{"foo": "foo bar \"foobar\" foo bar baz.", "tag": "#foo-bar-foobar" }]
|
193
208
|
|
194
209
|
|
195
210
|
def test_repair_json_corner_cases_generate_by_gpt():
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|