json-repair 0.15.4__tar.gz → 0.15.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {json_repair-0.15.4/src/json_repair.egg-info → json_repair-0.15.6}/PKG-INFO +1 -1
- {json_repair-0.15.4 → json_repair-0.15.6}/pyproject.toml +1 -1
- {json_repair-0.15.4 → json_repair-0.15.6}/src/json_repair/json_repair.py +26 -18
- {json_repair-0.15.4 → json_repair-0.15.6/src/json_repair.egg-info}/PKG-INFO +1 -1
- {json_repair-0.15.4 → json_repair-0.15.6}/tests/test_json_repair.py +12 -1
- {json_repair-0.15.4 → json_repair-0.15.6}/LICENSE +0 -0
- {json_repair-0.15.4 → json_repair-0.15.6}/README.md +0 -0
- {json_repair-0.15.4 → json_repair-0.15.6}/setup.cfg +0 -0
- {json_repair-0.15.4 → json_repair-0.15.6}/src/json_repair/__init__.py +0 -0
- {json_repair-0.15.4 → json_repair-0.15.6}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.15.4 → json_repair-0.15.6}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.15.4 → json_repair-0.15.6}/src/json_repair.egg-info/top_level.txt +0 -0
- {json_repair-0.15.4 → json_repair-0.15.6}/tests/test_performance.py +0 -0
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.15.4"
|
6
|
+
version = "0.15.6"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -174,6 +174,7 @@ class JSONParser:
|
|
174
174
|
arr = []
|
175
175
|
# Stop when you either find the closing parentheses or you have iterated over the entire string
|
176
176
|
while (self.get_char_at() or "]") != "]":
|
177
|
+
self.skip_whitespaces_at()
|
177
178
|
value = self.parse_json()
|
178
179
|
|
179
180
|
# It is possible that parse_json() returns nothing valid, so we stop
|
@@ -218,6 +219,7 @@ class JSONParser:
|
|
218
219
|
|
219
220
|
# Flag to manage corner cases related to missing starting quote
|
220
221
|
fixed_quotes = False
|
222
|
+
doubled_quotes = False
|
221
223
|
lstring_delimiter = rstring_delimiter = '"'
|
222
224
|
if isinstance(string_quotes, list):
|
223
225
|
lstring_delimiter = string_quotes[0]
|
@@ -226,17 +228,20 @@ class JSONParser:
|
|
226
228
|
lstring_delimiter = rstring_delimiter = string_quotes
|
227
229
|
# There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
|
228
230
|
if self.get_char_at(1) == lstring_delimiter:
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
231
|
+
# This is a valid exception only if it's closed by a double delimiter again
|
232
|
+
i = 2
|
233
|
+
next_c = self.get_char_at(i)
|
234
|
+
while next_c and next_c != rstring_delimiter:
|
235
|
+
i += 1
|
236
|
+
next_c = self.get_char_at(i)
|
237
|
+
# Now check that the next character is also a delimiter to ensure that we have "".....""
|
238
|
+
# In that case we ignore this rstring delimiter
|
239
|
+
if next_c and (self.get_char_at(i + 1) or "") == rstring_delimiter:
|
236
240
|
self.log(
|
237
|
-
"While parsing a string, we found a doubled quote, ignoring it",
|
241
|
+
"While parsing a string, we found a valid starting doubled quote, ignoring it",
|
238
242
|
"info",
|
239
243
|
)
|
244
|
+
doubled_quotes = True
|
240
245
|
self.index += 1
|
241
246
|
char = self.get_char_at()
|
242
247
|
if char != lstring_delimiter:
|
@@ -277,13 +282,9 @@ class JSONParser:
|
|
277
282
|
self.remove_char_at(-1)
|
278
283
|
self.index -= 1
|
279
284
|
# ChatGPT sometimes forget to quote stuff in html tags or markdown, so we do this whole thing here
|
280
|
-
if (
|
281
|
-
char == rstring_delimiter
|
282
|
-
# Next character is not a delimiter
|
283
|
-
and self.get_char_at(1) not in [",", ":", "]", "}"]
|
284
|
-
):
|
285
|
+
if char == rstring_delimiter:
|
285
286
|
# Special case here, in case of double quotes one after another
|
286
|
-
if self.get_char_at(1) == rstring_delimiter:
|
287
|
+
if doubled_quotes and self.get_char_at(1) == rstring_delimiter:
|
287
288
|
self.log(
|
288
289
|
"While parsing a string, we found a doubled quote, ignoring it",
|
289
290
|
"info",
|
@@ -292,13 +293,20 @@ class JSONParser:
|
|
292
293
|
self.remove_char_at()
|
293
294
|
else:
|
294
295
|
# Check if eventually there is a rstring delimiter, otherwise we bail
|
295
|
-
i =
|
296
|
+
i = 1
|
297
|
+
context = self.get_context()
|
296
298
|
next_c = self.get_char_at(i)
|
297
299
|
while next_c and next_c != rstring_delimiter:
|
300
|
+
# If we are in an object context, let's check for the right delimiters
|
301
|
+
if (
|
302
|
+
(context == "object_key" and next_c == ":")
|
303
|
+
or (context == "object_value" and next_c in ["}", ","])
|
304
|
+
or (context == "" and next_c in ["]", ","])
|
305
|
+
):
|
306
|
+
break
|
298
307
|
i += 1
|
299
308
|
next_c = self.get_char_at(i)
|
300
|
-
|
301
|
-
if next_c:
|
309
|
+
if next_c == rstring_delimiter:
|
302
310
|
self.log(
|
303
311
|
"While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
|
304
312
|
"info",
|
@@ -414,7 +422,7 @@ class JSONParser:
|
|
414
422
|
|
415
423
|
def get_context(self) -> str:
|
416
424
|
try:
|
417
|
-
return self.context[
|
425
|
+
return self.context[-1]
|
418
426
|
except Exception:
|
419
427
|
return ""
|
420
428
|
|
@@ -107,7 +107,6 @@ def test_repair_json():
|
|
107
107
|
assert repair_json('{"key\_1\n": "value"}') == '{"key_1": "value"}'
|
108
108
|
assert repair_json('{"key\t\_": "value"}') == '{"key\\t_": "value"}'
|
109
109
|
assert repair_json('{""answer"":[{""traits"":''Female aged 60+'',""answer1"":""5""}]}') == '{"answer": [{"traits": "Female aged 60+", "answer1": "5"}]}'
|
110
|
-
assert repair_json('{""answer":[{""traits":""Female aged 60+",""answer1":""5"}]}') == '{"answer": [{"traits": "Female aged 60+", "answer1": "5"}]}'
|
111
110
|
assert repair_json('{"key":""') == '{"key": ""}'
|
112
111
|
assert repair_json('{ "words": abcdef", "numbers": 12345", "words2": ghijkl" }') == '{"words": "abcdef", "numbers": 12345, "words2": "ghijkl"}'
|
113
112
|
assert repair_json('{"key": 1/3}') == '{"key": "1/3"}'
|
@@ -191,6 +190,18 @@ def test_repair_json_with_objects():
|
|
191
190
|
}
|
192
191
|
''', True) == {"resourceType": "Bundle", "id": "1", "type": "collection", "entry": [{"resource": {"resourceType": "Patient", "id": "1", "name": [{"use": "official", "family": "Corwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}, {"use": "maiden", "family": "Goodwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}]}}]}
|
193
192
|
assert repair_json('{\n"html": "<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>"}', True) == {'html': '<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>'}
|
193
|
+
assert repair_json("""
|
194
|
+
[
|
195
|
+
{
|
196
|
+
"foo": "Foo bar baz",
|
197
|
+
"tag": "#foo-bar-baz"
|
198
|
+
},
|
199
|
+
{
|
200
|
+
"foo": "foo bar "foobar" foo bar baz.",
|
201
|
+
"tag": "#foo-bar-foobar"
|
202
|
+
}
|
203
|
+
]
|
204
|
+
""", True) == [{"foo": "Foo bar baz", "tag": "#foo-bar-baz"},{"foo": "foo bar \"foobar\" foo bar baz.", "tag": "#foo-bar-foobar" }]
|
194
205
|
|
195
206
|
|
196
207
|
def test_repair_json_corner_cases_generate_by_gpt():
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|