json-repair 0.15.5__tar.gz → 0.15.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {json_repair-0.15.5/src/json_repair.egg-info → json_repair-0.15.6}/PKG-INFO +1 -1
- {json_repair-0.15.5 → json_repair-0.15.6}/pyproject.toml +1 -1
- {json_repair-0.15.5 → json_repair-0.15.6}/src/json_repair/json_repair.py +16 -10
- {json_repair-0.15.5 → json_repair-0.15.6/src/json_repair.egg-info}/PKG-INFO +1 -1
- {json_repair-0.15.5 → json_repair-0.15.6}/tests/test_json_repair.py +12 -0
- {json_repair-0.15.5 → json_repair-0.15.6}/LICENSE +0 -0
- {json_repair-0.15.5 → json_repair-0.15.6}/README.md +0 -0
- {json_repair-0.15.5 → json_repair-0.15.6}/setup.cfg +0 -0
- {json_repair-0.15.5 → json_repair-0.15.6}/src/json_repair/__init__.py +0 -0
- {json_repair-0.15.5 → json_repair-0.15.6}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.15.5 → json_repair-0.15.6}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.15.5 → json_repair-0.15.6}/src/json_repair.egg-info/top_level.txt +0 -0
- {json_repair-0.15.5 → json_repair-0.15.6}/tests/test_performance.py +0 -0
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.15.5"
|
6
|
+
version = "0.15.6"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -174,6 +174,7 @@ class JSONParser:
|
|
174
174
|
arr = []
|
175
175
|
# Stop when you either find the closing parentheses or you have iterated over the entire string
|
176
176
|
while (self.get_char_at() or "]") != "]":
|
177
|
+
self.skip_whitespaces_at()
|
177
178
|
value = self.parse_json()
|
178
179
|
|
179
180
|
# It is possible that parse_json() returns nothing valid, so we stop
|
@@ -218,6 +219,7 @@ class JSONParser:
|
|
218
219
|
|
219
220
|
# Flag to manage corner cases related to missing starting quote
|
220
221
|
fixed_quotes = False
|
222
|
+
doubled_quotes = False
|
221
223
|
lstring_delimiter = rstring_delimiter = '"'
|
222
224
|
if isinstance(string_quotes, list):
|
223
225
|
lstring_delimiter = string_quotes[0]
|
@@ -239,6 +241,7 @@ class JSONParser:
|
|
239
241
|
"While parsing a string, we found a valid starting doubled quote, ignoring it",
|
240
242
|
"info",
|
241
243
|
)
|
244
|
+
doubled_quotes = True
|
242
245
|
self.index += 1
|
243
246
|
char = self.get_char_at()
|
244
247
|
if char != lstring_delimiter:
|
@@ -279,13 +282,9 @@ class JSONParser:
|
|
279
282
|
self.remove_char_at(-1)
|
280
283
|
self.index -= 1
|
281
284
|
# ChatGPT sometimes forget to quote stuff in html tags or markdown, so we do this whole thing here
|
282
|
-
if (
|
283
|
-
char == rstring_delimiter
|
284
|
-
# Next character is not a delimiter
|
285
|
-
and self.get_char_at(1) not in [",", ":", "]", "}"]
|
286
|
-
):
|
285
|
+
if char == rstring_delimiter:
|
287
286
|
# Special case here, in case of double quotes one after another
|
288
|
-
if self.get_char_at(1) == rstring_delimiter:
|
287
|
+
if doubled_quotes and self.get_char_at(1) == rstring_delimiter:
|
289
288
|
self.log(
|
290
289
|
"While parsing a string, we found a doubled quote, ignoring it",
|
291
290
|
"info",
|
@@ -294,13 +293,20 @@ class JSONParser:
|
|
294
293
|
self.remove_char_at()
|
295
294
|
else:
|
296
295
|
# Check if eventually there is a rstring delimiter, otherwise we bail
|
297
|
-
i =
|
296
|
+
i = 1
|
297
|
+
context = self.get_context()
|
298
298
|
next_c = self.get_char_at(i)
|
299
299
|
while next_c and next_c != rstring_delimiter:
|
300
|
+
# If we are in an object context, let's check for the right delimiters
|
301
|
+
if (
|
302
|
+
(context == "object_key" and next_c == ":")
|
303
|
+
or (context == "object_value" and next_c in ["}", ","])
|
304
|
+
or (context == "" and next_c in ["]", ","])
|
305
|
+
):
|
306
|
+
break
|
300
307
|
i += 1
|
301
308
|
next_c = self.get_char_at(i)
|
302
|
-
|
303
|
-
if next_c:
|
309
|
+
if next_c == rstring_delimiter:
|
304
310
|
self.log(
|
305
311
|
"While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
|
306
312
|
"info",
|
@@ -416,7 +422,7 @@ class JSONParser:
|
|
416
422
|
|
417
423
|
def get_context(self) -> str:
|
418
424
|
try:
|
419
|
-
return self.context[
|
425
|
+
return self.context[-1]
|
420
426
|
except Exception:
|
421
427
|
return ""
|
422
428
|
|
@@ -190,6 +190,18 @@ def test_repair_json_with_objects():
|
|
190
190
|
}
|
191
191
|
''', True) == {"resourceType": "Bundle", "id": "1", "type": "collection", "entry": [{"resource": {"resourceType": "Patient", "id": "1", "name": [{"use": "official", "family": "Corwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}, {"use": "maiden", "family": "Goodwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}]}}]}
|
192
192
|
assert repair_json('{\n"html": "<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>"}', True) == {'html': '<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>'}
|
193
|
+
assert repair_json("""
|
194
|
+
[
|
195
|
+
{
|
196
|
+
"foo": "Foo bar baz",
|
197
|
+
"tag": "#foo-bar-baz"
|
198
|
+
},
|
199
|
+
{
|
200
|
+
"foo": "foo bar "foobar" foo bar baz.",
|
201
|
+
"tag": "#foo-bar-foobar"
|
202
|
+
}
|
203
|
+
]
|
204
|
+
""", True) == [{"foo": "Foo bar baz", "tag": "#foo-bar-baz"},{"foo": "foo bar \"foobar\" foo bar baz.", "tag": "#foo-bar-foobar" }]
|
193
205
|
|
194
206
|
|
195
207
|
def test_repair_json_corner_cases_generate_by_gpt():
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|