json-repair 0.19.2__tar.gz → 0.20.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {json_repair-0.19.2/src/json_repair.egg-info → json_repair-0.20.1}/PKG-INFO +1 -1
- {json_repair-0.19.2 → json_repair-0.20.1}/pyproject.toml +1 -1
- {json_repair-0.19.2 → json_repair-0.20.1}/src/json_repair/json_repair.py +37 -15
- {json_repair-0.19.2 → json_repair-0.20.1/src/json_repair.egg-info}/PKG-INFO +1 -1
- {json_repair-0.19.2 → json_repair-0.20.1}/tests/test_json_repair.py +4 -2
- {json_repair-0.19.2 → json_repair-0.20.1}/LICENSE +0 -0
- {json_repair-0.19.2 → json_repair-0.20.1}/README.md +0 -0
- {json_repair-0.19.2 → json_repair-0.20.1}/setup.cfg +0 -0
- {json_repair-0.19.2 → json_repair-0.20.1}/src/json_repair/__init__.py +0 -0
- {json_repair-0.19.2 → json_repair-0.20.1}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.19.2 → json_repair-0.20.1}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.19.2 → json_repair-0.20.1}/src/json_repair.egg-info/top_level.txt +0 -0
- {json_repair-0.19.2 → json_repair-0.20.1}/tests/test_performance.py +0 -0
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.19.2"
|
6
|
+
version = "0.20.1"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -131,10 +131,14 @@ class JSONParser:
|
|
131
131
|
# Sometimes the string search might not move the index at all, that might lead us to an infinite loop
|
132
132
|
self.index += 1
|
133
133
|
|
134
|
+
self.skip_whitespaces_at()
|
135
|
+
|
134
136
|
# We reached the end here
|
135
137
|
if (self.get_char_at() or "}") == "}":
|
136
138
|
continue
|
137
139
|
|
140
|
+
self.skip_whitespaces_at()
|
141
|
+
|
138
142
|
# An extreme case of missing ":" after a key
|
139
143
|
if (self.get_char_at() or "") != ":":
|
140
144
|
self.log(
|
@@ -178,7 +182,7 @@ class JSONParser:
|
|
178
182
|
value = self.parse_json()
|
179
183
|
|
180
184
|
# It is possible that parse_json() returns nothing valid, so we stop
|
181
|
-
if not value:
|
185
|
+
if value == "":
|
182
186
|
break
|
183
187
|
|
184
188
|
if value == "..." and self.get_char_at(-1) == ".":
|
@@ -335,32 +339,50 @@ class JSONParser:
|
|
335
339
|
# Check if eventually there is a rstring delimiter, otherwise we bail
|
336
340
|
i = 1
|
337
341
|
next_c = self.get_char_at(i)
|
338
|
-
|
342
|
+
check_comma_in_object_value = True
|
343
|
+
while next_c and next_c not in [
|
344
|
+
rstring_delimiter,
|
345
|
+
lstring_delimiter,
|
346
|
+
]:
|
347
|
+
# This is a bit of a weird workaround, essentially in object_value context we don't always break on commas
|
348
|
+
# This is because the routine after will make sure to correct any bad guess and this solves a corner case
|
349
|
+
if next_c.isalpha():
|
350
|
+
check_comma_in_object_value = False
|
339
351
|
# If we are in an object context, let's check for the right delimiters
|
340
352
|
if (
|
341
|
-
next_c
|
342
|
-
or ("
|
343
|
-
or ("object_value" in self.context and next_c in ["}", ","])
|
353
|
+
("object_key" in self.context and next_c in [":", "}"])
|
354
|
+
or ("object_value" in self.context and next_c == "}")
|
344
355
|
or ("array" in self.context and next_c in ["]", ","])
|
356
|
+
or (
|
357
|
+
check_comma_in_object_value
|
358
|
+
and self.get_context() == "object_value"
|
359
|
+
and next_c == ","
|
360
|
+
)
|
345
361
|
):
|
346
362
|
break
|
347
363
|
i += 1
|
348
364
|
next_c = self.get_char_at(i)
|
349
365
|
if next_c == rstring_delimiter:
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
while next_c and next_c != rstring_delimiter:
|
366
|
+
if self.get_context() == "object_value":
|
367
|
+
# But this might not be it! This could be just a missing comma
|
368
|
+
# We found a delimiter and we need to check if this is a key
|
369
|
+
# so find a rstring_delimiter and a colon after
|
355
370
|
i += 1
|
356
371
|
next_c = self.get_char_at(i)
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
if next_c in [lstring_delimiter, rstring_delimiter, ","]:
|
361
|
-
break
|
372
|
+
while next_c and next_c != rstring_delimiter:
|
373
|
+
i += 1
|
374
|
+
next_c = self.get_char_at(i)
|
362
375
|
i += 1
|
363
376
|
next_c = self.get_char_at(i)
|
377
|
+
while next_c and next_c != ":":
|
378
|
+
if next_c in [
|
379
|
+
lstring_delimiter,
|
380
|
+
rstring_delimiter,
|
381
|
+
",",
|
382
|
+
]:
|
383
|
+
break
|
384
|
+
i += 1
|
385
|
+
next_c = self.get_char_at(i)
|
364
386
|
# Only if we fail to find a ':' then we know this is misplaced quote
|
365
387
|
if next_c != ":":
|
366
388
|
self.log(
|
@@ -32,7 +32,7 @@ def test_valid_json():
|
|
32
32
|
assert repair_json('{"key": "value\\nvalue"}') == '{"key": "value\\nvalue"}'
|
33
33
|
|
34
34
|
def test_brackets_edge_cases():
|
35
|
-
assert repair_json("[{]") == "[]"
|
35
|
+
assert repair_json("[{]") == "[{}]"
|
36
36
|
assert repair_json(" { } ") == "{}"
|
37
37
|
assert repair_json("[") == "[]"
|
38
38
|
assert repair_json("]") == '""'
|
@@ -83,6 +83,7 @@ def test_array_edge_cases():
|
|
83
83
|
assert repair_json("[1, 2, 3, ...]") == "[1, 2, 3]"
|
84
84
|
assert repair_json("[1, 2, ... , 3]") == "[1, 2, 3]"
|
85
85
|
assert repair_json("[1, 2, '...', 3]") == '[1, 2, "...", 3]'
|
86
|
+
assert repair_json("[true, false, null, ...]") == '[true, false, null]'
|
86
87
|
assert (
|
87
88
|
repair_json('{"employees":["John", "Anna",')
|
88
89
|
== '{"employees": ["John", "Anna"]}'
|
@@ -119,8 +120,9 @@ def test_object_edge_cases():
|
|
119
120
|
assert repair_json('{""answer"":[{""traits"":''Female aged 60+'',""answer1"":""5""}]}') == '{"answer": [{"traits": "Female aged 60+", "answer1": "5"}]}'
|
120
121
|
assert repair_json('{ "words": abcdef", "numbers": 12345", "words2": ghijkl" }') == '{"words": "abcdef", "numbers": 12345, "words2": "ghijkl"}'
|
121
122
|
assert repair_json('''{"number": 1,"reason": "According...""ans": "YES"}''') == '{"number": 1, "reason": "According...", "ans": "YES"}'
|
122
|
-
assert repair_json('''{ "a": "{ b": {} }" }''') == '{"a": "{ b"}'
|
123
|
+
assert repair_json('''{ "a" : "{ b": {} }" }''') == '{"a": "{ b"}'
|
123
124
|
assert repair_json("""{"b": "xxxxx" true}""") == '{"b": "xxxxx"}'
|
125
|
+
assert repair_json('{"key": "Lorem "ipsum" s,"}') == '{"key": "Lorem \\"ipsum\\" s,"}'
|
124
126
|
|
125
127
|
def test_number_edge_cases():
|
126
128
|
assert repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|