json-repair 0.30.3__tar.gz → 0.32.0__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {json_repair-0.30.3/src/json_repair.egg-info → json_repair-0.32.0}/PKG-INFO +1 -1
- {json_repair-0.30.3 → json_repair-0.32.0}/pyproject.toml +1 -1
- {json_repair-0.30.3 → json_repair-0.32.0}/src/json_repair/json_parser.py +24 -1
- {json_repair-0.30.3 → json_repair-0.32.0/src/json_repair.egg-info}/PKG-INFO +1 -1
- {json_repair-0.30.3 → json_repair-0.32.0}/tests/test_json_repair.py +3 -1
- {json_repair-0.30.3 → json_repair-0.32.0}/tests/test_performance.py +5 -5
- {json_repair-0.30.3 → json_repair-0.32.0}/LICENSE +0 -0
- {json_repair-0.30.3 → json_repair-0.32.0}/README.md +0 -0
- {json_repair-0.30.3 → json_repair-0.32.0}/setup.cfg +0 -0
- {json_repair-0.30.3 → json_repair-0.32.0}/src/json_repair/__init__.py +0 -0
- {json_repair-0.30.3 → json_repair-0.32.0}/src/json_repair/__main__.py +0 -0
- {json_repair-0.30.3 → json_repair-0.32.0}/src/json_repair/json_context.py +0 -0
- {json_repair-0.30.3 → json_repair-0.32.0}/src/json_repair/json_repair.py +0 -0
- {json_repair-0.30.3 → json_repair-0.32.0}/src/json_repair/py.typed +0 -0
- {json_repair-0.30.3 → json_repair-0.32.0}/src/json_repair/string_file_wrapper.py +0 -0
- {json_repair-0.30.3 → json_repair-0.32.0}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.30.3 → json_repair-0.32.0}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.30.3 → json_repair-0.32.0}/src/json_repair.egg-info/entry_points.txt +0 -0
- {json_repair-0.30.3 → json_repair-0.32.0}/src/json_repair.egg-info/top_level.txt +0 -0
- {json_repair-0.30.3 → json_repair-0.32.0}/tests/test_coverage.py +0 -0
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.30.3"
|
6
|
+
version = "0.32.0"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -124,6 +124,9 @@ class JSONParser:
|
|
124
124
|
|
125
125
|
self.skip_whitespaces_at()
|
126
126
|
|
127
|
+
# Save this index in case we find a duplicate key and need to roll back
|
128
|
+
rollback_index = self.index
|
129
|
+
|
127
130
|
# <member> starts with a <string>
|
128
131
|
key = ""
|
129
132
|
while self.get_char_at():
|
@@ -132,7 +135,14 @@ class JSONParser:
|
|
132
135
|
if key != "" or (key == "" and self.get_char_at() == ":"):
|
133
136
|
# If the string is empty but there is a object divider, we are done here
|
134
137
|
break
|
138
|
+
if ContextValues.ARRAY in self.context.context and key in obj:
|
139
|
+
self.log(
|
140
|
+
"While parsing an object we found a duplicate key, closing the object here and rolling back the index",
|
141
|
+
)
|
142
|
+
self.index = rollback_index - 1
|
143
|
+
break
|
135
144
|
|
145
|
+
# Skip filler whitespaces
|
136
146
|
self.skip_whitespaces_at()
|
137
147
|
|
138
148
|
# We reached the end here
|
@@ -322,11 +332,24 @@ class JSONParser:
|
|
322
332
|
else:
|
323
333
|
# OK but this could still be some garbage at the end of the string
|
324
334
|
# So we need to check if we find a new lstring_delimiter afterwards
|
325
|
-
# If we do, this is a missing delimiter
|
335
|
+
# If we do, maybe this is a missing delimiter
|
326
336
|
i = self.skip_to_character(character=lstring_delimiter, idx=i)
|
337
|
+
if doubled_quotes:
|
338
|
+
i = self.skip_to_character(
|
339
|
+
character=lstring_delimiter, idx=i
|
340
|
+
)
|
327
341
|
next_c = self.get_char_at(i)
|
328
342
|
if not next_c:
|
329
343
|
rstring_delimiter_missing = False
|
344
|
+
else:
|
345
|
+
# But again, this could just be something a bit stupid like "lorem, "ipsum" sic"
|
346
|
+
# Check if we find a : afterwards (skipping space)
|
347
|
+
i = self.skip_whitespaces_at(
|
348
|
+
idx=i + 1, move_main_index=False
|
349
|
+
)
|
350
|
+
next_c = self.get_char_at(i)
|
351
|
+
if next_c and next_c != ":":
|
352
|
+
rstring_delimiter_missing = False
|
330
353
|
else:
|
331
354
|
# There could be a case in which even the next key:value is missing delimiters
|
332
355
|
# because it might be a systemic issue with the output
|
@@ -108,6 +108,7 @@ def test_missing_and_mixed_quotes():
|
|
108
108
|
assert repair_json('{"key": value "key2" : "value2" ') == '{"key": "value", "key2": "value2"}'
|
109
109
|
assert repair_json('{"key": "lorem ipsum ... "sic " tamet. ...}') == '{"key": "lorem ipsum ... \\"sic \\" tamet. ..."}'
|
110
110
|
assert repair_json('{"key": value , }') == '{"key": "value"}'
|
111
|
+
assert repair_json('{"comment": "lorem, "ipsum" sic "tamet". To improve"}') == '{"comment": "lorem, \\"ipsum\\" sic \\"tamet\\". To improve"}'
|
111
112
|
|
112
113
|
def test_array_edge_cases():
|
113
114
|
assert repair_json("[1, 2, 3,") == "[1, 2, 3]"
|
@@ -135,7 +136,7 @@ def test_object_edge_cases():
|
|
135
136
|
assert repair_json('{"value_1": true, COMMENT "value_2": "data"}') == '{"value_1": true, "value_2": "data"}'
|
136
137
|
assert repair_json('{"value_1": true, SHOULD_NOT_EXIST "value_2": "data" AAAA }') == '{"value_1": true, "value_2": "data"}'
|
137
138
|
assert repair_json('{"" : true, "key2": "value2"}') == '{"": true, "key2": "value2"}'
|
138
|
-
assert repair_json(
|
139
|
+
assert repair_json("""{""answer"":[{""traits"":''Female aged 60+'',""answer1"":""5""}]}""") == '{"answer": [{"traits": "Female aged 60+", "answer1": "5"}]}'
|
139
140
|
assert repair_json('{ "words": abcdef", "numbers": 12345", "words2": ghijkl" }') == '{"words": "abcdef", "numbers": 12345, "words2": "ghijkl"}'
|
140
141
|
assert repair_json('''{"number": 1,"reason": "According...""ans": "YES"}''') == '{"number": 1, "reason": "According...", "ans": "YES"}'
|
141
142
|
assert repair_json('''{ "a" : "{ b": {} }" }''') == '{"a": "{ b"}'
|
@@ -151,6 +152,7 @@ def test_object_edge_cases():
|
|
151
152
|
assert repair_json('{text:words{words in brackets}m}') == '{"text": "words{words in brackets}m"}'
|
152
153
|
assert repair_json('{"key": "value, value2"```') == '{"key": "value, value2"}'
|
153
154
|
assert repair_json('{key:value,key2:value2}') == '{"key": "value", "key2": "value2"}'
|
155
|
+
assert repair_json('[{"lorem": {"ipsum": "sic"}, "lorem": {"ipsum": "sic"}]') == '[{"lorem": {"ipsum": "sic"}}, "lorem", {"ipsum": "sic"}]'
|
154
156
|
|
155
157
|
def test_number_edge_cases():
|
156
158
|
assert repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
|
@@ -19,7 +19,7 @@ def test_true_true_correct(benchmark):
|
|
19
19
|
mean_time = benchmark.stats.get("median")
|
20
20
|
|
21
21
|
# Define your time threshold in seconds
|
22
|
-
max_time = 1.
|
22
|
+
max_time = 1.9 / 10 ** 3 # 1.9 millisecond
|
23
23
|
|
24
24
|
# Assert that the average time is below the threshold
|
25
25
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -31,7 +31,7 @@ def test_true_true_incorrect(benchmark):
|
|
31
31
|
mean_time = benchmark.stats.get("median")
|
32
32
|
|
33
33
|
# Define your time threshold in seconds
|
34
|
-
max_time =
|
34
|
+
max_time = 9 / 10 ** 3 # 9 milliseconds
|
35
35
|
|
36
36
|
# Assert that the average time is below the threshold
|
37
37
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -53,7 +53,7 @@ def test_true_false_incorrect(benchmark):
|
|
53
53
|
mean_time = benchmark.stats.get("median")
|
54
54
|
|
55
55
|
# Define your time threshold in seconds
|
56
|
-
max_time = 1.
|
56
|
+
max_time = 1.9 / 10 ** 3 # 1.9 millisecond
|
57
57
|
|
58
58
|
# Assert that the average time is below the threshold
|
59
59
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -64,7 +64,7 @@ def test_false_true_correct(benchmark):
|
|
64
64
|
mean_time = benchmark.stats.get("median")
|
65
65
|
|
66
66
|
# Define your time threshold in seconds
|
67
|
-
max_time = 1.
|
67
|
+
max_time = 1.9 / 10 ** 3 # 1.9 millisecond
|
68
68
|
|
69
69
|
# Assert that the average time is below the threshold
|
70
70
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -75,7 +75,7 @@ def test_false_true_incorrect(benchmark):
|
|
75
75
|
mean_time = benchmark.stats.get("median")
|
76
76
|
|
77
77
|
# Define your time threshold in seconds
|
78
|
-
max_time = 1.
|
78
|
+
max_time = 1.9 / 10 ** 3 # 1.9 millisecond
|
79
79
|
|
80
80
|
# Assert that the average time is below the threshold
|
81
81
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|