json-repair 0.31.0__tar.gz → 0.33.0__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {json_repair-0.31.0/src/json_repair.egg-info → json_repair-0.33.0}/PKG-INFO +1 -1
- {json_repair-0.31.0 → json_repair-0.33.0}/pyproject.toml +1 -1
- {json_repair-0.31.0 → json_repair-0.33.0}/src/json_repair/json_parser.py +32 -2
- {json_repair-0.31.0 → json_repair-0.33.0/src/json_repair.egg-info}/PKG-INFO +1 -1
- {json_repair-0.31.0 → json_repair-0.33.0}/tests/test_json_repair.py +2 -0
- {json_repair-0.31.0 → json_repair-0.33.0}/tests/test_performance.py +5 -5
- {json_repair-0.31.0 → json_repair-0.33.0}/LICENSE +0 -0
- {json_repair-0.31.0 → json_repair-0.33.0}/README.md +0 -0
- {json_repair-0.31.0 → json_repair-0.33.0}/setup.cfg +0 -0
- {json_repair-0.31.0 → json_repair-0.33.0}/src/json_repair/__init__.py +0 -0
- {json_repair-0.31.0 → json_repair-0.33.0}/src/json_repair/__main__.py +0 -0
- {json_repair-0.31.0 → json_repair-0.33.0}/src/json_repair/json_context.py +0 -0
- {json_repair-0.31.0 → json_repair-0.33.0}/src/json_repair/json_repair.py +0 -0
- {json_repair-0.31.0 → json_repair-0.33.0}/src/json_repair/py.typed +0 -0
- {json_repair-0.31.0 → json_repair-0.33.0}/src/json_repair/string_file_wrapper.py +0 -0
- {json_repair-0.31.0 → json_repair-0.33.0}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.31.0 → json_repair-0.33.0}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.31.0 → json_repair-0.33.0}/src/json_repair.egg-info/entry_points.txt +0 -0
- {json_repair-0.31.0 → json_repair-0.33.0}/src/json_repair.egg-info/top_level.txt +0 -0
- {json_repair-0.31.0 → json_repair-0.33.0}/tests/test_coverage.py +0 -0
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.31.0"
|
6
|
+
version = "0.33.0"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -124,6 +124,9 @@ class JSONParser:
|
|
124
124
|
|
125
125
|
self.skip_whitespaces_at()
|
126
126
|
|
127
|
+
# Save this index in case we need to find a duplicate key
|
128
|
+
rollback_index = self.index
|
129
|
+
|
127
130
|
# <member> starts with a <string>
|
128
131
|
key = ""
|
129
132
|
while self.get_char_at():
|
@@ -132,7 +135,14 @@ class JSONParser:
|
|
132
135
|
if key != "" or (key == "" and self.get_char_at() == ":"):
|
133
136
|
# If the string is empty but there is a object divider, we are done here
|
134
137
|
break
|
138
|
+
if ContextValues.ARRAY in self.context.context and key in obj:
|
139
|
+
self.log(
|
140
|
+
"While parsing an object we found a duplicate key, closing the object here and rolling back the index",
|
141
|
+
)
|
142
|
+
self.index = rollback_index - 1
|
143
|
+
break
|
135
144
|
|
145
|
+
# Skip filler whitespaces
|
136
146
|
self.skip_whitespaces_at()
|
137
147
|
|
138
148
|
# We reached the end here
|
@@ -498,9 +508,8 @@ class JSONParser:
|
|
498
508
|
# But this might not be it! This could be just a missing comma
|
499
509
|
# We found a delimiter and we need to check if this is a key
|
500
510
|
# so find a rstring_delimiter and a colon after
|
501
|
-
i += 1
|
502
511
|
i = self.skip_to_character(
|
503
|
-
character=rstring_delimiter, idx=i
|
512
|
+
character=rstring_delimiter, idx=i + 1
|
504
513
|
)
|
505
514
|
i += 1
|
506
515
|
next_c = self.get_char_at(i)
|
@@ -521,6 +530,27 @@ class JSONParser:
|
|
521
530
|
string_acc += str(char)
|
522
531
|
self.index += 1
|
523
532
|
char = self.get_char_at()
|
533
|
+
elif self.context.current == ContextValues.ARRAY:
|
534
|
+
# In array context this could be something like "lorem "ipsum" sic"
|
535
|
+
# So let's check if we find a rstring_delimiter forward otherwise end early
|
536
|
+
i = self.skip_to_character(rstring_delimiter, idx=i + 1)
|
537
|
+
next_c = self.get_char_at(i)
|
538
|
+
if next_c and next_c == rstring_delimiter:
|
539
|
+
# Ok now if I find a comma or a closing ], that can also have an optional rstring_delimiter before them
|
540
|
+
# We can consider this a misplaced quote
|
541
|
+
i += 1
|
542
|
+
i = self.skip_whitespaces_at(
|
543
|
+
idx=i, move_main_index=False
|
544
|
+
)
|
545
|
+
next_c = self.get_char_at(i)
|
546
|
+
if next_c and next_c in [",", "]"]:
|
547
|
+
self.log(
|
548
|
+
"While parsing a string, we found a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
|
549
|
+
)
|
550
|
+
unmatched_delimiter = not unmatched_delimiter
|
551
|
+
string_acc += str(char)
|
552
|
+
self.index += 1
|
553
|
+
char = self.get_char_at()
|
524
554
|
|
525
555
|
if (
|
526
556
|
char
|
@@ -121,6 +121,7 @@ def test_array_edge_cases():
|
|
121
121
|
assert repair_json('{"employees":["John", "Anna", "Peter') == '{"employees": ["John", "Anna", "Peter"]}'
|
122
122
|
assert repair_json('{"key1": {"key2": [1, 2, 3') == '{"key1": {"key2": [1, 2, 3]}}'
|
123
123
|
assert repair_json('{"key": ["value]}') == '{"key": ["value"]}'
|
124
|
+
assert repair_json('["lorem "ipsum" sic"]') == '["lorem \\"ipsum\\" sic"]'
|
124
125
|
|
125
126
|
def test_escaping():
|
126
127
|
assert repair_json("'\"'") == '""'
|
@@ -152,6 +153,7 @@ def test_object_edge_cases():
|
|
152
153
|
assert repair_json('{text:words{words in brackets}m}') == '{"text": "words{words in brackets}m"}'
|
153
154
|
assert repair_json('{"key": "value, value2"```') == '{"key": "value, value2"}'
|
154
155
|
assert repair_json('{key:value,key2:value2}') == '{"key": "value", "key2": "value2"}'
|
156
|
+
assert repair_json('[{"lorem": {"ipsum": "sic"}, "lorem": {"ipsum": "sic"}]') == '[{"lorem": {"ipsum": "sic"}}, "lorem", {"ipsum": "sic"}]'
|
155
157
|
|
156
158
|
def test_number_edge_cases():
|
157
159
|
assert repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
|
@@ -19,7 +19,7 @@ def test_true_true_correct(benchmark):
|
|
19
19
|
mean_time = benchmark.stats.get("median")
|
20
20
|
|
21
21
|
# Define your time threshold in seconds
|
22
|
-
max_time = 1.
|
22
|
+
max_time = 1.9 / 10 ** 3 # 1.9 millisecond
|
23
23
|
|
24
24
|
# Assert that the average time is below the threshold
|
25
25
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -31,7 +31,7 @@ def test_true_true_incorrect(benchmark):
|
|
31
31
|
mean_time = benchmark.stats.get("median")
|
32
32
|
|
33
33
|
# Define your time threshold in seconds
|
34
|
-
max_time =
|
34
|
+
max_time = 9 / 10 ** 3  # 9 milliseconds
|
35
35
|
|
36
36
|
# Assert that the average time is below the threshold
|
37
37
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -53,7 +53,7 @@ def test_true_false_incorrect(benchmark):
|
|
53
53
|
mean_time = benchmark.stats.get("median")
|
54
54
|
|
55
55
|
# Define your time threshold in seconds
|
56
|
-
max_time = 1.
|
56
|
+
max_time = 1.9 / 10 ** 3 # 1.9 millisecond
|
57
57
|
|
58
58
|
# Assert that the average time is below the threshold
|
59
59
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -64,7 +64,7 @@ def test_false_true_correct(benchmark):
|
|
64
64
|
mean_time = benchmark.stats.get("median")
|
65
65
|
|
66
66
|
# Define your time threshold in seconds
|
67
|
-
max_time = 1.
|
67
|
+
max_time = 1.9 / 10 ** 3 # 1.9 millisecond
|
68
68
|
|
69
69
|
# Assert that the average time is below the threshold
|
70
70
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -75,7 +75,7 @@ def test_false_true_incorrect(benchmark):
|
|
75
75
|
mean_time = benchmark.stats.get("median")
|
76
76
|
|
77
77
|
# Define your time threshold in seconds
|
78
|
-
max_time = 1.
|
78
|
+
max_time = 1.9 / 10 ** 3 # 1.9 millisecond
|
79
79
|
|
80
80
|
# Assert that the average time is below the threshold
|
81
81
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|