json-repair 0.21.0__tar.gz → 0.23.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {json_repair-0.21.0/src/json_repair.egg-info → json_repair-0.23.0}/PKG-INFO +1 -1
- {json_repair-0.21.0 → json_repair-0.23.0}/pyproject.toml +1 -1
- {json_repair-0.21.0 → json_repair-0.23.0}/src/json_repair/json_repair.py +25 -13
- {json_repair-0.21.0 → json_repair-0.23.0/src/json_repair.egg-info}/PKG-INFO +1 -1
- {json_repair-0.21.0 → json_repair-0.23.0}/tests/test_json_repair.py +10 -4
- {json_repair-0.21.0 → json_repair-0.23.0}/tests/test_performance.py +3 -3
- {json_repair-0.21.0 → json_repair-0.23.0}/LICENSE +0 -0
- {json_repair-0.21.0 → json_repair-0.23.0}/README.md +0 -0
- {json_repair-0.21.0 → json_repair-0.23.0}/setup.cfg +0 -0
- {json_repair-0.21.0 → json_repair-0.23.0}/src/json_repair/__init__.py +0 -0
- {json_repair-0.21.0 → json_repair-0.23.0}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.21.0 → json_repair-0.23.0}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.21.0 → json_repair-0.23.0}/src/json_repair.egg-info/top_level.txt +0 -0
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.21.0"
|
6
|
+
version = "0.23.0"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -1,7 +1,7 @@
|
|
1
1
|
"""
|
2
2
|
This module will parse the JSON file following the BNF definition:
|
3
3
|
|
4
|
-
<json> ::= <
|
4
|
+
<json> ::= <container>
|
5
5
|
|
6
6
|
<primitive> ::= <number> | <string> | <boolean>
|
7
7
|
; Where:
|
@@ -89,15 +89,32 @@ class JSONParser:
|
|
89
89
|
def parse(
|
90
90
|
self,
|
91
91
|
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
|
92
|
+
json = self.parse_json()
|
93
|
+
if self.index < len(self.json_str):
|
94
|
+
json = [json]
|
95
|
+
last_index = self.index
|
96
|
+
while self.index < len(self.json_str):
|
97
|
+
j = self.parse_json()
|
98
|
+
if j != "":
|
99
|
+
json.append(j)
|
100
|
+
if self.index == last_index:
|
101
|
+
self.index += 1
|
102
|
+
last_index = self.index
|
103
|
+
if len(json) == 1:
|
104
|
+
json = json[0]
|
105
|
+
elif len(json) == 0:
|
106
|
+
json = ""
|
92
107
|
if self.logger.log_level == "none":
|
93
|
-
return
|
108
|
+
return json
|
94
109
|
else:
|
95
|
-
return
|
110
|
+
return json, self.logger.log
|
96
111
|
|
97
112
|
def parse_json(
|
98
113
|
self,
|
99
114
|
) -> JSONReturnType:
|
100
115
|
char = self.get_char_at()
|
116
|
+
# This parser will ignore any basic element (string or number) that is not inside an array or object
|
117
|
+
is_in_context = len(self.context) > 0
|
101
118
|
# False means that we are at the end of the string provided, is the base case for recursion
|
102
119
|
if char is False:
|
103
120
|
return ""
|
@@ -120,10 +137,10 @@ class JSONParser:
|
|
120
137
|
)
|
121
138
|
return ""
|
122
139
|
# <string> starts with a quote
|
123
|
-
elif char in ['"', "'", "“"] or char.isalpha():
|
140
|
+
elif is_in_context and (char in ['"', "'", "“"] or char.isalpha()):
|
124
141
|
return self.parse_string()
|
125
142
|
# <number> starts with [0-9] or minus
|
126
|
-
elif char.isdigit() or char == "-" or char == ".":
|
143
|
+
elif is_in_context and (char.isdigit() or char == "-" or char == "."):
|
127
144
|
return self.parse_number()
|
128
145
|
# If everything else fails, we just ignore and move on
|
129
146
|
else:
|
@@ -304,14 +321,6 @@ class JSONParser:
|
|
304
321
|
"While parsing a string, we found a literal instead of a quote",
|
305
322
|
"info",
|
306
323
|
)
|
307
|
-
if self.get_context() == "":
|
308
|
-
# A string literal in the wild isn't a valid json and not something we can fix
|
309
|
-
self.log(
|
310
|
-
"While parsing a string, we found a literal outside of context, ignoring it",
|
311
|
-
"info",
|
312
|
-
)
|
313
|
-
self.index += 1
|
314
|
-
return self.parse_json()
|
315
324
|
self.log(
|
316
325
|
"While parsing a string, we found no starting quote. Will add the quote back",
|
317
326
|
"info",
|
@@ -656,3 +665,6 @@ def from_file(
|
|
656
665
|
fd.close()
|
657
666
|
|
658
667
|
return jsonobj
|
668
|
+
|
669
|
+
|
670
|
+
repair_json("[]{}")
|
@@ -1,9 +1,9 @@
|
|
1
1
|
from src.json_repair.json_repair import from_file, repair_json, loads
|
2
2
|
|
3
3
|
def test_basic_types_valid():
|
4
|
-
assert repair_json("True", return_objects=True) ==
|
5
|
-
assert repair_json("False", return_objects=True) ==
|
6
|
-
assert repair_json("Null", return_objects=True) ==
|
4
|
+
assert repair_json("True", return_objects=True) == ""
|
5
|
+
assert repair_json("False", return_objects=True) == ""
|
6
|
+
assert repair_json("Null", return_objects=True) == ""
|
7
7
|
assert repair_json("1", return_objects=True) == 1
|
8
8
|
assert repair_json("[]", return_objects=True) == []
|
9
9
|
assert repair_json("[1, 2, 3, 4]", return_objects=True) == [1, 2, 3, 4]
|
@@ -114,7 +114,7 @@ def test_array_edge_cases():
|
|
114
114
|
|
115
115
|
|
116
116
|
def test_escaping():
|
117
|
-
assert repair_json("'\"'") == '"
|
117
|
+
assert repair_json("'\"'") == '""'
|
118
118
|
assert repair_json("{\"key\": 'string\"\n\t\le'") == '{"key": "string\\"\\n\\tle"}'
|
119
119
|
assert repair_json(r'{"real_content": "Some string: Some other string \t Some string <a href=\"https://domain.com\">Some link</a>"') == r'{"real_content": "Some string: Some other string \t Some string <a href=\"https://domain.com\">Some link</a>"}'
|
120
120
|
assert repair_json('{"key_1\n": "value"}') == '{"key_1": "value"}'
|
@@ -164,9 +164,15 @@ def test_leading_trailing_characters():
|
|
164
164
|
assert repair_json("""{ "a": "", "b": [ { "c": 1} ] \n}```""") == '{"a": "", "b": [{"c": 1}]}'
|
165
165
|
assert repair_json("Based on the information extracted, here is the filled JSON output: ```json { 'a': 'b' } ```") == '{"a": "b"}'
|
166
166
|
assert repair_json("""
|
167
|
+
The next 64 elements are:
|
167
168
|
```json
|
168
169
|
{ "key": "value" }
|
169
170
|
```""") == '{"key": "value"}'
|
171
|
+
def test_multiple_jsons():
|
172
|
+
assert repair_json("[]{}") == "[[], {}]"
|
173
|
+
assert repair_json("{}[]") == "[{}, []]"
|
174
|
+
assert repair_json('{"key":"value"}[1,2,3,True]') == '[{"key": "value"}, ["1,2,3", true]]'
|
175
|
+
assert repair_json('lorem ```json {"key":"value"} ``` ipsum ```json [1,2,3,True] ``` 42') == '[{"key": "value"}, ["1,2,3", true]]'
|
170
176
|
|
171
177
|
def test_repair_json_with_objects():
|
172
178
|
# Test with valid JSON strings
|
@@ -19,7 +19,7 @@ def test_true_true_correct(benchmark):
|
|
19
19
|
mean_time = benchmark.stats.get("median")
|
20
20
|
|
21
21
|
# Define your time threshold in seconds
|
22
|
-
max_time =
|
22
|
+
max_time = 14 / 10 ** 4 # 1.4 millisecond
|
23
23
|
|
24
24
|
# Assert that the average time is below the threshold
|
25
25
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -31,7 +31,7 @@ def test_true_true_incorrect(benchmark):
|
|
31
31
|
mean_time = benchmark.stats.get("median")
|
32
32
|
|
33
33
|
# Define your time threshold in seconds
|
34
|
-
max_time =
|
34
|
+
max_time = 14 / 10 ** 4 # 1.4 millisecond
|
35
35
|
|
36
36
|
# Assert that the average time is below the threshold
|
37
37
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -53,7 +53,7 @@ def test_true_false_incorrect(benchmark):
|
|
53
53
|
mean_time = benchmark.stats.get("median")
|
54
54
|
|
55
55
|
# Define your time threshold in seconds
|
56
|
-
max_time =
|
56
|
+
max_time = 14 / 10 ** 4 # 1.4 millisecond
|
57
57
|
|
58
58
|
# Assert that the average time is below the threshold
|
59
59
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|