json-repair 0.47.5__tar.gz → 0.47.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {json_repair-0.47.5/src/json_repair.egg-info → json_repair-0.47.6}/PKG-INFO +1 -1
- {json_repair-0.47.5 → json_repair-0.47.6}/pyproject.toml +1 -1
- {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/parse_string.py +18 -1
- {json_repair-0.47.5 → json_repair-0.47.6/src/json_repair.egg-info}/PKG-INFO +1 -1
- {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair.egg-info/SOURCES.txt +8 -1
- json_repair-0.47.6/tests/test_json_repair.py +160 -0
- json_repair-0.47.6/tests/test_parse_array.py +37 -0
- json_repair-0.47.6/tests/test_parse_boolean_or_null.py +12 -0
- {json_repair-0.47.5 → json_repair-0.47.6}/tests/test_parse_comment.py +1 -0
- json_repair-0.47.6/tests/test_parse_number.py +27 -0
- json_repair-0.47.6/tests/test_parse_object.py +85 -0
- json_repair-0.47.6/tests/test_parse_string.py +99 -0
- json_repair-0.47.6/tests/test_repair_json_cli.py +67 -0
- json_repair-0.47.5/tests/test_json_repair.py → json_repair-0.47.6/tests/test_repair_json_from_file.py +1 -479
- {json_repair-0.47.5 → json_repair-0.47.6}/LICENSE +0 -0
- {json_repair-0.47.5 → json_repair-0.47.6}/README.md +0 -0
- {json_repair-0.47.5 → json_repair-0.47.6}/setup.cfg +0 -0
- {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/__init__.py +0 -0
- {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/__main__.py +0 -0
- {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/constants.py +0 -0
- {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/json_context.py +0 -0
- {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/json_parser.py +0 -0
- {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/json_repair.py +0 -0
- {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/object_comparer.py +0 -0
- {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/parse_array.py +0 -0
- {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/parse_boolean_or_null.py +0 -0
- {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/parse_comment.py +0 -0
- {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/parse_number.py +0 -0
- {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/parse_object.py +0 -0
- {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/py.typed +0 -0
- {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/string_file_wrapper.py +0 -0
- {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair.egg-info/entry_points.txt +0 -0
- {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair.egg-info/top_level.txt +0 -0
- {json_repair-0.47.5 → json_repair-0.47.6}/tests/test_performance.py +0 -0
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.47.5"
|
6
|
+
version = "0.47.6"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -328,7 +328,24 @@ def parse_string(self) -> str | bool | None:
|
|
328
328
|
if all(str(self.get_char_at(j)).isspace() for j in range(1, i) if self.get_char_at(j)):
|
329
329
|
break
|
330
330
|
if self.context.current == ContextValues.OBJECT_VALUE:
|
331
|
-
|
331
|
+
i = self.skip_whitespaces_at(idx=i + 1, move_main_index=False)
|
332
|
+
if self.get_char_at(i) == ",":
|
333
|
+
# So we found a comma, this could be a case of a single quote like "va"lue",
|
334
|
+
# Search if it's followed by another key, starting with the first delimeter
|
335
|
+
i = self.skip_to_character(character=lstring_delimiter, idx=i + 1)
|
336
|
+
i += 1
|
337
|
+
i = self.skip_to_character(character=rstring_delimiter, idx=i + 1)
|
338
|
+
i += 1
|
339
|
+
i = self.skip_whitespaces_at(idx=i, move_main_index=False)
|
340
|
+
next_c = self.get_char_at(i)
|
341
|
+
if next_c == ":":
|
342
|
+
self.log(
|
343
|
+
"While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
|
344
|
+
)
|
345
|
+
string_acc += str(char)
|
346
|
+
self.index += 1
|
347
|
+
char = self.get_char_at()
|
348
|
+
continue
|
332
349
|
# We found a delimiter and we need to check if this is a key
|
333
350
|
# so find a rstring_delimiter and a colon after
|
334
351
|
i = self.skip_to_character(character=rstring_delimiter, idx=i + 1)
|
@@ -22,5 +22,12 @@ src/json_repair.egg-info/dependency_links.txt
|
|
22
22
|
src/json_repair.egg-info/entry_points.txt
|
23
23
|
src/json_repair.egg-info/top_level.txt
|
24
24
|
tests/test_json_repair.py
|
25
|
+
tests/test_parse_array.py
|
26
|
+
tests/test_parse_boolean_or_null.py
|
25
27
|
tests/test_parse_comment.py
|
26
|
-
tests/test_performance.py
|
28
|
+
tests/test_parse_number.py
|
29
|
+
tests/test_parse_object.py
|
30
|
+
tests/test_parse_string.py
|
31
|
+
tests/test_performance.py
|
32
|
+
tests/test_repair_json_cli.py
|
33
|
+
tests/test_repair_json_from_file.py
|
@@ -0,0 +1,160 @@
|
|
1
|
+
from src.json_repair.json_repair import loads, repair_json
|
2
|
+
|
3
|
+
|
4
|
+
def test_valid_json():
|
5
|
+
assert (
|
6
|
+
repair_json('{"name": "John", "age": 30, "city": "New York"}')
|
7
|
+
== '{"name": "John", "age": 30, "city": "New York"}'
|
8
|
+
)
|
9
|
+
assert repair_json('{"employees":["John", "Anna", "Peter"]} ') == '{"employees": ["John", "Anna", "Peter"]}'
|
10
|
+
assert repair_json('{"key": "value:value"}') == '{"key": "value:value"}'
|
11
|
+
assert repair_json('{"text": "The quick brown fox,"}') == '{"text": "The quick brown fox,"}'
|
12
|
+
assert repair_json('{"text": "The quick brown fox won\'t jump"}') == '{"text": "The quick brown fox won\'t jump"}'
|
13
|
+
assert repair_json('{"key": ""') == '{"key": ""}'
|
14
|
+
assert repair_json('{"key1": {"key2": [1, 2, 3]}}') == '{"key1": {"key2": [1, 2, 3]}}'
|
15
|
+
assert repair_json('{"key": 12345678901234567890}') == '{"key": 12345678901234567890}'
|
16
|
+
assert repair_json('{"key": "value\u263a"}') == '{"key": "value\\u263a"}'
|
17
|
+
assert repair_json('{"key": "value\\nvalue"}') == '{"key": "value\\nvalue"}'
|
18
|
+
|
19
|
+
|
20
|
+
def test_multiple_jsons():
|
21
|
+
assert repair_json("[]{}") == "[[], {}]"
|
22
|
+
assert repair_json("{}[]{}") == "[{}, [], {}]"
|
23
|
+
assert repair_json('{"key":"value"}[1,2,3,True]') == '[{"key": "value"}, [1, 2, 3, true]]'
|
24
|
+
assert (
|
25
|
+
repair_json('lorem ```json {"key":"value"} ``` ipsum ```json [1,2,3,True] ``` 42')
|
26
|
+
== '[{"key": "value"}, [1, 2, 3, true]]'
|
27
|
+
)
|
28
|
+
assert repair_json('[{"key":"value"}][{"key":"value_after"}]') == '[{"key": "value_after"}]'
|
29
|
+
|
30
|
+
|
31
|
+
def test_repair_json_with_objects():
|
32
|
+
# Test with valid JSON strings
|
33
|
+
assert repair_json("[]", return_objects=True) == []
|
34
|
+
assert repair_json("{}", return_objects=True) == {}
|
35
|
+
assert repair_json('{"key": true, "key2": false, "key3": null}', return_objects=True) == {
|
36
|
+
"key": True,
|
37
|
+
"key2": False,
|
38
|
+
"key3": None,
|
39
|
+
}
|
40
|
+
assert repair_json('{"name": "John", "age": 30, "city": "New York"}', return_objects=True) == {
|
41
|
+
"name": "John",
|
42
|
+
"age": 30,
|
43
|
+
"city": "New York",
|
44
|
+
}
|
45
|
+
assert repair_json("[1, 2, 3, 4]", return_objects=True) == [1, 2, 3, 4]
|
46
|
+
assert repair_json('{"employees":["John", "Anna", "Peter"]} ', return_objects=True) == {
|
47
|
+
"employees": ["John", "Anna", "Peter"]
|
48
|
+
}
|
49
|
+
assert repair_json(
|
50
|
+
"""
|
51
|
+
{
|
52
|
+
"resourceType": "Bundle",
|
53
|
+
"id": "1",
|
54
|
+
"type": "collection",
|
55
|
+
"entry": [
|
56
|
+
{
|
57
|
+
"resource": {
|
58
|
+
"resourceType": "Patient",
|
59
|
+
"id": "1",
|
60
|
+
"name": [
|
61
|
+
{"use": "official", "family": "Corwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."},
|
62
|
+
{"use": "maiden", "family": "Goodwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}
|
63
|
+
]
|
64
|
+
}
|
65
|
+
}
|
66
|
+
]
|
67
|
+
}
|
68
|
+
""",
|
69
|
+
return_objects=True,
|
70
|
+
) == {
|
71
|
+
"resourceType": "Bundle",
|
72
|
+
"id": "1",
|
73
|
+
"type": "collection",
|
74
|
+
"entry": [
|
75
|
+
{
|
76
|
+
"resource": {
|
77
|
+
"resourceType": "Patient",
|
78
|
+
"id": "1",
|
79
|
+
"name": [
|
80
|
+
{
|
81
|
+
"use": "official",
|
82
|
+
"family": "Corwin",
|
83
|
+
"given": ["Keisha", "Sunny"],
|
84
|
+
"prefix": ["Mrs."],
|
85
|
+
},
|
86
|
+
{
|
87
|
+
"use": "maiden",
|
88
|
+
"family": "Goodwin",
|
89
|
+
"given": ["Keisha", "Sunny"],
|
90
|
+
"prefix": ["Mrs."],
|
91
|
+
},
|
92
|
+
],
|
93
|
+
}
|
94
|
+
}
|
95
|
+
],
|
96
|
+
}
|
97
|
+
assert repair_json(
|
98
|
+
'{\n"html": "<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>"}',
|
99
|
+
return_objects=True,
|
100
|
+
) == {"html": '<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>'}
|
101
|
+
assert repair_json(
|
102
|
+
"""
|
103
|
+
[
|
104
|
+
{
|
105
|
+
"foo": "Foo bar baz",
|
106
|
+
"tag": "#foo-bar-baz"
|
107
|
+
},
|
108
|
+
{
|
109
|
+
"foo": "foo bar "foobar" foo bar baz.",
|
110
|
+
"tag": "#foo-bar-foobar"
|
111
|
+
}
|
112
|
+
]
|
113
|
+
""",
|
114
|
+
return_objects=True,
|
115
|
+
) == [
|
116
|
+
{"foo": "Foo bar baz", "tag": "#foo-bar-baz"},
|
117
|
+
{"foo": 'foo bar "foobar" foo bar baz.', "tag": "#foo-bar-foobar"},
|
118
|
+
]
|
119
|
+
|
120
|
+
|
121
|
+
def test_repair_json_skip_json_loads():
|
122
|
+
assert (
|
123
|
+
repair_json('{"key": true, "key2": false, "key3": null}', skip_json_loads=True)
|
124
|
+
== '{"key": true, "key2": false, "key3": null}'
|
125
|
+
)
|
126
|
+
assert repair_json(
|
127
|
+
'{"key": true, "key2": false, "key3": null}',
|
128
|
+
return_objects=True,
|
129
|
+
skip_json_loads=True,
|
130
|
+
) == {"key": True, "key2": False, "key3": None}
|
131
|
+
assert (
|
132
|
+
repair_json('{"key": true, "key2": false, "key3": }', skip_json_loads=True)
|
133
|
+
== '{"key": true, "key2": false, "key3": ""}'
|
134
|
+
)
|
135
|
+
assert loads('{"key": true, "key2": false, "key3": }', skip_json_loads=True) == {
|
136
|
+
"key": True,
|
137
|
+
"key2": False,
|
138
|
+
"key3": "",
|
139
|
+
}
|
140
|
+
|
141
|
+
|
142
|
+
def test_ensure_ascii():
|
143
|
+
assert repair_json("{'test_中国人_ascii':'统一码'}", ensure_ascii=False) == '{"test_中国人_ascii": "统一码"}'
|
144
|
+
|
145
|
+
|
146
|
+
def test_stream_stable():
|
147
|
+
# default: stream_stable = False
|
148
|
+
# When the json to be repaired is the accumulation of streaming json at a certain moment.
|
149
|
+
# The default repair result is unstable.
|
150
|
+
assert repair_json('{"key": "val\\', stream_stable=False) == '{"key": "val\\\\"}'
|
151
|
+
assert repair_json('{"key": "val\\n', stream_stable=False) == '{"key": "val"}'
|
152
|
+
assert (
|
153
|
+
repair_json('{"key": "val\\n123,`key2:value2', stream_stable=False) == '{"key": "val\\n123", "key2": "value2"}'
|
154
|
+
)
|
155
|
+
assert repair_json('{"key": "val\\n123,`key2:value2`"}', stream_stable=True) == '{"key": "val\\n123,`key2:value2`"}'
|
156
|
+
# stream_stable = True
|
157
|
+
assert repair_json('{"key": "val\\', stream_stable=True) == '{"key": "val"}'
|
158
|
+
assert repair_json('{"key": "val\\n', stream_stable=True) == '{"key": "val\\n"}'
|
159
|
+
assert repair_json('{"key": "val\\n123,`key2:value2', stream_stable=True) == '{"key": "val\\n123,`key2:value2"}'
|
160
|
+
assert repair_json('{"key": "val\\n123,`key2:value2`"}', stream_stable=True) == '{"key": "val\\n123,`key2:value2`"}'
|
@@ -0,0 +1,37 @@
|
|
1
|
+
from src.json_repair.json_repair import repair_json
|
2
|
+
|
3
|
+
|
4
|
+
def test_parse_array():
|
5
|
+
assert repair_json("[]", return_objects=True) == []
|
6
|
+
assert repair_json("[1, 2, 3, 4]", return_objects=True) == [1, 2, 3, 4]
|
7
|
+
assert repair_json("[", return_objects=True) == []
|
8
|
+
assert repair_json("[[1\n\n]") == "[[1]]"
|
9
|
+
|
10
|
+
|
11
|
+
def test_parse_array_edge_cases():
|
12
|
+
assert repair_json("[{]") == "[{}]"
|
13
|
+
assert repair_json("[") == "[]"
|
14
|
+
assert repair_json('["') == "[]"
|
15
|
+
assert repair_json("]") == ""
|
16
|
+
assert repair_json("[1, 2, 3,") == "[1, 2, 3]"
|
17
|
+
assert repair_json("[1, 2, 3, ...]") == "[1, 2, 3]"
|
18
|
+
assert repair_json("[1, 2, ... , 3]") == "[1, 2, 3]"
|
19
|
+
assert repair_json("[1, 2, '...', 3]") == '[1, 2, "...", 3]'
|
20
|
+
assert repair_json("[true, false, null, ...]") == "[true, false, null]"
|
21
|
+
assert repair_json('["a" "b" "c" 1') == '["a", "b", "c", 1]'
|
22
|
+
assert repair_json('{"employees":["John", "Anna",') == '{"employees": ["John", "Anna"]}'
|
23
|
+
assert repair_json('{"employees":["John", "Anna", "Peter') == '{"employees": ["John", "Anna", "Peter"]}'
|
24
|
+
assert repair_json('{"key1": {"key2": [1, 2, 3') == '{"key1": {"key2": [1, 2, 3]}}'
|
25
|
+
assert repair_json('{"key": ["value]}') == '{"key": ["value"]}'
|
26
|
+
assert repair_json('["lorem "ipsum" sic"]') == '["lorem \\"ipsum\\" sic"]'
|
27
|
+
assert (
|
28
|
+
repair_json('{"key1": ["value1", "value2"}, "key2": ["value3", "value4"]}')
|
29
|
+
== '{"key1": ["value1", "value2"], "key2": ["value3", "value4"]}'
|
30
|
+
)
|
31
|
+
assert repair_json('{"key": ["value" "value1" "value2"]}') == '{"key": ["value", "value1", "value2"]}'
|
32
|
+
assert (
|
33
|
+
repair_json('{"key": ["lorem "ipsum" dolor "sit" amet, "consectetur" ", "lorem "ipsum" dolor", "lorem"]}')
|
34
|
+
== '{"key": ["lorem \\"ipsum\\" dolor \\"sit\\" amet, \\"consectetur\\" ", "lorem \\"ipsum\\" dolor", "lorem"]}'
|
35
|
+
)
|
36
|
+
assert repair_json('{"k"e"y": "value"}') == '{"k\\"e\\"y": "value"}'
|
37
|
+
assert repair_json('["key":"value"}]') == '[{"key": "value"}]'
|
@@ -0,0 +1,12 @@
|
|
1
|
+
from src.json_repair.json_repair import repair_json
|
2
|
+
|
3
|
+
|
4
|
+
def test_parse_boolean_or_null():
|
5
|
+
assert repair_json("True", return_objects=True) == ""
|
6
|
+
assert repair_json("False", return_objects=True) == ""
|
7
|
+
assert repair_json("Null", return_objects=True) == ""
|
8
|
+
assert repair_json("true", return_objects=True)
|
9
|
+
assert not repair_json("false", return_objects=True)
|
10
|
+
assert repair_json("null", return_objects=True) is None
|
11
|
+
assert repair_json(' {"key": true, "key2": false, "key3": null}') == '{"key": true, "key2": false, "key3": null}'
|
12
|
+
assert repair_json('{"key": TRUE, "key2": FALSE, "key3": Null} ') == '{"key": true, "key2": false, "key3": null}'
|
@@ -0,0 +1,27 @@
|
|
1
|
+
from src.json_repair.json_repair import repair_json
|
2
|
+
|
3
|
+
|
4
|
+
def test_parse_number():
|
5
|
+
assert repair_json("1", return_objects=True) == 1
|
6
|
+
assert repair_json("1.2", return_objects=True) == 1.2
|
7
|
+
|
8
|
+
|
9
|
+
def test_parse_number_edge_cases():
|
10
|
+
assert (
|
11
|
+
repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
|
12
|
+
)
|
13
|
+
assert repair_json('{"key": 1/3}') == '{"key": "1/3"}'
|
14
|
+
assert repair_json('{"key": .25}') == '{"key": 0.25}'
|
15
|
+
assert repair_json('{"here": "now", "key": 1/3, "foo": "bar"}') == '{"here": "now", "key": "1/3", "foo": "bar"}'
|
16
|
+
assert repair_json('{"key": 12345/67890}') == '{"key": "12345/67890"}'
|
17
|
+
assert repair_json("[105,12") == "[105, 12]"
|
18
|
+
assert repair_json('{"key", 105,12,') == '{"key": "105,12"}'
|
19
|
+
assert repair_json('{"key": 1/3, "foo": "bar"}') == '{"key": "1/3", "foo": "bar"}'
|
20
|
+
assert repair_json('{"key": 10-20}') == '{"key": "10-20"}'
|
21
|
+
assert repair_json('{"key": 1.1.1}') == '{"key": "1.1.1"}'
|
22
|
+
assert repair_json("[- ") == "[]"
|
23
|
+
assert repair_json('{"key": 1. }') == '{"key": 1.0}'
|
24
|
+
assert repair_json('{"key": 1e10 }') == '{"key": 10000000000.0}'
|
25
|
+
assert repair_json('{"key": 1e }') == '{"key": 1}'
|
26
|
+
assert repair_json('{"key": 1notanumber }') == '{"key": "1notanumber"}'
|
27
|
+
assert repair_json("[1, 2notanumber]") == '[1, "2notanumber"]'
|
@@ -0,0 +1,85 @@
|
|
1
|
+
from src.json_repair.json_repair import repair_json
|
2
|
+
|
3
|
+
|
4
|
+
def test_parse_object():
|
5
|
+
assert repair_json("{}", return_objects=True) == {}
|
6
|
+
assert repair_json('{ "key": "value", "key2": 1, "key3": True }', return_objects=True) == {
|
7
|
+
"key": "value",
|
8
|
+
"key2": 1,
|
9
|
+
"key3": True,
|
10
|
+
}
|
11
|
+
assert repair_json("{", return_objects=True) == {}
|
12
|
+
assert repair_json('{ "key": value, "key2": 1 "key3": null }', return_objects=True) == {
|
13
|
+
"key": "value",
|
14
|
+
"key2": 1,
|
15
|
+
"key3": None,
|
16
|
+
}
|
17
|
+
assert repair_json(" { } ") == "{}"
|
18
|
+
assert repair_json("{") == "{}"
|
19
|
+
assert repair_json("}") == ""
|
20
|
+
assert repair_json('{"') == "{}"
|
21
|
+
|
22
|
+
|
23
|
+
def test_parse_object_edge_cases():
|
24
|
+
assert repair_json("{foo: [}") == '{"foo": []}'
|
25
|
+
assert repair_json("{ ") == "{}"
|
26
|
+
assert repair_json('{"": "value"') == '{"": "value"}'
|
27
|
+
assert repair_json('{"value_1": true, COMMENT "value_2": "data"}') == '{"value_1": true, "value_2": "data"}'
|
28
|
+
assert (
|
29
|
+
repair_json('{"value_1": true, SHOULD_NOT_EXIST "value_2": "data" AAAA }')
|
30
|
+
== '{"value_1": true, "value_2": "data"}'
|
31
|
+
)
|
32
|
+
assert repair_json('{"" : true, "key2": "value2"}') == '{"": true, "key2": "value2"}'
|
33
|
+
assert (
|
34
|
+
repair_json("""{""answer"":[{""traits"":''Female aged 60+'',""answer1"":""5""}]}""")
|
35
|
+
== '{"answer": [{"traits": "Female aged 60+", "answer1": "5"}]}'
|
36
|
+
)
|
37
|
+
assert (
|
38
|
+
repair_json('{ "words": abcdef", "numbers": 12345", "words2": ghijkl" }')
|
39
|
+
== '{"words": "abcdef", "numbers": 12345, "words2": "ghijkl"}'
|
40
|
+
)
|
41
|
+
assert (
|
42
|
+
repair_json("""{"number": 1,"reason": "According...""ans": "YES"}""")
|
43
|
+
== '{"number": 1, "reason": "According...", "ans": "YES"}'
|
44
|
+
)
|
45
|
+
assert repair_json("""{ "a" : "{ b": {} }" }""") == '{"a": "{ b"}'
|
46
|
+
assert repair_json("""{"b": "xxxxx" true}""") == '{"b": "xxxxx"}'
|
47
|
+
assert repair_json('{"key": "Lorem "ipsum" s,"}') == '{"key": "Lorem \\"ipsum\\" s,"}'
|
48
|
+
assert repair_json('{"lorem": ipsum, sic, datum.",}') == '{"lorem": "ipsum, sic, datum."}'
|
49
|
+
assert (
|
50
|
+
repair_json('{"lorem": sic tamet. "ipsum": sic tamet, quick brown fox. "sic": ipsum}')
|
51
|
+
== '{"lorem": "sic tamet.", "ipsum": "sic tamet", "sic": "ipsum"}'
|
52
|
+
)
|
53
|
+
assert (
|
54
|
+
repair_json('{"lorem_ipsum": "sic tamet, quick brown fox. }')
|
55
|
+
== '{"lorem_ipsum": "sic tamet, quick brown fox."}'
|
56
|
+
)
|
57
|
+
assert repair_json('{"key":value, " key2":"value2" }') == '{"key": "value", " key2": "value2"}'
|
58
|
+
assert repair_json('{"key":value "key2":"value2" }') == '{"key": "value", "key2": "value2"}'
|
59
|
+
assert (
|
60
|
+
repair_json("{'text': 'words{words in brackets}more words'}")
|
61
|
+
== '{"text": "words{words in brackets}more words"}'
|
62
|
+
)
|
63
|
+
assert repair_json("{text:words{words in brackets}}") == '{"text": "words{words in brackets}"}'
|
64
|
+
assert repair_json("{text:words{words in brackets}m}") == '{"text": "words{words in brackets}m"}'
|
65
|
+
assert repair_json('{"key": "value, value2"```') == '{"key": "value, value2"}'
|
66
|
+
assert repair_json("{key:value,key2:value2}") == '{"key": "value", "key2": "value2"}'
|
67
|
+
assert repair_json('{"key:"value"}') == '{"key": "value"}'
|
68
|
+
assert repair_json('{"key:value}') == '{"key": "value"}'
|
69
|
+
assert (
|
70
|
+
repair_json('[{"lorem": {"ipsum": "sic"}, """" "lorem": {"ipsum": "sic"}]')
|
71
|
+
== '[{"lorem": {"ipsum": "sic"}}, {"lorem": {"ipsum": "sic"}}]'
|
72
|
+
)
|
73
|
+
assert (
|
74
|
+
repair_json('{ "key": ["arrayvalue"], ["arrayvalue1"], ["arrayvalue2"], "key3": "value3" }')
|
75
|
+
== '{"key": ["arrayvalue", "arrayvalue1", "arrayvalue2"], "key3": "value3"}'
|
76
|
+
)
|
77
|
+
assert (
|
78
|
+
repair_json('{ "key": ["arrayvalue"], "key3": "value3", ["arrayvalue1"] }')
|
79
|
+
== '{"key": ["arrayvalue"], "key3": "value3", "arrayvalue1": ""}'
|
80
|
+
)
|
81
|
+
assert (
|
82
|
+
repair_json('{"key": "{\\\\"key\\\\\\":[\\"value\\\\\\"],\\"key2\\":"value2"}"}')
|
83
|
+
== '{"key": "{\\"key\\":[\\"value\\"],\\"key2\\":\\"value2\\"}"}'
|
84
|
+
)
|
85
|
+
assert repair_json('{"key": , "key2": "value2"}') == '{"key": "", "key2": "value2"}'
|
@@ -0,0 +1,99 @@
|
|
1
|
+
from src.json_repair.json_repair import repair_json
|
2
|
+
|
3
|
+
|
4
|
+
def test_parse_string():
|
5
|
+
assert repair_json('"') == ""
|
6
|
+
assert repair_json("\n") == ""
|
7
|
+
assert repair_json(" ") == ""
|
8
|
+
assert repair_json("string") == ""
|
9
|
+
assert repair_json("stringbeforeobject {}") == "{}"
|
10
|
+
|
11
|
+
|
12
|
+
def test_missing_and_mixed_quotes():
|
13
|
+
assert (
|
14
|
+
repair_json("{'key': 'string', 'key2': false, \"key3\": null, \"key4\": unquoted}")
|
15
|
+
== '{"key": "string", "key2": false, "key3": null, "key4": "unquoted"}'
|
16
|
+
)
|
17
|
+
assert (
|
18
|
+
repair_json('{"name": "John", "age": 30, "city": "New York')
|
19
|
+
== '{"name": "John", "age": 30, "city": "New York"}'
|
20
|
+
)
|
21
|
+
assert (
|
22
|
+
repair_json('{"name": "John", "age": 30, city: "New York"}')
|
23
|
+
== '{"name": "John", "age": 30, "city": "New York"}'
|
24
|
+
)
|
25
|
+
assert (
|
26
|
+
repair_json('{"name": "John", "age": 30, "city": New York}')
|
27
|
+
== '{"name": "John", "age": 30, "city": "New York"}'
|
28
|
+
)
|
29
|
+
assert (
|
30
|
+
repair_json('{"name": John, "age": 30, "city": "New York"}')
|
31
|
+
== '{"name": "John", "age": 30, "city": "New York"}'
|
32
|
+
)
|
33
|
+
assert repair_json('{“slanted_delimiter”: "value"}') == '{"slanted_delimiter": "value"}'
|
34
|
+
assert repair_json('{"name": "John", "age": 30, "city": "New') == '{"name": "John", "age": 30, "city": "New"}'
|
35
|
+
assert (
|
36
|
+
repair_json('{"name": "John", "age": 30, "city": "New York, "gender": "male"}')
|
37
|
+
== '{"name": "John", "age": 30, "city": "New York", "gender": "male"}'
|
38
|
+
)
|
39
|
+
|
40
|
+
assert (
|
41
|
+
repair_json('[{"key": "value", COMMENT "notes": "lorem "ipsum", sic." }]')
|
42
|
+
== '[{"key": "value", "notes": "lorem \\"ipsum\\", sic."}]'
|
43
|
+
)
|
44
|
+
assert repair_json('{"key": ""value"}') == '{"key": "value"}'
|
45
|
+
assert repair_json('{"key": "value", 5: "value"}') == '{"key": "value", "5": "value"}'
|
46
|
+
assert repair_json('{"foo": "\\"bar\\""') == '{"foo": "\\"bar\\""}'
|
47
|
+
assert repair_json('{"" key":"val"') == '{" key": "val"}'
|
48
|
+
assert repair_json('{"key": value "key2" : "value2" ') == '{"key": "value", "key2": "value2"}'
|
49
|
+
assert (
|
50
|
+
repair_json('{"key": "lorem ipsum ... "sic " tamet. ...}') == '{"key": "lorem ipsum ... \\"sic \\" tamet. ..."}'
|
51
|
+
)
|
52
|
+
assert repair_json('{"key": value , }') == '{"key": "value"}'
|
53
|
+
assert (
|
54
|
+
repair_json('{"comment": "lorem, "ipsum" sic "tamet". To improve"}')
|
55
|
+
== '{"comment": "lorem, \\"ipsum\\" sic \\"tamet\\". To improve"}'
|
56
|
+
)
|
57
|
+
assert repair_json('{"key": "v"alu"e"} key:') == '{"key": "v\\"alu\\"e"}'
|
58
|
+
assert repair_json('{"key": "v"alue", "key2": "value2"}') == '{"key": "v\\"alue", "key2": "value2"}'
|
59
|
+
|
60
|
+
|
61
|
+
def test_escaping():
|
62
|
+
assert repair_json("'\"'") == ""
|
63
|
+
assert repair_json('{"key": \'string"\n\t\\le\'') == '{"key": "string\\"\\n\\t\\\\le"}'
|
64
|
+
assert (
|
65
|
+
repair_json(
|
66
|
+
r'{"real_content": "Some string: Some other string \t Some string <a href=\"https://domain.com\">Some link</a>"'
|
67
|
+
)
|
68
|
+
== r'{"real_content": "Some string: Some other string \t Some string <a href=\"https://domain.com\">Some link</a>"}'
|
69
|
+
)
|
70
|
+
assert repair_json('{"key_1\n": "value"}') == '{"key_1": "value"}'
|
71
|
+
assert repair_json('{"key\t_": "value"}') == '{"key\\t_": "value"}'
|
72
|
+
assert repair_json("{\"key\": '\u0076\u0061\u006c\u0075\u0065'}") == '{"key": "value"}'
|
73
|
+
assert repair_json('{"key": "\\u0076\\u0061\\u006C\\u0075\\u0065"}', skip_json_loads=True) == '{"key": "value"}'
|
74
|
+
|
75
|
+
|
76
|
+
def test_markdown():
|
77
|
+
assert (
|
78
|
+
repair_json('{ "content": "[LINK]("https://google.com")" }')
|
79
|
+
== '{"content": "[LINK](\\"https://google.com\\")"}'
|
80
|
+
)
|
81
|
+
assert repair_json('{ "content": "[LINK](" }') == '{"content": "[LINK]("}'
|
82
|
+
assert repair_json('{ "content": "[LINK](", "key": true }') == '{"content": "[LINK](", "key": true}'
|
83
|
+
|
84
|
+
|
85
|
+
def test_leading_trailing_characters():
|
86
|
+
assert repair_json('````{ "key": "value" }```') == '{"key": "value"}'
|
87
|
+
assert repair_json("""{ "a": "", "b": [ { "c": 1} ] \n}```""") == '{"a": "", "b": [{"c": 1}]}'
|
88
|
+
assert (
|
89
|
+
repair_json("Based on the information extracted, here is the filled JSON output: ```json { 'a': 'b' } ```")
|
90
|
+
== '{"a": "b"}'
|
91
|
+
)
|
92
|
+
assert (
|
93
|
+
repair_json("""
|
94
|
+
The next 64 elements are:
|
95
|
+
```json
|
96
|
+
{ "key": "value" }
|
97
|
+
```""")
|
98
|
+
== '{"key": "value"}'
|
99
|
+
)
|
@@ -0,0 +1,67 @@
|
|
1
|
+
import io
|
2
|
+
import os
|
3
|
+
import tempfile
|
4
|
+
from unittest.mock import patch
|
5
|
+
|
6
|
+
import pytest
|
7
|
+
|
8
|
+
from src.json_repair.json_repair import cli
|
9
|
+
|
10
|
+
|
11
|
+
def test_cli(capsys):
|
12
|
+
# Create a temporary file
|
13
|
+
temp_fd, temp_path = tempfile.mkstemp(suffix=".json")
|
14
|
+
try:
|
15
|
+
# Write content to the temporary file
|
16
|
+
with os.fdopen(temp_fd, "w") as tmp:
|
17
|
+
tmp.write("{key:value")
|
18
|
+
cli(inline_args=[temp_path, "--indent", 0, "--ensure_ascii"])
|
19
|
+
captured = capsys.readouterr()
|
20
|
+
assert captured.out == '{\n"key": "value"\n}\n'
|
21
|
+
|
22
|
+
# Test the output option
|
23
|
+
tempout_fd, tempout_path = tempfile.mkstemp(suffix=".json")
|
24
|
+
cli(inline_args=[temp_path, "--indent", 0, "-o", tempout_path])
|
25
|
+
with open(tempout_path) as tmp:
|
26
|
+
out = tmp.read()
|
27
|
+
assert out == '{\n"key": "value"\n}'
|
28
|
+
|
29
|
+
# Test the inline option
|
30
|
+
cli(inline_args=[temp_path, "--indent", 0, "-i"])
|
31
|
+
with open(temp_path) as tmp:
|
32
|
+
out = tmp.read()
|
33
|
+
assert out == '{\n"key": "value"\n}'
|
34
|
+
|
35
|
+
finally:
|
36
|
+
# Clean up - delete the temporary file
|
37
|
+
os.remove(temp_path)
|
38
|
+
os.remove(tempout_path)
|
39
|
+
|
40
|
+
# Prepare a JSON string that needs to be repaired.
|
41
|
+
test_input = "{key:value"
|
42
|
+
# Expected output when running cli with --indent 0.
|
43
|
+
expected_output = '{\n"key": "value"\n}\n'
|
44
|
+
# Patch sys.stdin so that cli() reads from it instead of a file.
|
45
|
+
with patch("sys.stdin", new=io.StringIO(test_input)):
|
46
|
+
cli(inline_args=["--indent", 0])
|
47
|
+
captured = capsys.readouterr()
|
48
|
+
assert captured.out == expected_output
|
49
|
+
|
50
|
+
|
51
|
+
def test_cli_inline_requires_filename(capsys):
|
52
|
+
"""cli() should exit with an error when --inline is passed without a filename."""
|
53
|
+
with pytest.raises(SystemExit) as exc:
|
54
|
+
cli(inline_args=["--inline"])
|
55
|
+
captured = capsys.readouterr()
|
56
|
+
assert captured.err.strip() == "Error: Inline mode requires a filename"
|
57
|
+
assert exc.value.code != 0
|
58
|
+
|
59
|
+
|
60
|
+
def test_cli_inline_and_output_error(tmp_path, capsys):
|
61
|
+
"""cli() should exit with an error when --inline and --output are used together."""
|
62
|
+
outfile = tmp_path / "out.json"
|
63
|
+
with pytest.raises(SystemExit) as exc:
|
64
|
+
cli(inline_args=["dummy.json", "--inline", "--output", str(outfile)])
|
65
|
+
captured = capsys.readouterr()
|
66
|
+
assert captured.err.strip() == "Error: You cannot pass both --inline and --output"
|
67
|
+
assert exc.value.code != 0
|
@@ -1,406 +1,8 @@
|
|
1
|
-
import io
|
2
1
|
import os.path
|
3
2
|
import pathlib
|
4
3
|
import tempfile
|
5
|
-
from unittest.mock import patch
|
6
4
|
|
7
|
-
import pytest
|
8
|
-
|
9
|
-
from src.json_repair.json_repair import cli, from_file, loads, repair_json
|
10
|
-
|
11
|
-
|
12
|
-
def test_basic_types_valid():
|
13
|
-
assert repair_json("True", return_objects=True) == ""
|
14
|
-
assert repair_json("False", return_objects=True) == ""
|
15
|
-
assert repair_json("Null", return_objects=True) == ""
|
16
|
-
assert repair_json("1", return_objects=True) == 1
|
17
|
-
assert repair_json("[]", return_objects=True) == []
|
18
|
-
assert repair_json("[1, 2, 3, 4]", return_objects=True) == [1, 2, 3, 4]
|
19
|
-
assert repair_json("{}", return_objects=True) == {}
|
20
|
-
assert repair_json('{ "key": "value", "key2": 1, "key3": True }', return_objects=True) == {
|
21
|
-
"key": "value",
|
22
|
-
"key2": 1,
|
23
|
-
"key3": True,
|
24
|
-
}
|
25
|
-
|
26
|
-
|
27
|
-
def test_basic_types_invalid():
|
28
|
-
assert repair_json("true", return_objects=True)
|
29
|
-
assert not repair_json("false", return_objects=True)
|
30
|
-
assert repair_json("null", return_objects=True) is None
|
31
|
-
assert repair_json("1.2", return_objects=True) == 1.2
|
32
|
-
assert repair_json("[", return_objects=True) == []
|
33
|
-
assert repair_json("[1, 2, 3, 4", return_objects=True) == [1, 2, 3, 4]
|
34
|
-
assert repair_json("{", return_objects=True) == {}
|
35
|
-
assert repair_json('{ "key": value, "key2": 1 "key3": null }', return_objects=True) == {
|
36
|
-
"key": "value",
|
37
|
-
"key2": 1,
|
38
|
-
"key3": None,
|
39
|
-
}
|
40
|
-
|
41
|
-
|
42
|
-
def test_valid_json():
|
43
|
-
assert (
|
44
|
-
repair_json('{"name": "John", "age": 30, "city": "New York"}')
|
45
|
-
== '{"name": "John", "age": 30, "city": "New York"}'
|
46
|
-
)
|
47
|
-
assert repair_json('{"employees":["John", "Anna", "Peter"]} ') == '{"employees": ["John", "Anna", "Peter"]}'
|
48
|
-
assert repair_json('{"key": "value:value"}') == '{"key": "value:value"}'
|
49
|
-
assert repair_json('{"text": "The quick brown fox,"}') == '{"text": "The quick brown fox,"}'
|
50
|
-
assert repair_json('{"text": "The quick brown fox won\'t jump"}') == '{"text": "The quick brown fox won\'t jump"}'
|
51
|
-
assert repair_json('{"key": ""') == '{"key": ""}'
|
52
|
-
assert repair_json('{"key1": {"key2": [1, 2, 3]}}') == '{"key1": {"key2": [1, 2, 3]}}'
|
53
|
-
assert repair_json('{"key": 12345678901234567890}') == '{"key": 12345678901234567890}'
|
54
|
-
assert repair_json('{"key": "value\u263a"}') == '{"key": "value\\u263a"}'
|
55
|
-
assert repair_json('{"key": "value\\nvalue"}') == '{"key": "value\\nvalue"}'
|
56
|
-
|
57
|
-
|
58
|
-
def test_brackets_edge_cases():
|
59
|
-
assert repair_json("[{]") == "[{}]"
|
60
|
-
assert repair_json(" { } ") == "{}"
|
61
|
-
assert repair_json("[") == "[]"
|
62
|
-
assert repair_json("]") == ""
|
63
|
-
assert repair_json("{") == "{}"
|
64
|
-
assert repair_json("}") == ""
|
65
|
-
assert repair_json('{"') == "{}"
|
66
|
-
assert repair_json('["') == "[]"
|
67
|
-
assert repair_json("{foo: [}") == '{"foo": []}'
|
68
|
-
|
69
|
-
|
70
|
-
def test_general_edge_cases():
|
71
|
-
assert repair_json('"') == ""
|
72
|
-
assert repair_json("\n") == ""
|
73
|
-
assert repair_json(" ") == ""
|
74
|
-
assert repair_json("[[1\n\n]") == "[[1]]"
|
75
|
-
assert repair_json("string") == ""
|
76
|
-
assert repair_json("stringbeforeobject {}") == "{}"
|
77
|
-
assert repair_json("/") == ""
|
78
|
-
|
79
|
-
|
80
|
-
def test_mixed_data_types():
|
81
|
-
assert repair_json(' {"key": true, "key2": false, "key3": null}') == '{"key": true, "key2": false, "key3": null}'
|
82
|
-
assert repair_json('{"key": TRUE, "key2": FALSE, "key3": Null} ') == '{"key": true, "key2": false, "key3": null}'
|
83
|
-
|
84
|
-
|
85
|
-
def test_missing_and_mixed_quotes():
|
86
|
-
assert (
|
87
|
-
repair_json("{'key': 'string', 'key2': false, \"key3\": null, \"key4\": unquoted}")
|
88
|
-
== '{"key": "string", "key2": false, "key3": null, "key4": "unquoted"}'
|
89
|
-
)
|
90
|
-
assert (
|
91
|
-
repair_json('{"name": "John", "age": 30, "city": "New York')
|
92
|
-
== '{"name": "John", "age": 30, "city": "New York"}'
|
93
|
-
)
|
94
|
-
assert (
|
95
|
-
repair_json('{"name": "John", "age": 30, city: "New York"}')
|
96
|
-
== '{"name": "John", "age": 30, "city": "New York"}'
|
97
|
-
)
|
98
|
-
assert (
|
99
|
-
repair_json('{"name": "John", "age": 30, "city": New York}')
|
100
|
-
== '{"name": "John", "age": 30, "city": "New York"}'
|
101
|
-
)
|
102
|
-
assert (
|
103
|
-
repair_json('{"name": John, "age": 30, "city": "New York"}')
|
104
|
-
== '{"name": "John", "age": 30, "city": "New York"}'
|
105
|
-
)
|
106
|
-
assert repair_json('{“slanted_delimiter”: "value"}') == '{"slanted_delimiter": "value"}'
|
107
|
-
assert repair_json('{"name": "John", "age": 30, "city": "New') == '{"name": "John", "age": 30, "city": "New"}'
|
108
|
-
assert (
|
109
|
-
repair_json('{"name": "John", "age": 30, "city": "New York, "gender": "male"}')
|
110
|
-
== '{"name": "John", "age": 30, "city": "New York", "gender": "male"}'
|
111
|
-
)
|
112
|
-
|
113
|
-
assert (
|
114
|
-
repair_json('[{"key": "value", COMMENT "notes": "lorem "ipsum", sic." }]')
|
115
|
-
== '[{"key": "value", "notes": "lorem \\"ipsum\\", sic."}]'
|
116
|
-
)
|
117
|
-
assert repair_json('{"key": ""value"}') == '{"key": "value"}'
|
118
|
-
assert repair_json('{"key": "value", 5: "value"}') == '{"key": "value", "5": "value"}'
|
119
|
-
assert repair_json('{"foo": "\\"bar\\""') == '{"foo": "\\"bar\\""}'
|
120
|
-
assert repair_json('{"" key":"val"') == '{" key": "val"}'
|
121
|
-
assert repair_json('{"key": value "key2" : "value2" ') == '{"key": "value", "key2": "value2"}'
|
122
|
-
assert (
|
123
|
-
repair_json('{"key": "lorem ipsum ... "sic " tamet. ...}') == '{"key": "lorem ipsum ... \\"sic \\" tamet. ..."}'
|
124
|
-
)
|
125
|
-
assert repair_json('{"key": value , }') == '{"key": "value"}'
|
126
|
-
assert (
|
127
|
-
repair_json('{"comment": "lorem, "ipsum" sic "tamet". To improve"}')
|
128
|
-
== '{"comment": "lorem, \\"ipsum\\" sic \\"tamet\\". To improve"}'
|
129
|
-
)
|
130
|
-
assert repair_json('{"key": "v"alu"e"} key:') == '{"key": "v\\"alu\\"e"}'
|
131
|
-
|
132
|
-
|
133
|
-
def test_array_edge_cases():
|
134
|
-
assert repair_json("[1, 2, 3,") == "[1, 2, 3]"
|
135
|
-
assert repair_json("[1, 2, 3, ...]") == "[1, 2, 3]"
|
136
|
-
assert repair_json("[1, 2, ... , 3]") == "[1, 2, 3]"
|
137
|
-
assert repair_json("[1, 2, '...', 3]") == '[1, 2, "...", 3]'
|
138
|
-
assert repair_json("[true, false, null, ...]") == "[true, false, null]"
|
139
|
-
assert repair_json('["a" "b" "c" 1') == '["a", "b", "c", 1]'
|
140
|
-
assert repair_json('{"employees":["John", "Anna",') == '{"employees": ["John", "Anna"]}'
|
141
|
-
assert repair_json('{"employees":["John", "Anna", "Peter') == '{"employees": ["John", "Anna", "Peter"]}'
|
142
|
-
assert repair_json('{"key1": {"key2": [1, 2, 3') == '{"key1": {"key2": [1, 2, 3]}}'
|
143
|
-
assert repair_json('{"key": ["value]}') == '{"key": ["value"]}'
|
144
|
-
assert repair_json('["lorem "ipsum" sic"]') == '["lorem \\"ipsum\\" sic"]'
|
145
|
-
assert (
|
146
|
-
repair_json('{"key1": ["value1", "value2"}, "key2": ["value3", "value4"]}')
|
147
|
-
== '{"key1": ["value1", "value2"], "key2": ["value3", "value4"]}'
|
148
|
-
)
|
149
|
-
assert repair_json('{"key": ["value" "value1" "value2"]}') == '{"key": ["value", "value1", "value2"]}'
|
150
|
-
assert (
|
151
|
-
repair_json('{"key": ["lorem "ipsum" dolor "sit" amet, "consectetur" ", "lorem "ipsum" dolor", "lorem"]}')
|
152
|
-
== '{"key": ["lorem \\"ipsum\\" dolor \\"sit\\" amet, \\"consectetur\\" ", "lorem \\"ipsum\\" dolor", "lorem"]}'
|
153
|
-
)
|
154
|
-
assert repair_json('{"k"e"y": "value"}') == '{"k\\"e\\"y": "value"}'
|
155
|
-
assert repair_json('["key":"value"}]') == '[{"key": "value"}]'
|
156
|
-
|
157
|
-
|
158
|
-
def test_escaping():
|
159
|
-
assert repair_json("'\"'") == ""
|
160
|
-
assert repair_json('{"key": \'string"\n\t\\le\'') == '{"key": "string\\"\\n\\t\\\\le"}'
|
161
|
-
assert (
|
162
|
-
repair_json(
|
163
|
-
r'{"real_content": "Some string: Some other string \t Some string <a href=\"https://domain.com\">Some link</a>"'
|
164
|
-
)
|
165
|
-
== r'{"real_content": "Some string: Some other string \t Some string <a href=\"https://domain.com\">Some link</a>"}'
|
166
|
-
)
|
167
|
-
assert repair_json('{"key_1\n": "value"}') == '{"key_1": "value"}'
|
168
|
-
assert repair_json('{"key\t_": "value"}') == '{"key\\t_": "value"}'
|
169
|
-
assert repair_json("{\"key\": '\u0076\u0061\u006c\u0075\u0065'}") == '{"key": "value"}'
|
170
|
-
assert repair_json('{"key": "\\u0076\\u0061\\u006C\\u0075\\u0065"}', skip_json_loads=True) == '{"key": "value"}'
|
171
|
-
|
172
|
-
|
173
|
-
def test_object_edge_cases():
|
174
|
-
assert repair_json("{ ") == "{}"
|
175
|
-
assert repair_json('{"": "value"') == '{"": "value"}'
|
176
|
-
assert repair_json('{"value_1": true, COMMENT "value_2": "data"}') == '{"value_1": true, "value_2": "data"}'
|
177
|
-
assert (
|
178
|
-
repair_json('{"value_1": true, SHOULD_NOT_EXIST "value_2": "data" AAAA }')
|
179
|
-
== '{"value_1": true, "value_2": "data"}'
|
180
|
-
)
|
181
|
-
assert repair_json('{"" : true, "key2": "value2"}') == '{"": true, "key2": "value2"}'
|
182
|
-
assert (
|
183
|
-
repair_json("""{""answer"":[{""traits"":''Female aged 60+'',""answer1"":""5""}]}""")
|
184
|
-
== '{"answer": [{"traits": "Female aged 60+", "answer1": "5"}]}'
|
185
|
-
)
|
186
|
-
assert (
|
187
|
-
repair_json('{ "words": abcdef", "numbers": 12345", "words2": ghijkl" }')
|
188
|
-
== '{"words": "abcdef", "numbers": 12345, "words2": "ghijkl"}'
|
189
|
-
)
|
190
|
-
assert (
|
191
|
-
repair_json("""{"number": 1,"reason": "According...""ans": "YES"}""")
|
192
|
-
== '{"number": 1, "reason": "According...", "ans": "YES"}'
|
193
|
-
)
|
194
|
-
assert repair_json("""{ "a" : "{ b": {} }" }""") == '{"a": "{ b"}'
|
195
|
-
assert repair_json("""{"b": "xxxxx" true}""") == '{"b": "xxxxx"}'
|
196
|
-
assert repair_json('{"key": "Lorem "ipsum" s,"}') == '{"key": "Lorem \\"ipsum\\" s,"}'
|
197
|
-
assert repair_json('{"lorem": ipsum, sic, datum.",}') == '{"lorem": "ipsum, sic, datum."}'
|
198
|
-
assert (
|
199
|
-
repair_json('{"lorem": sic tamet. "ipsum": sic tamet, quick brown fox. "sic": ipsum}')
|
200
|
-
== '{"lorem": "sic tamet.", "ipsum": "sic tamet", "sic": "ipsum"}'
|
201
|
-
)
|
202
|
-
assert (
|
203
|
-
repair_json('{"lorem_ipsum": "sic tamet, quick brown fox. }')
|
204
|
-
== '{"lorem_ipsum": "sic tamet, quick brown fox."}'
|
205
|
-
)
|
206
|
-
assert repair_json('{"key":value, " key2":"value2" }') == '{"key": "value", " key2": "value2"}'
|
207
|
-
assert repair_json('{"key":value "key2":"value2" }') == '{"key": "value", "key2": "value2"}'
|
208
|
-
assert (
|
209
|
-
repair_json("{'text': 'words{words in brackets}more words'}")
|
210
|
-
== '{"text": "words{words in brackets}more words"}'
|
211
|
-
)
|
212
|
-
assert repair_json("{text:words{words in brackets}}") == '{"text": "words{words in brackets}"}'
|
213
|
-
assert repair_json("{text:words{words in brackets}m}") == '{"text": "words{words in brackets}m"}'
|
214
|
-
assert repair_json('{"key": "value, value2"```') == '{"key": "value, value2"}'
|
215
|
-
assert repair_json("{key:value,key2:value2}") == '{"key": "value", "key2": "value2"}'
|
216
|
-
assert repair_json('{"key:"value"}') == '{"key": "value"}'
|
217
|
-
assert repair_json('{"key:value}') == '{"key": "value"}'
|
218
|
-
assert (
|
219
|
-
repair_json('[{"lorem": {"ipsum": "sic"}, """" "lorem": {"ipsum": "sic"}]')
|
220
|
-
== '[{"lorem": {"ipsum": "sic"}}, {"lorem": {"ipsum": "sic"}}]'
|
221
|
-
)
|
222
|
-
assert (
|
223
|
-
repair_json('{ "key": ["arrayvalue"], ["arrayvalue1"], ["arrayvalue2"], "key3": "value3" }')
|
224
|
-
== '{"key": ["arrayvalue", "arrayvalue1", "arrayvalue2"], "key3": "value3"}'
|
225
|
-
)
|
226
|
-
assert (
|
227
|
-
repair_json('{ "key": ["arrayvalue"], "key3": "value3", ["arrayvalue1"] }')
|
228
|
-
== '{"key": ["arrayvalue"], "key3": "value3", "arrayvalue1": ""}'
|
229
|
-
)
|
230
|
-
assert (
|
231
|
-
repair_json('{"key": "{\\\\"key\\\\\\":[\\"value\\\\\\"],\\"key2\\":"value2"}"}')
|
232
|
-
== '{"key": "{\\"key\\":[\\"value\\"],\\"key2\\":\\"value2\\"}"}'
|
233
|
-
)
|
234
|
-
assert repair_json('{"key": , "key2": "value2"}') == '{"key": "", "key2": "value2"}'
|
235
|
-
|
236
|
-
|
237
|
-
def test_number_edge_cases():
|
238
|
-
assert (
|
239
|
-
repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
|
240
|
-
)
|
241
|
-
assert repair_json('{"key": 1/3}') == '{"key": "1/3"}'
|
242
|
-
assert repair_json('{"key": .25}') == '{"key": 0.25}'
|
243
|
-
assert repair_json('{"here": "now", "key": 1/3, "foo": "bar"}') == '{"here": "now", "key": "1/3", "foo": "bar"}'
|
244
|
-
assert repair_json('{"key": 12345/67890}') == '{"key": "12345/67890"}'
|
245
|
-
assert repair_json("[105,12") == "[105, 12]"
|
246
|
-
assert repair_json('{"key", 105,12,') == '{"key": "105,12"}'
|
247
|
-
assert repair_json('{"key": 1/3, "foo": "bar"}') == '{"key": "1/3", "foo": "bar"}'
|
248
|
-
assert repair_json('{"key": 10-20}') == '{"key": "10-20"}'
|
249
|
-
assert repair_json('{"key": 1.1.1}') == '{"key": "1.1.1"}'
|
250
|
-
assert repair_json("[- ") == "[]"
|
251
|
-
assert repair_json('{"key": 1. }') == '{"key": 1.0}'
|
252
|
-
assert repair_json('{"key": 1e10 }') == '{"key": 10000000000.0}'
|
253
|
-
assert repair_json('{"key": 1e }') == '{"key": 1}'
|
254
|
-
assert repair_json('{"key": 1notanumber }') == '{"key": "1notanumber"}'
|
255
|
-
assert repair_json("[1, 2notanumber]") == '[1, "2notanumber"]'
|
256
|
-
|
257
|
-
|
258
|
-
def test_markdown():
|
259
|
-
assert (
|
260
|
-
repair_json('{ "content": "[LINK]("https://google.com")" }')
|
261
|
-
== '{"content": "[LINK](\\"https://google.com\\")"}'
|
262
|
-
)
|
263
|
-
assert repair_json('{ "content": "[LINK](" }') == '{"content": "[LINK]("}'
|
264
|
-
assert repair_json('{ "content": "[LINK](", "key": true }') == '{"content": "[LINK](", "key": true}'
|
265
|
-
|
266
|
-
|
267
|
-
def test_leading_trailing_characters():
|
268
|
-
assert repair_json('````{ "key": "value" }```') == '{"key": "value"}'
|
269
|
-
assert repair_json("""{ "a": "", "b": [ { "c": 1} ] \n}```""") == '{"a": "", "b": [{"c": 1}]}'
|
270
|
-
assert (
|
271
|
-
repair_json("Based on the information extracted, here is the filled JSON output: ```json { 'a': 'b' } ```")
|
272
|
-
== '{"a": "b"}'
|
273
|
-
)
|
274
|
-
assert (
|
275
|
-
repair_json("""
|
276
|
-
The next 64 elements are:
|
277
|
-
```json
|
278
|
-
{ "key": "value" }
|
279
|
-
```""")
|
280
|
-
== '{"key": "value"}'
|
281
|
-
)
|
282
|
-
|
283
|
-
|
284
|
-
def test_multiple_jsons():
|
285
|
-
assert repair_json("[]{}") == "[[], {}]"
|
286
|
-
assert repair_json("{}[]{}") == "[{}, [], {}]"
|
287
|
-
assert repair_json('{"key":"value"}[1,2,3,True]') == '[{"key": "value"}, [1, 2, 3, true]]'
|
288
|
-
assert (
|
289
|
-
repair_json('lorem ```json {"key":"value"} ``` ipsum ```json [1,2,3,True] ``` 42')
|
290
|
-
== '[{"key": "value"}, [1, 2, 3, true]]'
|
291
|
-
)
|
292
|
-
assert repair_json('[{"key":"value"}][{"key":"value_after"}]') == '[{"key": "value_after"}]'
|
293
|
-
|
294
|
-
|
295
|
-
def test_repair_json_with_objects():
|
296
|
-
# Test with valid JSON strings
|
297
|
-
assert repair_json("[]", return_objects=True) == []
|
298
|
-
assert repair_json("{}", return_objects=True) == {}
|
299
|
-
assert repair_json('{"key": true, "key2": false, "key3": null}', return_objects=True) == {
|
300
|
-
"key": True,
|
301
|
-
"key2": False,
|
302
|
-
"key3": None,
|
303
|
-
}
|
304
|
-
assert repair_json('{"name": "John", "age": 30, "city": "New York"}', return_objects=True) == {
|
305
|
-
"name": "John",
|
306
|
-
"age": 30,
|
307
|
-
"city": "New York",
|
308
|
-
}
|
309
|
-
assert repair_json("[1, 2, 3, 4]", return_objects=True) == [1, 2, 3, 4]
|
310
|
-
assert repair_json('{"employees":["John", "Anna", "Peter"]} ', return_objects=True) == {
|
311
|
-
"employees": ["John", "Anna", "Peter"]
|
312
|
-
}
|
313
|
-
assert repair_json(
|
314
|
-
"""
|
315
|
-
{
|
316
|
-
"resourceType": "Bundle",
|
317
|
-
"id": "1",
|
318
|
-
"type": "collection",
|
319
|
-
"entry": [
|
320
|
-
{
|
321
|
-
"resource": {
|
322
|
-
"resourceType": "Patient",
|
323
|
-
"id": "1",
|
324
|
-
"name": [
|
325
|
-
{"use": "official", "family": "Corwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."},
|
326
|
-
{"use": "maiden", "family": "Goodwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}
|
327
|
-
]
|
328
|
-
}
|
329
|
-
}
|
330
|
-
]
|
331
|
-
}
|
332
|
-
""",
|
333
|
-
return_objects=True,
|
334
|
-
) == {
|
335
|
-
"resourceType": "Bundle",
|
336
|
-
"id": "1",
|
337
|
-
"type": "collection",
|
338
|
-
"entry": [
|
339
|
-
{
|
340
|
-
"resource": {
|
341
|
-
"resourceType": "Patient",
|
342
|
-
"id": "1",
|
343
|
-
"name": [
|
344
|
-
{
|
345
|
-
"use": "official",
|
346
|
-
"family": "Corwin",
|
347
|
-
"given": ["Keisha", "Sunny"],
|
348
|
-
"prefix": ["Mrs."],
|
349
|
-
},
|
350
|
-
{
|
351
|
-
"use": "maiden",
|
352
|
-
"family": "Goodwin",
|
353
|
-
"given": ["Keisha", "Sunny"],
|
354
|
-
"prefix": ["Mrs."],
|
355
|
-
},
|
356
|
-
],
|
357
|
-
}
|
358
|
-
}
|
359
|
-
],
|
360
|
-
}
|
361
|
-
assert repair_json(
|
362
|
-
'{\n"html": "<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>"}',
|
363
|
-
return_objects=True,
|
364
|
-
) == {"html": '<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>'}
|
365
|
-
assert repair_json(
|
366
|
-
"""
|
367
|
-
[
|
368
|
-
{
|
369
|
-
"foo": "Foo bar baz",
|
370
|
-
"tag": "#foo-bar-baz"
|
371
|
-
},
|
372
|
-
{
|
373
|
-
"foo": "foo bar "foobar" foo bar baz.",
|
374
|
-
"tag": "#foo-bar-foobar"
|
375
|
-
}
|
376
|
-
]
|
377
|
-
""",
|
378
|
-
return_objects=True,
|
379
|
-
) == [
|
380
|
-
{"foo": "Foo bar baz", "tag": "#foo-bar-baz"},
|
381
|
-
{"foo": 'foo bar "foobar" foo bar baz.', "tag": "#foo-bar-foobar"},
|
382
|
-
]
|
383
|
-
|
384
|
-
|
385
|
-
def test_repair_json_skip_json_loads():
|
386
|
-
assert (
|
387
|
-
repair_json('{"key": true, "key2": false, "key3": null}', skip_json_loads=True)
|
388
|
-
== '{"key": true, "key2": false, "key3": null}'
|
389
|
-
)
|
390
|
-
assert repair_json(
|
391
|
-
'{"key": true, "key2": false, "key3": null}',
|
392
|
-
return_objects=True,
|
393
|
-
skip_json_loads=True,
|
394
|
-
) == {"key": True, "key2": False, "key3": None}
|
395
|
-
assert (
|
396
|
-
repair_json('{"key": true, "key2": false, "key3": }', skip_json_loads=True)
|
397
|
-
== '{"key": true, "key2": false, "key3": ""}'
|
398
|
-
)
|
399
|
-
assert loads('{"key": true, "key2": false, "key3": }', skip_json_loads=True) == {
|
400
|
-
"key": True,
|
401
|
-
"key2": False,
|
402
|
-
"key3": "",
|
403
|
-
}
|
5
|
+
from src.json_repair.json_repair import from_file
|
404
6
|
|
405
7
|
|
406
8
|
def test_repair_json_from_file():
|
@@ -891,83 +493,3 @@ def test_repair_json_from_file():
|
|
891
493
|
finally:
|
892
494
|
# Clean up - delete the temporary file
|
893
495
|
os.remove(temp_path)
|
894
|
-
|
895
|
-
|
896
|
-
def test_ensure_ascii():
|
897
|
-
assert repair_json("{'test_中国人_ascii':'统一码'}", ensure_ascii=False) == '{"test_中国人_ascii": "统一码"}'
|
898
|
-
|
899
|
-
|
900
|
-
def test_stream_stable():
|
901
|
-
# default: stream_stable = False
|
902
|
-
# When the json to be repaired is the accumulation of streaming json at a certain moment.
|
903
|
-
# The default repair result is unstable.
|
904
|
-
assert repair_json('{"key": "val\\', stream_stable=False) == '{"key": "val\\\\"}'
|
905
|
-
assert repair_json('{"key": "val\\n', stream_stable=False) == '{"key": "val"}'
|
906
|
-
assert (
|
907
|
-
repair_json('{"key": "val\\n123,`key2:value2', stream_stable=False) == '{"key": "val\\n123", "key2": "value2"}'
|
908
|
-
)
|
909
|
-
assert repair_json('{"key": "val\\n123,`key2:value2`"}', stream_stable=True) == '{"key": "val\\n123,`key2:value2`"}'
|
910
|
-
# stream_stable = True
|
911
|
-
assert repair_json('{"key": "val\\', stream_stable=True) == '{"key": "val"}'
|
912
|
-
assert repair_json('{"key": "val\\n', stream_stable=True) == '{"key": "val\\n"}'
|
913
|
-
assert repair_json('{"key": "val\\n123,`key2:value2', stream_stable=True) == '{"key": "val\\n123,`key2:value2"}'
|
914
|
-
assert repair_json('{"key": "val\\n123,`key2:value2`"}', stream_stable=True) == '{"key": "val\\n123,`key2:value2`"}'
|
915
|
-
|
916
|
-
|
917
|
-
def test_cli(capsys):
|
918
|
-
# Create a temporary file
|
919
|
-
temp_fd, temp_path = tempfile.mkstemp(suffix=".json")
|
920
|
-
try:
|
921
|
-
# Write content to the temporary file
|
922
|
-
with os.fdopen(temp_fd, "w") as tmp:
|
923
|
-
tmp.write("{key:value")
|
924
|
-
cli(inline_args=[temp_path, "--indent", 0, "--ensure_ascii"])
|
925
|
-
captured = capsys.readouterr()
|
926
|
-
assert captured.out == '{\n"key": "value"\n}\n'
|
927
|
-
|
928
|
-
# Test the output option
|
929
|
-
tempout_fd, tempout_path = tempfile.mkstemp(suffix=".json")
|
930
|
-
cli(inline_args=[temp_path, "--indent", 0, "-o", tempout_path])
|
931
|
-
with open(tempout_path) as tmp:
|
932
|
-
out = tmp.read()
|
933
|
-
assert out == '{\n"key": "value"\n}'
|
934
|
-
|
935
|
-
# Test the inline option
|
936
|
-
cli(inline_args=[temp_path, "--indent", 0, "-i"])
|
937
|
-
with open(temp_path) as tmp:
|
938
|
-
out = tmp.read()
|
939
|
-
assert out == '{\n"key": "value"\n}'
|
940
|
-
|
941
|
-
finally:
|
942
|
-
# Clean up - delete the temporary file
|
943
|
-
os.remove(temp_path)
|
944
|
-
os.remove(tempout_path)
|
945
|
-
|
946
|
-
# Prepare a JSON string that needs to be repaired.
|
947
|
-
test_input = "{key:value"
|
948
|
-
# Expected output when running cli with --indent 0.
|
949
|
-
expected_output = '{\n"key": "value"\n}\n'
|
950
|
-
# Patch sys.stdin so that cli() reads from it instead of a file.
|
951
|
-
with patch("sys.stdin", new=io.StringIO(test_input)):
|
952
|
-
cli(inline_args=["--indent", 0])
|
953
|
-
captured = capsys.readouterr()
|
954
|
-
assert captured.out == expected_output
|
955
|
-
|
956
|
-
|
957
|
-
def test_cli_inline_requires_filename(capsys):
|
958
|
-
"""cli() should exit with an error when --inline is passed without a filename."""
|
959
|
-
with pytest.raises(SystemExit) as exc:
|
960
|
-
cli(inline_args=["--inline"])
|
961
|
-
captured = capsys.readouterr()
|
962
|
-
assert captured.err.strip() == "Error: Inline mode requires a filename"
|
963
|
-
assert exc.value.code != 0
|
964
|
-
|
965
|
-
|
966
|
-
def test_cli_inline_and_output_error(tmp_path, capsys):
|
967
|
-
"""cli() should exit with an error when --inline and --output are used together."""
|
968
|
-
outfile = tmp_path / "out.json"
|
969
|
-
with pytest.raises(SystemExit) as exc:
|
970
|
-
cli(inline_args=["dummy.json", "--inline", "--output", str(outfile)])
|
971
|
-
captured = capsys.readouterr()
|
972
|
-
assert captured.err.strip() == "Error: You cannot pass both --inline and --output"
|
973
|
-
assert exc.value.code != 0
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|