json-repair 0.47.5__tar.gz → 0.47.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {json_repair-0.47.5/src/json_repair.egg-info → json_repair-0.47.7}/PKG-INFO +1 -1
- {json_repair-0.47.5 → json_repair-0.47.7}/pyproject.toml +1 -1
- {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/json_parser.py +23 -12
- {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/parse_array.py +6 -1
- {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/parse_boolean_or_null.py +7 -1
- {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/parse_comment.py +7 -1
- {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/parse_number.py +7 -2
- {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/parse_object.py +17 -7
- {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/parse_string.py +24 -2
- {json_repair-0.47.5 → json_repair-0.47.7/src/json_repair.egg-info}/PKG-INFO +1 -1
- {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair.egg-info/SOURCES.txt +8 -1
- json_repair-0.47.7/tests/test_json_repair.py +161 -0
- json_repair-0.47.7/tests/test_parse_array.py +37 -0
- json_repair-0.47.7/tests/test_parse_boolean_or_null.py +12 -0
- {json_repair-0.47.5 → json_repair-0.47.7}/tests/test_parse_comment.py +1 -0
- json_repair-0.47.7/tests/test_parse_number.py +27 -0
- json_repair-0.47.7/tests/test_parse_object.py +85 -0
- json_repair-0.47.7/tests/test_parse_string.py +99 -0
- json_repair-0.47.7/tests/test_repair_json_cli.py +67 -0
- json_repair-0.47.5/tests/test_json_repair.py → json_repair-0.47.7/tests/test_repair_json_from_file.py +1 -479
- {json_repair-0.47.5 → json_repair-0.47.7}/LICENSE +0 -0
- {json_repair-0.47.5 → json_repair-0.47.7}/README.md +0 -0
- {json_repair-0.47.5 → json_repair-0.47.7}/setup.cfg +0 -0
- {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/__init__.py +0 -0
- {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/__main__.py +0 -0
- {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/constants.py +0 -0
- {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/json_context.py +0 -0
- {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/json_repair.py +0 -0
- {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/object_comparer.py +0 -0
- {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/py.typed +0 -0
- {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/string_file_wrapper.py +0 -0
- {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair.egg-info/entry_points.txt +0 -0
- {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair.egg-info/top_level.txt +0 -0
- {json_repair-0.47.5 → json_repair-0.47.7}/tests/test_performance.py +0 -0
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.47.5"
|
6
|
+
version = "0.47.7"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -3,23 +3,34 @@ from typing import Literal, TextIO
|
|
3
3
|
from .constants import STRING_DELIMITERS, JSONReturnType
|
4
4
|
from .json_context import JsonContext
|
5
5
|
from .object_comparer import ObjectComparer
|
6
|
-
from .parse_array import parse_array
|
7
|
-
from .parse_boolean_or_null import parse_boolean_or_null
|
8
|
-
from .parse_comment import parse_comment
|
9
|
-
from .parse_number import parse_number
|
10
|
-
from .parse_object import parse_object
|
11
|
-
from .parse_string import parse_string
|
6
|
+
from .parse_array import parse_array as _parse_array
|
7
|
+
from .parse_boolean_or_null import parse_boolean_or_null as _parse_boolean_or_null
|
8
|
+
from .parse_comment import parse_comment as _parse_comment
|
9
|
+
from .parse_number import parse_number as _parse_number
|
10
|
+
from .parse_object import parse_object as _parse_object
|
11
|
+
from .parse_string import parse_string as _parse_string
|
12
12
|
from .string_file_wrapper import StringFileWrapper
|
13
13
|
|
14
14
|
|
15
15
|
class JSONParser:
|
16
16
|
# Split the parse methods into separate files because this one was like 3000 lines
|
17
|
-
parse_array = parse_array
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
17
|
+
def parse_array(self, *args, **kwargs):
|
18
|
+
return _parse_array(self, *args, **kwargs)
|
19
|
+
|
20
|
+
def parse_boolean_or_null(self, *args, **kwargs):
|
21
|
+
return _parse_boolean_or_null(self, *args, **kwargs)
|
22
|
+
|
23
|
+
def parse_comment(self, *args, **kwargs):
|
24
|
+
return _parse_comment(self, *args, **kwargs)
|
25
|
+
|
26
|
+
def parse_number(self, *args, **kwargs):
|
27
|
+
return _parse_number(self, *args, **kwargs)
|
28
|
+
|
29
|
+
def parse_object(self, *args, **kwargs):
|
30
|
+
return _parse_object(self, *args, **kwargs)
|
31
|
+
|
32
|
+
def parse_string(self, *args, **kwargs):
|
33
|
+
return _parse_string(self, *args, **kwargs)
|
23
34
|
|
24
35
|
def __init__(
|
25
36
|
self,
|
@@ -1,8 +1,13 @@
|
|
1
|
+
from typing import TYPE_CHECKING
|
2
|
+
|
1
3
|
from .constants import STRING_DELIMITERS, JSONReturnType
|
2
4
|
from .json_context import ContextValues
|
3
5
|
|
6
|
+
if TYPE_CHECKING:
|
7
|
+
from .json_parser import JSONParser
|
8
|
+
|
4
9
|
|
5
|
-
def parse_array(self) -> list[JSONReturnType]:
|
10
|
+
def parse_array(self: "JSONParser") -> list[JSONReturnType]:
|
6
11
|
# <array> ::= '[' [ <json> *(', ' <json>) ] ']' ; A sequence of JSON values separated by commas
|
7
12
|
arr = []
|
8
13
|
self.context.set(ContextValues.ARRAY)
|
@@ -1,4 +1,10 @@
|
|
1
|
-
|
1
|
+
from typing import TYPE_CHECKING
|
2
|
+
|
3
|
+
if TYPE_CHECKING:
|
4
|
+
from .json_parser import JSONParser
|
5
|
+
|
6
|
+
|
7
|
+
def parse_boolean_or_null(self: "JSONParser") -> bool | str | None:
|
2
8
|
# <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
|
3
9
|
starting_index = self.index
|
4
10
|
char = (self.get_char_at() or "").lower()
|
@@ -1,7 +1,13 @@
|
|
1
|
+
from typing import TYPE_CHECKING
|
2
|
+
|
3
|
+
from .constants import JSONReturnType
|
1
4
|
from .json_context import ContextValues
|
2
5
|
|
6
|
+
if TYPE_CHECKING:
|
7
|
+
from .json_parser import JSONParser
|
8
|
+
|
3
9
|
|
4
|
-
def parse_comment(self) ->
|
10
|
+
def parse_comment(self: "JSONParser") -> JSONReturnType:
|
5
11
|
"""
|
6
12
|
Parse code-like comments:
|
7
13
|
|
@@ -1,10 +1,15 @@
|
|
1
|
-
from
|
1
|
+
from typing import TYPE_CHECKING
|
2
|
+
|
2
3
|
from .json_context import ContextValues
|
3
4
|
|
4
5
|
NUMBER_CHARS: set[str] = set("0123456789-.eE/,")
|
5
6
|
|
6
7
|
|
7
|
-
|
8
|
+
if TYPE_CHECKING:
|
9
|
+
from .json_parser import JSONParser
|
10
|
+
|
11
|
+
|
12
|
+
def parse_number(self: "JSONParser") -> float | int | str | bool | None:
|
8
13
|
# <number> is a valid real number expressed in one of a number of given formats
|
9
14
|
number_str = ""
|
10
15
|
char = self.get_char_at()
|
@@ -1,8 +1,13 @@
|
|
1
|
+
from typing import TYPE_CHECKING
|
2
|
+
|
1
3
|
from .constants import JSONReturnType
|
2
4
|
from .json_context import ContextValues
|
3
5
|
|
6
|
+
if TYPE_CHECKING:
|
7
|
+
from .json_parser import JSONParser
|
8
|
+
|
4
9
|
|
5
|
-
def parse_object(self) -> dict[str, JSONReturnType]:
|
10
|
+
def parse_object(self: "JSONParser") -> dict[str, JSONReturnType]:
|
6
11
|
# <object> ::= '{' [ <member> *(', ' <member>) ] '}' ; A sequence of 'members'
|
7
12
|
obj: dict[str, JSONReturnType] = {}
|
8
13
|
# Stop when you either find the closing parentheses or you have iterated over the entire string
|
@@ -59,12 +64,17 @@ def parse_object(self) -> dict[str, JSONReturnType]:
|
|
59
64
|
# If the string is empty but there is a object divider, we are done here
|
60
65
|
break
|
61
66
|
if ContextValues.ARRAY in self.context.context and key in obj:
|
62
|
-
self.log(
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
67
|
+
if self.stream_stable:
|
68
|
+
# This is possibly another problem, the key is incomplete and it "appears" duplicate
|
69
|
+
# Let's just do nothing
|
70
|
+
pass
|
71
|
+
else:
|
72
|
+
self.log(
|
73
|
+
"While parsing an object we found a duplicate key, closing the object here and rolling back the index",
|
74
|
+
)
|
75
|
+
self.index = rollback_index - 1
|
76
|
+
# add an opening curly brace to make this work
|
77
|
+
self.json_str = self.json_str[: self.index + 1] + "{" + self.json_str[self.index + 1 :]
|
68
78
|
break
|
69
79
|
|
70
80
|
# Skip filler whitespaces
|
@@ -1,8 +1,13 @@
|
|
1
|
+
from typing import TYPE_CHECKING
|
2
|
+
|
1
3
|
from .constants import STRING_DELIMITERS
|
2
4
|
from .json_context import ContextValues
|
3
5
|
|
6
|
+
if TYPE_CHECKING:
|
7
|
+
from .json_parser import JSONParser
|
8
|
+
|
4
9
|
|
5
|
-
def parse_string(self) -> str | bool | None:
|
10
|
+
def parse_string(self: "JSONParser") -> str | bool | None:
|
6
11
|
# <string> is a string of valid characters enclosed in quotes
|
7
12
|
# i.e. { name: "John" }
|
8
13
|
# Somehow all weird cases in an invalid JSON happen to be resolved in this function, so be careful here
|
@@ -328,7 +333,24 @@ def parse_string(self) -> str | bool | None:
|
|
328
333
|
if all(str(self.get_char_at(j)).isspace() for j in range(1, i) if self.get_char_at(j)):
|
329
334
|
break
|
330
335
|
if self.context.current == ContextValues.OBJECT_VALUE:
|
331
|
-
|
336
|
+
i = self.skip_whitespaces_at(idx=i + 1, move_main_index=False)
|
337
|
+
if self.get_char_at(i) == ",":
|
338
|
+
# So we found a comma, this could be a case of a single quote like "va"lue",
|
339
|
+
# Search if it's followed by another key, starting with the first delimeter
|
340
|
+
i = self.skip_to_character(character=lstring_delimiter, idx=i + 1)
|
341
|
+
i += 1
|
342
|
+
i = self.skip_to_character(character=rstring_delimiter, idx=i + 1)
|
343
|
+
i += 1
|
344
|
+
i = self.skip_whitespaces_at(idx=i, move_main_index=False)
|
345
|
+
next_c = self.get_char_at(i)
|
346
|
+
if next_c == ":":
|
347
|
+
self.log(
|
348
|
+
"While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
|
349
|
+
)
|
350
|
+
string_acc += str(char)
|
351
|
+
self.index += 1
|
352
|
+
char = self.get_char_at()
|
353
|
+
continue
|
332
354
|
# We found a delimiter and we need to check if this is a key
|
333
355
|
# so find a rstring_delimiter and a colon after
|
334
356
|
i = self.skip_to_character(character=rstring_delimiter, idx=i + 1)
|
@@ -22,5 +22,12 @@ src/json_repair.egg-info/dependency_links.txt
|
|
22
22
|
src/json_repair.egg-info/entry_points.txt
|
23
23
|
src/json_repair.egg-info/top_level.txt
|
24
24
|
tests/test_json_repair.py
|
25
|
+
tests/test_parse_array.py
|
26
|
+
tests/test_parse_boolean_or_null.py
|
25
27
|
tests/test_parse_comment.py
|
26
|
-
tests/test_performance.py
|
28
|
+
tests/test_parse_number.py
|
29
|
+
tests/test_parse_object.py
|
30
|
+
tests/test_parse_string.py
|
31
|
+
tests/test_performance.py
|
32
|
+
tests/test_repair_json_cli.py
|
33
|
+
tests/test_repair_json_from_file.py
|
@@ -0,0 +1,161 @@
|
|
1
|
+
from src.json_repair.json_repair import loads, repair_json
|
2
|
+
|
3
|
+
|
4
|
+
def test_valid_json():
|
5
|
+
assert (
|
6
|
+
repair_json('{"name": "John", "age": 30, "city": "New York"}')
|
7
|
+
== '{"name": "John", "age": 30, "city": "New York"}'
|
8
|
+
)
|
9
|
+
assert repair_json('{"employees":["John", "Anna", "Peter"]} ') == '{"employees": ["John", "Anna", "Peter"]}'
|
10
|
+
assert repair_json('{"key": "value:value"}') == '{"key": "value:value"}'
|
11
|
+
assert repair_json('{"text": "The quick brown fox,"}') == '{"text": "The quick brown fox,"}'
|
12
|
+
assert repair_json('{"text": "The quick brown fox won\'t jump"}') == '{"text": "The quick brown fox won\'t jump"}'
|
13
|
+
assert repair_json('{"key": ""') == '{"key": ""}'
|
14
|
+
assert repair_json('{"key1": {"key2": [1, 2, 3]}}') == '{"key1": {"key2": [1, 2, 3]}}'
|
15
|
+
assert repair_json('{"key": 12345678901234567890}') == '{"key": 12345678901234567890}'
|
16
|
+
assert repair_json('{"key": "value\u263a"}') == '{"key": "value\\u263a"}'
|
17
|
+
assert repair_json('{"key": "value\\nvalue"}') == '{"key": "value\\nvalue"}'
|
18
|
+
|
19
|
+
|
20
|
+
def test_multiple_jsons():
|
21
|
+
assert repair_json("[]{}") == "[[], {}]"
|
22
|
+
assert repair_json("{}[]{}") == "[{}, [], {}]"
|
23
|
+
assert repair_json('{"key":"value"}[1,2,3,True]') == '[{"key": "value"}, [1, 2, 3, true]]'
|
24
|
+
assert (
|
25
|
+
repair_json('lorem ```json {"key":"value"} ``` ipsum ```json [1,2,3,True] ``` 42')
|
26
|
+
== '[{"key": "value"}, [1, 2, 3, true]]'
|
27
|
+
)
|
28
|
+
assert repair_json('[{"key":"value"}][{"key":"value_after"}]') == '[{"key": "value_after"}]'
|
29
|
+
|
30
|
+
|
31
|
+
def test_repair_json_with_objects():
|
32
|
+
# Test with valid JSON strings
|
33
|
+
assert repair_json("[]", return_objects=True) == []
|
34
|
+
assert repair_json("{}", return_objects=True) == {}
|
35
|
+
assert repair_json('{"key": true, "key2": false, "key3": null}', return_objects=True) == {
|
36
|
+
"key": True,
|
37
|
+
"key2": False,
|
38
|
+
"key3": None,
|
39
|
+
}
|
40
|
+
assert repair_json('{"name": "John", "age": 30, "city": "New York"}', return_objects=True) == {
|
41
|
+
"name": "John",
|
42
|
+
"age": 30,
|
43
|
+
"city": "New York",
|
44
|
+
}
|
45
|
+
assert repair_json("[1, 2, 3, 4]", return_objects=True) == [1, 2, 3, 4]
|
46
|
+
assert repair_json('{"employees":["John", "Anna", "Peter"]} ', return_objects=True) == {
|
47
|
+
"employees": ["John", "Anna", "Peter"]
|
48
|
+
}
|
49
|
+
assert repair_json(
|
50
|
+
"""
|
51
|
+
{
|
52
|
+
"resourceType": "Bundle",
|
53
|
+
"id": "1",
|
54
|
+
"type": "collection",
|
55
|
+
"entry": [
|
56
|
+
{
|
57
|
+
"resource": {
|
58
|
+
"resourceType": "Patient",
|
59
|
+
"id": "1",
|
60
|
+
"name": [
|
61
|
+
{"use": "official", "family": "Corwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."},
|
62
|
+
{"use": "maiden", "family": "Goodwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}
|
63
|
+
]
|
64
|
+
}
|
65
|
+
}
|
66
|
+
]
|
67
|
+
}
|
68
|
+
""",
|
69
|
+
return_objects=True,
|
70
|
+
) == {
|
71
|
+
"resourceType": "Bundle",
|
72
|
+
"id": "1",
|
73
|
+
"type": "collection",
|
74
|
+
"entry": [
|
75
|
+
{
|
76
|
+
"resource": {
|
77
|
+
"resourceType": "Patient",
|
78
|
+
"id": "1",
|
79
|
+
"name": [
|
80
|
+
{
|
81
|
+
"use": "official",
|
82
|
+
"family": "Corwin",
|
83
|
+
"given": ["Keisha", "Sunny"],
|
84
|
+
"prefix": ["Mrs."],
|
85
|
+
},
|
86
|
+
{
|
87
|
+
"use": "maiden",
|
88
|
+
"family": "Goodwin",
|
89
|
+
"given": ["Keisha", "Sunny"],
|
90
|
+
"prefix": ["Mrs."],
|
91
|
+
},
|
92
|
+
],
|
93
|
+
}
|
94
|
+
}
|
95
|
+
],
|
96
|
+
}
|
97
|
+
assert repair_json(
|
98
|
+
'{\n"html": "<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>"}',
|
99
|
+
return_objects=True,
|
100
|
+
) == {"html": '<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>'}
|
101
|
+
assert repair_json(
|
102
|
+
"""
|
103
|
+
[
|
104
|
+
{
|
105
|
+
"foo": "Foo bar baz",
|
106
|
+
"tag": "#foo-bar-baz"
|
107
|
+
},
|
108
|
+
{
|
109
|
+
"foo": "foo bar "foobar" foo bar baz.",
|
110
|
+
"tag": "#foo-bar-foobar"
|
111
|
+
}
|
112
|
+
]
|
113
|
+
""",
|
114
|
+
return_objects=True,
|
115
|
+
) == [
|
116
|
+
{"foo": "Foo bar baz", "tag": "#foo-bar-baz"},
|
117
|
+
{"foo": 'foo bar "foobar" foo bar baz.', "tag": "#foo-bar-foobar"},
|
118
|
+
]
|
119
|
+
|
120
|
+
|
121
|
+
def test_repair_json_skip_json_loads():
|
122
|
+
assert (
|
123
|
+
repair_json('{"key": true, "key2": false, "key3": null}', skip_json_loads=True)
|
124
|
+
== '{"key": true, "key2": false, "key3": null}'
|
125
|
+
)
|
126
|
+
assert repair_json(
|
127
|
+
'{"key": true, "key2": false, "key3": null}',
|
128
|
+
return_objects=True,
|
129
|
+
skip_json_loads=True,
|
130
|
+
) == {"key": True, "key2": False, "key3": None}
|
131
|
+
assert (
|
132
|
+
repair_json('{"key": true, "key2": false, "key3": }', skip_json_loads=True)
|
133
|
+
== '{"key": true, "key2": false, "key3": ""}'
|
134
|
+
)
|
135
|
+
assert loads('{"key": true, "key2": false, "key3": }', skip_json_loads=True) == {
|
136
|
+
"key": True,
|
137
|
+
"key2": False,
|
138
|
+
"key3": "",
|
139
|
+
}
|
140
|
+
|
141
|
+
|
142
|
+
def test_ensure_ascii():
|
143
|
+
assert repair_json("{'test_中国人_ascii':'统一码'}", ensure_ascii=False) == '{"test_中国人_ascii": "统一码"}'
|
144
|
+
|
145
|
+
|
146
|
+
def test_stream_stable():
|
147
|
+
# default: stream_stable = False
|
148
|
+
# When the json to be repaired is the accumulation of streaming json at a certain moment.
|
149
|
+
# The default repair result is unstable.
|
150
|
+
assert repair_json('{"key": "val\\', stream_stable=False) == '{"key": "val\\\\"}'
|
151
|
+
assert repair_json('{"key": "val\\n', stream_stable=False) == '{"key": "val"}'
|
152
|
+
assert (
|
153
|
+
repair_json('{"key": "val\\n123,`key2:value2', stream_stable=False) == '{"key": "val\\n123", "key2": "value2"}'
|
154
|
+
)
|
155
|
+
assert repair_json('{"key": "val\\n123,`key2:value2`"}', stream_stable=True) == '{"key": "val\\n123,`key2:value2`"}'
|
156
|
+
# stream_stable = True
|
157
|
+
assert repair_json('{"key": "val\\', stream_stable=True) == '{"key": "val"}'
|
158
|
+
assert repair_json('{"key": "val\\n', stream_stable=True) == '{"key": "val\\n"}'
|
159
|
+
assert repair_json('{"key": "val\\n123,`key2:value2', stream_stable=True) == '{"key": "val\\n123,`key2:value2"}'
|
160
|
+
assert repair_json('{"key": "val\\n123,`key2:value2`"}', stream_stable=True) == '{"key": "val\\n123,`key2:value2`"}'
|
161
|
+
assert repair_json('[{"key": "value", "key', stream_stable=True) == '[{"key": "value"}]'
|
@@ -0,0 +1,37 @@
|
|
1
|
+
from src.json_repair.json_repair import repair_json
|
2
|
+
|
3
|
+
|
4
|
+
def test_parse_array():
|
5
|
+
assert repair_json("[]", return_objects=True) == []
|
6
|
+
assert repair_json("[1, 2, 3, 4]", return_objects=True) == [1, 2, 3, 4]
|
7
|
+
assert repair_json("[", return_objects=True) == []
|
8
|
+
assert repair_json("[[1\n\n]") == "[[1]]"
|
9
|
+
|
10
|
+
|
11
|
+
def test_parse_array_edge_cases():
|
12
|
+
assert repair_json("[{]") == "[{}]"
|
13
|
+
assert repair_json("[") == "[]"
|
14
|
+
assert repair_json('["') == "[]"
|
15
|
+
assert repair_json("]") == ""
|
16
|
+
assert repair_json("[1, 2, 3,") == "[1, 2, 3]"
|
17
|
+
assert repair_json("[1, 2, 3, ...]") == "[1, 2, 3]"
|
18
|
+
assert repair_json("[1, 2, ... , 3]") == "[1, 2, 3]"
|
19
|
+
assert repair_json("[1, 2, '...', 3]") == '[1, 2, "...", 3]'
|
20
|
+
assert repair_json("[true, false, null, ...]") == "[true, false, null]"
|
21
|
+
assert repair_json('["a" "b" "c" 1') == '["a", "b", "c", 1]'
|
22
|
+
assert repair_json('{"employees":["John", "Anna",') == '{"employees": ["John", "Anna"]}'
|
23
|
+
assert repair_json('{"employees":["John", "Anna", "Peter') == '{"employees": ["John", "Anna", "Peter"]}'
|
24
|
+
assert repair_json('{"key1": {"key2": [1, 2, 3') == '{"key1": {"key2": [1, 2, 3]}}'
|
25
|
+
assert repair_json('{"key": ["value]}') == '{"key": ["value"]}'
|
26
|
+
assert repair_json('["lorem "ipsum" sic"]') == '["lorem \\"ipsum\\" sic"]'
|
27
|
+
assert (
|
28
|
+
repair_json('{"key1": ["value1", "value2"}, "key2": ["value3", "value4"]}')
|
29
|
+
== '{"key1": ["value1", "value2"], "key2": ["value3", "value4"]}'
|
30
|
+
)
|
31
|
+
assert repair_json('{"key": ["value" "value1" "value2"]}') == '{"key": ["value", "value1", "value2"]}'
|
32
|
+
assert (
|
33
|
+
repair_json('{"key": ["lorem "ipsum" dolor "sit" amet, "consectetur" ", "lorem "ipsum" dolor", "lorem"]}')
|
34
|
+
== '{"key": ["lorem \\"ipsum\\" dolor \\"sit\\" amet, \\"consectetur\\" ", "lorem \\"ipsum\\" dolor", "lorem"]}'
|
35
|
+
)
|
36
|
+
assert repair_json('{"k"e"y": "value"}') == '{"k\\"e\\"y": "value"}'
|
37
|
+
assert repair_json('["key":"value"}]') == '[{"key": "value"}]'
|
@@ -0,0 +1,12 @@
|
|
1
|
+
from src.json_repair.json_repair import repair_json
|
2
|
+
|
3
|
+
|
4
|
+
def test_parse_boolean_or_null():
|
5
|
+
assert repair_json("True", return_objects=True) == ""
|
6
|
+
assert repair_json("False", return_objects=True) == ""
|
7
|
+
assert repair_json("Null", return_objects=True) == ""
|
8
|
+
assert repair_json("true", return_objects=True)
|
9
|
+
assert not repair_json("false", return_objects=True)
|
10
|
+
assert repair_json("null", return_objects=True) is None
|
11
|
+
assert repair_json(' {"key": true, "key2": false, "key3": null}') == '{"key": true, "key2": false, "key3": null}'
|
12
|
+
assert repair_json('{"key": TRUE, "key2": FALSE, "key3": Null} ') == '{"key": true, "key2": false, "key3": null}'
|
@@ -0,0 +1,27 @@
|
|
1
|
+
from src.json_repair.json_repair import repair_json
|
2
|
+
|
3
|
+
|
4
|
+
def test_parse_number():
|
5
|
+
assert repair_json("1", return_objects=True) == 1
|
6
|
+
assert repair_json("1.2", return_objects=True) == 1.2
|
7
|
+
|
8
|
+
|
9
|
+
def test_parse_number_edge_cases():
|
10
|
+
assert (
|
11
|
+
repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
|
12
|
+
)
|
13
|
+
assert repair_json('{"key": 1/3}') == '{"key": "1/3"}'
|
14
|
+
assert repair_json('{"key": .25}') == '{"key": 0.25}'
|
15
|
+
assert repair_json('{"here": "now", "key": 1/3, "foo": "bar"}') == '{"here": "now", "key": "1/3", "foo": "bar"}'
|
16
|
+
assert repair_json('{"key": 12345/67890}') == '{"key": "12345/67890"}'
|
17
|
+
assert repair_json("[105,12") == "[105, 12]"
|
18
|
+
assert repair_json('{"key", 105,12,') == '{"key": "105,12"}'
|
19
|
+
assert repair_json('{"key": 1/3, "foo": "bar"}') == '{"key": "1/3", "foo": "bar"}'
|
20
|
+
assert repair_json('{"key": 10-20}') == '{"key": "10-20"}'
|
21
|
+
assert repair_json('{"key": 1.1.1}') == '{"key": "1.1.1"}'
|
22
|
+
assert repair_json("[- ") == "[]"
|
23
|
+
assert repair_json('{"key": 1. }') == '{"key": 1.0}'
|
24
|
+
assert repair_json('{"key": 1e10 }') == '{"key": 10000000000.0}'
|
25
|
+
assert repair_json('{"key": 1e }') == '{"key": 1}'
|
26
|
+
assert repair_json('{"key": 1notanumber }') == '{"key": "1notanumber"}'
|
27
|
+
assert repair_json("[1, 2notanumber]") == '[1, "2notanumber"]'
|
@@ -0,0 +1,85 @@
|
|
1
|
+
from src.json_repair.json_repair import repair_json
|
2
|
+
|
3
|
+
|
4
|
+
def test_parse_object():
|
5
|
+
assert repair_json("{}", return_objects=True) == {}
|
6
|
+
assert repair_json('{ "key": "value", "key2": 1, "key3": True }', return_objects=True) == {
|
7
|
+
"key": "value",
|
8
|
+
"key2": 1,
|
9
|
+
"key3": True,
|
10
|
+
}
|
11
|
+
assert repair_json("{", return_objects=True) == {}
|
12
|
+
assert repair_json('{ "key": value, "key2": 1 "key3": null }', return_objects=True) == {
|
13
|
+
"key": "value",
|
14
|
+
"key2": 1,
|
15
|
+
"key3": None,
|
16
|
+
}
|
17
|
+
assert repair_json(" { } ") == "{}"
|
18
|
+
assert repair_json("{") == "{}"
|
19
|
+
assert repair_json("}") == ""
|
20
|
+
assert repair_json('{"') == "{}"
|
21
|
+
|
22
|
+
|
23
|
+
def test_parse_object_edge_cases():
|
24
|
+
assert repair_json("{foo: [}") == '{"foo": []}'
|
25
|
+
assert repair_json("{ ") == "{}"
|
26
|
+
assert repair_json('{"": "value"') == '{"": "value"}'
|
27
|
+
assert repair_json('{"value_1": true, COMMENT "value_2": "data"}') == '{"value_1": true, "value_2": "data"}'
|
28
|
+
assert (
|
29
|
+
repair_json('{"value_1": true, SHOULD_NOT_EXIST "value_2": "data" AAAA }')
|
30
|
+
== '{"value_1": true, "value_2": "data"}'
|
31
|
+
)
|
32
|
+
assert repair_json('{"" : true, "key2": "value2"}') == '{"": true, "key2": "value2"}'
|
33
|
+
assert (
|
34
|
+
repair_json("""{""answer"":[{""traits"":''Female aged 60+'',""answer1"":""5""}]}""")
|
35
|
+
== '{"answer": [{"traits": "Female aged 60+", "answer1": "5"}]}'
|
36
|
+
)
|
37
|
+
assert (
|
38
|
+
repair_json('{ "words": abcdef", "numbers": 12345", "words2": ghijkl" }')
|
39
|
+
== '{"words": "abcdef", "numbers": 12345, "words2": "ghijkl"}'
|
40
|
+
)
|
41
|
+
assert (
|
42
|
+
repair_json("""{"number": 1,"reason": "According...""ans": "YES"}""")
|
43
|
+
== '{"number": 1, "reason": "According...", "ans": "YES"}'
|
44
|
+
)
|
45
|
+
assert repair_json("""{ "a" : "{ b": {} }" }""") == '{"a": "{ b"}'
|
46
|
+
assert repair_json("""{"b": "xxxxx" true}""") == '{"b": "xxxxx"}'
|
47
|
+
assert repair_json('{"key": "Lorem "ipsum" s,"}') == '{"key": "Lorem \\"ipsum\\" s,"}'
|
48
|
+
assert repair_json('{"lorem": ipsum, sic, datum.",}') == '{"lorem": "ipsum, sic, datum."}'
|
49
|
+
assert (
|
50
|
+
repair_json('{"lorem": sic tamet. "ipsum": sic tamet, quick brown fox. "sic": ipsum}')
|
51
|
+
== '{"lorem": "sic tamet.", "ipsum": "sic tamet", "sic": "ipsum"}'
|
52
|
+
)
|
53
|
+
assert (
|
54
|
+
repair_json('{"lorem_ipsum": "sic tamet, quick brown fox. }')
|
55
|
+
== '{"lorem_ipsum": "sic tamet, quick brown fox."}'
|
56
|
+
)
|
57
|
+
assert repair_json('{"key":value, " key2":"value2" }') == '{"key": "value", " key2": "value2"}'
|
58
|
+
assert repair_json('{"key":value "key2":"value2" }') == '{"key": "value", "key2": "value2"}'
|
59
|
+
assert (
|
60
|
+
repair_json("{'text': 'words{words in brackets}more words'}")
|
61
|
+
== '{"text": "words{words in brackets}more words"}'
|
62
|
+
)
|
63
|
+
assert repair_json("{text:words{words in brackets}}") == '{"text": "words{words in brackets}"}'
|
64
|
+
assert repair_json("{text:words{words in brackets}m}") == '{"text": "words{words in brackets}m"}'
|
65
|
+
assert repair_json('{"key": "value, value2"```') == '{"key": "value, value2"}'
|
66
|
+
assert repair_json("{key:value,key2:value2}") == '{"key": "value", "key2": "value2"}'
|
67
|
+
assert repair_json('{"key:"value"}') == '{"key": "value"}'
|
68
|
+
assert repair_json('{"key:value}') == '{"key": "value"}'
|
69
|
+
assert (
|
70
|
+
repair_json('[{"lorem": {"ipsum": "sic"}, """" "lorem": {"ipsum": "sic"}]')
|
71
|
+
== '[{"lorem": {"ipsum": "sic"}}, {"lorem": {"ipsum": "sic"}}]'
|
72
|
+
)
|
73
|
+
assert (
|
74
|
+
repair_json('{ "key": ["arrayvalue"], ["arrayvalue1"], ["arrayvalue2"], "key3": "value3" }')
|
75
|
+
== '{"key": ["arrayvalue", "arrayvalue1", "arrayvalue2"], "key3": "value3"}'
|
76
|
+
)
|
77
|
+
assert (
|
78
|
+
repair_json('{ "key": ["arrayvalue"], "key3": "value3", ["arrayvalue1"] }')
|
79
|
+
== '{"key": ["arrayvalue"], "key3": "value3", "arrayvalue1": ""}'
|
80
|
+
)
|
81
|
+
assert (
|
82
|
+
repair_json('{"key": "{\\\\"key\\\\\\":[\\"value\\\\\\"],\\"key2\\":"value2"}"}')
|
83
|
+
== '{"key": "{\\"key\\":[\\"value\\"],\\"key2\\":\\"value2\\"}"}'
|
84
|
+
)
|
85
|
+
assert repair_json('{"key": , "key2": "value2"}') == '{"key": "", "key2": "value2"}'
|