json-repair 0.47.5__tar.gz → 0.47.7__tar.gz

This diff shows the changes between two publicly available versions of the package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (35)
  1. {json_repair-0.47.5/src/json_repair.egg-info → json_repair-0.47.7}/PKG-INFO +1 -1
  2. {json_repair-0.47.5 → json_repair-0.47.7}/pyproject.toml +1 -1
  3. {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/json_parser.py +23 -12
  4. {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/parse_array.py +6 -1
  5. {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/parse_boolean_or_null.py +7 -1
  6. {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/parse_comment.py +7 -1
  7. {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/parse_number.py +7 -2
  8. {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/parse_object.py +17 -7
  9. {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/parse_string.py +24 -2
  10. {json_repair-0.47.5 → json_repair-0.47.7/src/json_repair.egg-info}/PKG-INFO +1 -1
  11. {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair.egg-info/SOURCES.txt +8 -1
  12. json_repair-0.47.7/tests/test_json_repair.py +161 -0
  13. json_repair-0.47.7/tests/test_parse_array.py +37 -0
  14. json_repair-0.47.7/tests/test_parse_boolean_or_null.py +12 -0
  15. {json_repair-0.47.5 → json_repair-0.47.7}/tests/test_parse_comment.py +1 -0
  16. json_repair-0.47.7/tests/test_parse_number.py +27 -0
  17. json_repair-0.47.7/tests/test_parse_object.py +85 -0
  18. json_repair-0.47.7/tests/test_parse_string.py +99 -0
  19. json_repair-0.47.7/tests/test_repair_json_cli.py +67 -0
  20. json_repair-0.47.5/tests/test_json_repair.py → json_repair-0.47.7/tests/test_repair_json_from_file.py +1 -479
  21. {json_repair-0.47.5 → json_repair-0.47.7}/LICENSE +0 -0
  22. {json_repair-0.47.5 → json_repair-0.47.7}/README.md +0 -0
  23. {json_repair-0.47.5 → json_repair-0.47.7}/setup.cfg +0 -0
  24. {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/__init__.py +0 -0
  25. {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/__main__.py +0 -0
  26. {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/constants.py +0 -0
  27. {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/json_context.py +0 -0
  28. {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/json_repair.py +0 -0
  29. {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/object_comparer.py +0 -0
  30. {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/py.typed +0 -0
  31. {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair/string_file_wrapper.py +0 -0
  32. {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair.egg-info/dependency_links.txt +0 -0
  33. {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair.egg-info/entry_points.txt +0 -0
  34. {json_repair-0.47.5 → json_repair-0.47.7}/src/json_repair.egg-info/top_level.txt +0 -0
  35. {json_repair-0.47.5 → json_repair-0.47.7}/tests/test_performance.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: json_repair
3
- Version: 0.47.5
3
+ Version: 0.47.7
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
3
3
  build-backend = "setuptools.build_meta"
4
4
  [project]
5
5
  name = "json_repair"
6
- version = "0.47.5"
6
+ version = "0.47.7"
7
7
  license = {file = "LICENSE"}
8
8
  authors = [
9
9
  { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
@@ -3,23 +3,34 @@ from typing import Literal, TextIO
3
3
  from .constants import STRING_DELIMITERS, JSONReturnType
4
4
  from .json_context import JsonContext
5
5
  from .object_comparer import ObjectComparer
6
- from .parse_array import parse_array
7
- from .parse_boolean_or_null import parse_boolean_or_null
8
- from .parse_comment import parse_comment
9
- from .parse_number import parse_number
10
- from .parse_object import parse_object
11
- from .parse_string import parse_string
6
+ from .parse_array import parse_array as _parse_array
7
+ from .parse_boolean_or_null import parse_boolean_or_null as _parse_boolean_or_null
8
+ from .parse_comment import parse_comment as _parse_comment
9
+ from .parse_number import parse_number as _parse_number
10
+ from .parse_object import parse_object as _parse_object
11
+ from .parse_string import parse_string as _parse_string
12
12
  from .string_file_wrapper import StringFileWrapper
13
13
 
14
14
 
15
15
  class JSONParser:
16
16
  # Split the parse methods into separate files because this one was like 3000 lines
17
- parse_array = parse_array
18
- parse_boolean_or_null = parse_boolean_or_null
19
- parse_comment = parse_comment
20
- parse_number = parse_number
21
- parse_object = parse_object
22
- parse_string = parse_string
17
+ def parse_array(self, *args, **kwargs):
18
+ return _parse_array(self, *args, **kwargs)
19
+
20
+ def parse_boolean_or_null(self, *args, **kwargs):
21
+ return _parse_boolean_or_null(self, *args, **kwargs)
22
+
23
+ def parse_comment(self, *args, **kwargs):
24
+ return _parse_comment(self, *args, **kwargs)
25
+
26
+ def parse_number(self, *args, **kwargs):
27
+ return _parse_number(self, *args, **kwargs)
28
+
29
+ def parse_object(self, *args, **kwargs):
30
+ return _parse_object(self, *args, **kwargs)
31
+
32
+ def parse_string(self, *args, **kwargs):
33
+ return _parse_string(self, *args, **kwargs)
23
34
 
24
35
  def __init__(
25
36
  self,
@@ -1,8 +1,13 @@
1
+ from typing import TYPE_CHECKING
2
+
1
3
  from .constants import STRING_DELIMITERS, JSONReturnType
2
4
  from .json_context import ContextValues
3
5
 
6
+ if TYPE_CHECKING:
7
+ from .json_parser import JSONParser
8
+
4
9
 
5
- def parse_array(self) -> list[JSONReturnType]:
10
+ def parse_array(self: "JSONParser") -> list[JSONReturnType]:
6
11
  # <array> ::= '[' [ <json> *(', ' <json>) ] ']' ; A sequence of JSON values separated by commas
7
12
  arr = []
8
13
  self.context.set(ContextValues.ARRAY)
@@ -1,4 +1,10 @@
1
- def parse_boolean_or_null(self) -> bool | str | None:
1
+ from typing import TYPE_CHECKING
2
+
3
+ if TYPE_CHECKING:
4
+ from .json_parser import JSONParser
5
+
6
+
7
+ def parse_boolean_or_null(self: "JSONParser") -> bool | str | None:
2
8
  # <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
3
9
  starting_index = self.index
4
10
  char = (self.get_char_at() or "").lower()
@@ -1,7 +1,13 @@
1
+ from typing import TYPE_CHECKING
2
+
3
+ from .constants import JSONReturnType
1
4
  from .json_context import ContextValues
2
5
 
6
+ if TYPE_CHECKING:
7
+ from .json_parser import JSONParser
8
+
3
9
 
4
- def parse_comment(self) -> str:
10
+ def parse_comment(self: "JSONParser") -> JSONReturnType:
5
11
  """
6
12
  Parse code-like comments:
7
13
 
@@ -1,10 +1,15 @@
1
- from .constants import JSONReturnType
1
+ from typing import TYPE_CHECKING
2
+
2
3
  from .json_context import ContextValues
3
4
 
4
5
  NUMBER_CHARS: set[str] = set("0123456789-.eE/,")
5
6
 
6
7
 
7
- def parse_number(self) -> float | int | str | JSONReturnType:
8
+ if TYPE_CHECKING:
9
+ from .json_parser import JSONParser
10
+
11
+
12
+ def parse_number(self: "JSONParser") -> float | int | str | bool | None:
8
13
  # <number> is a valid real number expressed in one of a number of given formats
9
14
  number_str = ""
10
15
  char = self.get_char_at()
@@ -1,8 +1,13 @@
1
+ from typing import TYPE_CHECKING
2
+
1
3
  from .constants import JSONReturnType
2
4
  from .json_context import ContextValues
3
5
 
6
+ if TYPE_CHECKING:
7
+ from .json_parser import JSONParser
8
+
4
9
 
5
- def parse_object(self) -> dict[str, JSONReturnType]:
10
+ def parse_object(self: "JSONParser") -> dict[str, JSONReturnType]:
6
11
  # <object> ::= '{' [ <member> *(', ' <member>) ] '}' ; A sequence of 'members'
7
12
  obj: dict[str, JSONReturnType] = {}
8
13
  # Stop when you either find the closing parentheses or you have iterated over the entire string
@@ -59,12 +64,17 @@ def parse_object(self) -> dict[str, JSONReturnType]:
59
64
  # If the string is empty but there is a object divider, we are done here
60
65
  break
61
66
  if ContextValues.ARRAY in self.context.context and key in obj:
62
- self.log(
63
- "While parsing an object we found a duplicate key, closing the object here and rolling back the index",
64
- )
65
- self.index = rollback_index - 1
66
- # add an opening curly brace to make this work
67
- self.json_str = self.json_str[: self.index + 1] + "{" + self.json_str[self.index + 1 :]
67
+ if self.stream_stable:
68
+ # This is possibly another problem, the key is incomplete and it "appears" duplicate
69
+ # Let's just do nothing
70
+ pass
71
+ else:
72
+ self.log(
73
+ "While parsing an object we found a duplicate key, closing the object here and rolling back the index",
74
+ )
75
+ self.index = rollback_index - 1
76
+ # add an opening curly brace to make this work
77
+ self.json_str = self.json_str[: self.index + 1] + "{" + self.json_str[self.index + 1 :]
68
78
  break
69
79
 
70
80
  # Skip filler whitespaces
@@ -1,8 +1,13 @@
1
+ from typing import TYPE_CHECKING
2
+
1
3
  from .constants import STRING_DELIMITERS
2
4
  from .json_context import ContextValues
3
5
 
6
+ if TYPE_CHECKING:
7
+ from .json_parser import JSONParser
8
+
4
9
 
5
- def parse_string(self) -> str | bool | None:
10
+ def parse_string(self: "JSONParser") -> str | bool | None:
6
11
  # <string> is a string of valid characters enclosed in quotes
7
12
  # i.e. { name: "John" }
8
13
  # Somehow all weird cases in an invalid JSON happen to be resolved in this function, so be careful here
@@ -328,7 +333,24 @@ def parse_string(self) -> str | bool | None:
328
333
  if all(str(self.get_char_at(j)).isspace() for j in range(1, i) if self.get_char_at(j)):
329
334
  break
330
335
  if self.context.current == ContextValues.OBJECT_VALUE:
331
- # But this might not be it! This could be just a missing comma
336
+ i = self.skip_whitespaces_at(idx=i + 1, move_main_index=False)
337
+ if self.get_char_at(i) == ",":
338
+ # So we found a comma, this could be a case of a single quote like "va"lue",
339
+ # Search if it's followed by another key, starting with the first delimeter
340
+ i = self.skip_to_character(character=lstring_delimiter, idx=i + 1)
341
+ i += 1
342
+ i = self.skip_to_character(character=rstring_delimiter, idx=i + 1)
343
+ i += 1
344
+ i = self.skip_whitespaces_at(idx=i, move_main_index=False)
345
+ next_c = self.get_char_at(i)
346
+ if next_c == ":":
347
+ self.log(
348
+ "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
349
+ )
350
+ string_acc += str(char)
351
+ self.index += 1
352
+ char = self.get_char_at()
353
+ continue
332
354
  # We found a delimiter and we need to check if this is a key
333
355
  # so find a rstring_delimiter and a colon after
334
356
  i = self.skip_to_character(character=rstring_delimiter, idx=i + 1)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: json_repair
3
- Version: 0.47.5
3
+ Version: 0.47.7
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -22,5 +22,12 @@ src/json_repair.egg-info/dependency_links.txt
22
22
  src/json_repair.egg-info/entry_points.txt
23
23
  src/json_repair.egg-info/top_level.txt
24
24
  tests/test_json_repair.py
25
+ tests/test_parse_array.py
26
+ tests/test_parse_boolean_or_null.py
25
27
  tests/test_parse_comment.py
26
- tests/test_performance.py
28
+ tests/test_parse_number.py
29
+ tests/test_parse_object.py
30
+ tests/test_parse_string.py
31
+ tests/test_performance.py
32
+ tests/test_repair_json_cli.py
33
+ tests/test_repair_json_from_file.py
@@ -0,0 +1,161 @@
1
+ from src.json_repair.json_repair import loads, repair_json
2
+
3
+
4
+ def test_valid_json():
5
+ assert (
6
+ repair_json('{"name": "John", "age": 30, "city": "New York"}')
7
+ == '{"name": "John", "age": 30, "city": "New York"}'
8
+ )
9
+ assert repair_json('{"employees":["John", "Anna", "Peter"]} ') == '{"employees": ["John", "Anna", "Peter"]}'
10
+ assert repair_json('{"key": "value:value"}') == '{"key": "value:value"}'
11
+ assert repair_json('{"text": "The quick brown fox,"}') == '{"text": "The quick brown fox,"}'
12
+ assert repair_json('{"text": "The quick brown fox won\'t jump"}') == '{"text": "The quick brown fox won\'t jump"}'
13
+ assert repair_json('{"key": ""') == '{"key": ""}'
14
+ assert repair_json('{"key1": {"key2": [1, 2, 3]}}') == '{"key1": {"key2": [1, 2, 3]}}'
15
+ assert repair_json('{"key": 12345678901234567890}') == '{"key": 12345678901234567890}'
16
+ assert repair_json('{"key": "value\u263a"}') == '{"key": "value\\u263a"}'
17
+ assert repair_json('{"key": "value\\nvalue"}') == '{"key": "value\\nvalue"}'
18
+
19
+
20
+ def test_multiple_jsons():
21
+ assert repair_json("[]{}") == "[[], {}]"
22
+ assert repair_json("{}[]{}") == "[{}, [], {}]"
23
+ assert repair_json('{"key":"value"}[1,2,3,True]') == '[{"key": "value"}, [1, 2, 3, true]]'
24
+ assert (
25
+ repair_json('lorem ```json {"key":"value"} ``` ipsum ```json [1,2,3,True] ``` 42')
26
+ == '[{"key": "value"}, [1, 2, 3, true]]'
27
+ )
28
+ assert repair_json('[{"key":"value"}][{"key":"value_after"}]') == '[{"key": "value_after"}]'
29
+
30
+
31
+ def test_repair_json_with_objects():
32
+ # Test with valid JSON strings
33
+ assert repair_json("[]", return_objects=True) == []
34
+ assert repair_json("{}", return_objects=True) == {}
35
+ assert repair_json('{"key": true, "key2": false, "key3": null}', return_objects=True) == {
36
+ "key": True,
37
+ "key2": False,
38
+ "key3": None,
39
+ }
40
+ assert repair_json('{"name": "John", "age": 30, "city": "New York"}', return_objects=True) == {
41
+ "name": "John",
42
+ "age": 30,
43
+ "city": "New York",
44
+ }
45
+ assert repair_json("[1, 2, 3, 4]", return_objects=True) == [1, 2, 3, 4]
46
+ assert repair_json('{"employees":["John", "Anna", "Peter"]} ', return_objects=True) == {
47
+ "employees": ["John", "Anna", "Peter"]
48
+ }
49
+ assert repair_json(
50
+ """
51
+ {
52
+ "resourceType": "Bundle",
53
+ "id": "1",
54
+ "type": "collection",
55
+ "entry": [
56
+ {
57
+ "resource": {
58
+ "resourceType": "Patient",
59
+ "id": "1",
60
+ "name": [
61
+ {"use": "official", "family": "Corwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."},
62
+ {"use": "maiden", "family": "Goodwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}
63
+ ]
64
+ }
65
+ }
66
+ ]
67
+ }
68
+ """,
69
+ return_objects=True,
70
+ ) == {
71
+ "resourceType": "Bundle",
72
+ "id": "1",
73
+ "type": "collection",
74
+ "entry": [
75
+ {
76
+ "resource": {
77
+ "resourceType": "Patient",
78
+ "id": "1",
79
+ "name": [
80
+ {
81
+ "use": "official",
82
+ "family": "Corwin",
83
+ "given": ["Keisha", "Sunny"],
84
+ "prefix": ["Mrs."],
85
+ },
86
+ {
87
+ "use": "maiden",
88
+ "family": "Goodwin",
89
+ "given": ["Keisha", "Sunny"],
90
+ "prefix": ["Mrs."],
91
+ },
92
+ ],
93
+ }
94
+ }
95
+ ],
96
+ }
97
+ assert repair_json(
98
+ '{\n"html": "<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>"}',
99
+ return_objects=True,
100
+ ) == {"html": '<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>'}
101
+ assert repair_json(
102
+ """
103
+ [
104
+ {
105
+ "foo": "Foo bar baz",
106
+ "tag": "#foo-bar-baz"
107
+ },
108
+ {
109
+ "foo": "foo bar "foobar" foo bar baz.",
110
+ "tag": "#foo-bar-foobar"
111
+ }
112
+ ]
113
+ """,
114
+ return_objects=True,
115
+ ) == [
116
+ {"foo": "Foo bar baz", "tag": "#foo-bar-baz"},
117
+ {"foo": 'foo bar "foobar" foo bar baz.', "tag": "#foo-bar-foobar"},
118
+ ]
119
+
120
+
121
+ def test_repair_json_skip_json_loads():
122
+ assert (
123
+ repair_json('{"key": true, "key2": false, "key3": null}', skip_json_loads=True)
124
+ == '{"key": true, "key2": false, "key3": null}'
125
+ )
126
+ assert repair_json(
127
+ '{"key": true, "key2": false, "key3": null}',
128
+ return_objects=True,
129
+ skip_json_loads=True,
130
+ ) == {"key": True, "key2": False, "key3": None}
131
+ assert (
132
+ repair_json('{"key": true, "key2": false, "key3": }', skip_json_loads=True)
133
+ == '{"key": true, "key2": false, "key3": ""}'
134
+ )
135
+ assert loads('{"key": true, "key2": false, "key3": }', skip_json_loads=True) == {
136
+ "key": True,
137
+ "key2": False,
138
+ "key3": "",
139
+ }
140
+
141
+
142
+ def test_ensure_ascii():
143
+ assert repair_json("{'test_中国人_ascii':'统一码'}", ensure_ascii=False) == '{"test_中国人_ascii": "统一码"}'
144
+
145
+
146
+ def test_stream_stable():
147
+ # default: stream_stable = False
148
+ # When the json to be repaired is the accumulation of streaming json at a certain moment.
149
+ # The default repair result is unstable.
150
+ assert repair_json('{"key": "val\\', stream_stable=False) == '{"key": "val\\\\"}'
151
+ assert repair_json('{"key": "val\\n', stream_stable=False) == '{"key": "val"}'
152
+ assert (
153
+ repair_json('{"key": "val\\n123,`key2:value2', stream_stable=False) == '{"key": "val\\n123", "key2": "value2"}'
154
+ )
155
+ assert repair_json('{"key": "val\\n123,`key2:value2`"}', stream_stable=True) == '{"key": "val\\n123,`key2:value2`"}'
156
+ # stream_stable = True
157
+ assert repair_json('{"key": "val\\', stream_stable=True) == '{"key": "val"}'
158
+ assert repair_json('{"key": "val\\n', stream_stable=True) == '{"key": "val\\n"}'
159
+ assert repair_json('{"key": "val\\n123,`key2:value2', stream_stable=True) == '{"key": "val\\n123,`key2:value2"}'
160
+ assert repair_json('{"key": "val\\n123,`key2:value2`"}', stream_stable=True) == '{"key": "val\\n123,`key2:value2`"}'
161
+ assert repair_json('[{"key": "value", "key', stream_stable=True) == '[{"key": "value"}]'
@@ -0,0 +1,37 @@
1
+ from src.json_repair.json_repair import repair_json
2
+
3
+
4
+ def test_parse_array():
5
+ assert repair_json("[]", return_objects=True) == []
6
+ assert repair_json("[1, 2, 3, 4]", return_objects=True) == [1, 2, 3, 4]
7
+ assert repair_json("[", return_objects=True) == []
8
+ assert repair_json("[[1\n\n]") == "[[1]]"
9
+
10
+
11
+ def test_parse_array_edge_cases():
12
+ assert repair_json("[{]") == "[{}]"
13
+ assert repair_json("[") == "[]"
14
+ assert repair_json('["') == "[]"
15
+ assert repair_json("]") == ""
16
+ assert repair_json("[1, 2, 3,") == "[1, 2, 3]"
17
+ assert repair_json("[1, 2, 3, ...]") == "[1, 2, 3]"
18
+ assert repair_json("[1, 2, ... , 3]") == "[1, 2, 3]"
19
+ assert repair_json("[1, 2, '...', 3]") == '[1, 2, "...", 3]'
20
+ assert repair_json("[true, false, null, ...]") == "[true, false, null]"
21
+ assert repair_json('["a" "b" "c" 1') == '["a", "b", "c", 1]'
22
+ assert repair_json('{"employees":["John", "Anna",') == '{"employees": ["John", "Anna"]}'
23
+ assert repair_json('{"employees":["John", "Anna", "Peter') == '{"employees": ["John", "Anna", "Peter"]}'
24
+ assert repair_json('{"key1": {"key2": [1, 2, 3') == '{"key1": {"key2": [1, 2, 3]}}'
25
+ assert repair_json('{"key": ["value]}') == '{"key": ["value"]}'
26
+ assert repair_json('["lorem "ipsum" sic"]') == '["lorem \\"ipsum\\" sic"]'
27
+ assert (
28
+ repair_json('{"key1": ["value1", "value2"}, "key2": ["value3", "value4"]}')
29
+ == '{"key1": ["value1", "value2"], "key2": ["value3", "value4"]}'
30
+ )
31
+ assert repair_json('{"key": ["value" "value1" "value2"]}') == '{"key": ["value", "value1", "value2"]}'
32
+ assert (
33
+ repair_json('{"key": ["lorem "ipsum" dolor "sit" amet, "consectetur" ", "lorem "ipsum" dolor", "lorem"]}')
34
+ == '{"key": ["lorem \\"ipsum\\" dolor \\"sit\\" amet, \\"consectetur\\" ", "lorem \\"ipsum\\" dolor", "lorem"]}'
35
+ )
36
+ assert repair_json('{"k"e"y": "value"}') == '{"k\\"e\\"y": "value"}'
37
+ assert repair_json('["key":"value"}]') == '[{"key": "value"}]'
@@ -0,0 +1,12 @@
1
+ from src.json_repair.json_repair import repair_json
2
+
3
+
4
+ def test_parse_boolean_or_null():
5
+ assert repair_json("True", return_objects=True) == ""
6
+ assert repair_json("False", return_objects=True) == ""
7
+ assert repair_json("Null", return_objects=True) == ""
8
+ assert repair_json("true", return_objects=True)
9
+ assert not repair_json("false", return_objects=True)
10
+ assert repair_json("null", return_objects=True) is None
11
+ assert repair_json(' {"key": true, "key2": false, "key3": null}') == '{"key": true, "key2": false, "key3": null}'
12
+ assert repair_json('{"key": TRUE, "key2": FALSE, "key3": Null} ') == '{"key": true, "key2": false, "key3": null}'
@@ -2,6 +2,7 @@ from src.json_repair.json_repair import repair_json
2
2
 
3
3
 
4
4
  def test_parse_comment():
5
+ assert repair_json("/") == ""
5
6
  assert repair_json('/* comment */ {"key": "value"}')
6
7
  assert (
7
8
  repair_json('{ "key": { "key2": "value2" // comment }, "key3": "value3" }')
@@ -0,0 +1,27 @@
1
+ from src.json_repair.json_repair import repair_json
2
+
3
+
4
+ def test_parse_number():
5
+ assert repair_json("1", return_objects=True) == 1
6
+ assert repair_json("1.2", return_objects=True) == 1.2
7
+
8
+
9
+ def test_parse_number_edge_cases():
10
+ assert (
11
+ repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
12
+ )
13
+ assert repair_json('{"key": 1/3}') == '{"key": "1/3"}'
14
+ assert repair_json('{"key": .25}') == '{"key": 0.25}'
15
+ assert repair_json('{"here": "now", "key": 1/3, "foo": "bar"}') == '{"here": "now", "key": "1/3", "foo": "bar"}'
16
+ assert repair_json('{"key": 12345/67890}') == '{"key": "12345/67890"}'
17
+ assert repair_json("[105,12") == "[105, 12]"
18
+ assert repair_json('{"key", 105,12,') == '{"key": "105,12"}'
19
+ assert repair_json('{"key": 1/3, "foo": "bar"}') == '{"key": "1/3", "foo": "bar"}'
20
+ assert repair_json('{"key": 10-20}') == '{"key": "10-20"}'
21
+ assert repair_json('{"key": 1.1.1}') == '{"key": "1.1.1"}'
22
+ assert repair_json("[- ") == "[]"
23
+ assert repair_json('{"key": 1. }') == '{"key": 1.0}'
24
+ assert repair_json('{"key": 1e10 }') == '{"key": 10000000000.0}'
25
+ assert repair_json('{"key": 1e }') == '{"key": 1}'
26
+ assert repair_json('{"key": 1notanumber }') == '{"key": "1notanumber"}'
27
+ assert repair_json("[1, 2notanumber]") == '[1, "2notanumber"]'
@@ -0,0 +1,85 @@
1
+ from src.json_repair.json_repair import repair_json
2
+
3
+
4
+ def test_parse_object():
5
+ assert repair_json("{}", return_objects=True) == {}
6
+ assert repair_json('{ "key": "value", "key2": 1, "key3": True }', return_objects=True) == {
7
+ "key": "value",
8
+ "key2": 1,
9
+ "key3": True,
10
+ }
11
+ assert repair_json("{", return_objects=True) == {}
12
+ assert repair_json('{ "key": value, "key2": 1 "key3": null }', return_objects=True) == {
13
+ "key": "value",
14
+ "key2": 1,
15
+ "key3": None,
16
+ }
17
+ assert repair_json(" { } ") == "{}"
18
+ assert repair_json("{") == "{}"
19
+ assert repair_json("}") == ""
20
+ assert repair_json('{"') == "{}"
21
+
22
+
23
+ def test_parse_object_edge_cases():
24
+ assert repair_json("{foo: [}") == '{"foo": []}'
25
+ assert repair_json("{ ") == "{}"
26
+ assert repair_json('{"": "value"') == '{"": "value"}'
27
+ assert repair_json('{"value_1": true, COMMENT "value_2": "data"}') == '{"value_1": true, "value_2": "data"}'
28
+ assert (
29
+ repair_json('{"value_1": true, SHOULD_NOT_EXIST "value_2": "data" AAAA }')
30
+ == '{"value_1": true, "value_2": "data"}'
31
+ )
32
+ assert repair_json('{"" : true, "key2": "value2"}') == '{"": true, "key2": "value2"}'
33
+ assert (
34
+ repair_json("""{""answer"":[{""traits"":''Female aged 60+'',""answer1"":""5""}]}""")
35
+ == '{"answer": [{"traits": "Female aged 60+", "answer1": "5"}]}'
36
+ )
37
+ assert (
38
+ repair_json('{ "words": abcdef", "numbers": 12345", "words2": ghijkl" }')
39
+ == '{"words": "abcdef", "numbers": 12345, "words2": "ghijkl"}'
40
+ )
41
+ assert (
42
+ repair_json("""{"number": 1,"reason": "According...""ans": "YES"}""")
43
+ == '{"number": 1, "reason": "According...", "ans": "YES"}'
44
+ )
45
+ assert repair_json("""{ "a" : "{ b": {} }" }""") == '{"a": "{ b"}'
46
+ assert repair_json("""{"b": "xxxxx" true}""") == '{"b": "xxxxx"}'
47
+ assert repair_json('{"key": "Lorem "ipsum" s,"}') == '{"key": "Lorem \\"ipsum\\" s,"}'
48
+ assert repair_json('{"lorem": ipsum, sic, datum.",}') == '{"lorem": "ipsum, sic, datum."}'
49
+ assert (
50
+ repair_json('{"lorem": sic tamet. "ipsum": sic tamet, quick brown fox. "sic": ipsum}')
51
+ == '{"lorem": "sic tamet.", "ipsum": "sic tamet", "sic": "ipsum"}'
52
+ )
53
+ assert (
54
+ repair_json('{"lorem_ipsum": "sic tamet, quick brown fox. }')
55
+ == '{"lorem_ipsum": "sic tamet, quick brown fox."}'
56
+ )
57
+ assert repair_json('{"key":value, " key2":"value2" }') == '{"key": "value", " key2": "value2"}'
58
+ assert repair_json('{"key":value "key2":"value2" }') == '{"key": "value", "key2": "value2"}'
59
+ assert (
60
+ repair_json("{'text': 'words{words in brackets}more words'}")
61
+ == '{"text": "words{words in brackets}more words"}'
62
+ )
63
+ assert repair_json("{text:words{words in brackets}}") == '{"text": "words{words in brackets}"}'
64
+ assert repair_json("{text:words{words in brackets}m}") == '{"text": "words{words in brackets}m"}'
65
+ assert repair_json('{"key": "value, value2"```') == '{"key": "value, value2"}'
66
+ assert repair_json("{key:value,key2:value2}") == '{"key": "value", "key2": "value2"}'
67
+ assert repair_json('{"key:"value"}') == '{"key": "value"}'
68
+ assert repair_json('{"key:value}') == '{"key": "value"}'
69
+ assert (
70
+ repair_json('[{"lorem": {"ipsum": "sic"}, """" "lorem": {"ipsum": "sic"}]')
71
+ == '[{"lorem": {"ipsum": "sic"}}, {"lorem": {"ipsum": "sic"}}]'
72
+ )
73
+ assert (
74
+ repair_json('{ "key": ["arrayvalue"], ["arrayvalue1"], ["arrayvalue2"], "key3": "value3" }')
75
+ == '{"key": ["arrayvalue", "arrayvalue1", "arrayvalue2"], "key3": "value3"}'
76
+ )
77
+ assert (
78
+ repair_json('{ "key": ["arrayvalue"], "key3": "value3", ["arrayvalue1"] }')
79
+ == '{"key": ["arrayvalue"], "key3": "value3", "arrayvalue1": ""}'
80
+ )
81
+ assert (
82
+ repair_json('{"key": "{\\\\"key\\\\\\":[\\"value\\\\\\"],\\"key2\\":"value2"}"}')
83
+ == '{"key": "{\\"key\\":[\\"value\\"],\\"key2\\":\\"value2\\"}"}'
84
+ )
85
+ assert repair_json('{"key": , "key2": "value2"}') == '{"key": "", "key2": "value2"}'