json-repair 0.47.5__tar.gz → 0.47.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {json_repair-0.47.5/src/json_repair.egg-info → json_repair-0.47.6}/PKG-INFO +1 -1
  2. {json_repair-0.47.5 → json_repair-0.47.6}/pyproject.toml +1 -1
  3. {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/parse_string.py +18 -1
  4. {json_repair-0.47.5 → json_repair-0.47.6/src/json_repair.egg-info}/PKG-INFO +1 -1
  5. {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair.egg-info/SOURCES.txt +8 -1
  6. json_repair-0.47.6/tests/test_json_repair.py +160 -0
  7. json_repair-0.47.6/tests/test_parse_array.py +37 -0
  8. json_repair-0.47.6/tests/test_parse_boolean_or_null.py +12 -0
  9. {json_repair-0.47.5 → json_repair-0.47.6}/tests/test_parse_comment.py +1 -0
  10. json_repair-0.47.6/tests/test_parse_number.py +27 -0
  11. json_repair-0.47.6/tests/test_parse_object.py +85 -0
  12. json_repair-0.47.6/tests/test_parse_string.py +99 -0
  13. json_repair-0.47.6/tests/test_repair_json_cli.py +67 -0
  14. json_repair-0.47.5/tests/test_json_repair.py → json_repair-0.47.6/tests/test_repair_json_from_file.py +1 -479
  15. {json_repair-0.47.5 → json_repair-0.47.6}/LICENSE +0 -0
  16. {json_repair-0.47.5 → json_repair-0.47.6}/README.md +0 -0
  17. {json_repair-0.47.5 → json_repair-0.47.6}/setup.cfg +0 -0
  18. {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/__init__.py +0 -0
  19. {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/__main__.py +0 -0
  20. {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/constants.py +0 -0
  21. {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/json_context.py +0 -0
  22. {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/json_parser.py +0 -0
  23. {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/json_repair.py +0 -0
  24. {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/object_comparer.py +0 -0
  25. {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/parse_array.py +0 -0
  26. {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/parse_boolean_or_null.py +0 -0
  27. {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/parse_comment.py +0 -0
  28. {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/parse_number.py +0 -0
  29. {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/parse_object.py +0 -0
  30. {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/py.typed +0 -0
  31. {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair/string_file_wrapper.py +0 -0
  32. {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair.egg-info/dependency_links.txt +0 -0
  33. {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair.egg-info/entry_points.txt +0 -0
  34. {json_repair-0.47.5 → json_repair-0.47.6}/src/json_repair.egg-info/top_level.txt +0 -0
  35. {json_repair-0.47.5 → json_repair-0.47.6}/tests/test_performance.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: json_repair
3
- Version: 0.47.5
3
+ Version: 0.47.6
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
3
3
  build-backend = "setuptools.build_meta"
4
4
  [project]
5
5
  name = "json_repair"
6
- version = "0.47.5"
6
+ version = "0.47.6"
7
7
  license = {file = "LICENSE"}
8
8
  authors = [
9
9
  { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
@@ -328,7 +328,24 @@ def parse_string(self) -> str | bool | None:
328
328
  if all(str(self.get_char_at(j)).isspace() for j in range(1, i) if self.get_char_at(j)):
329
329
  break
330
330
  if self.context.current == ContextValues.OBJECT_VALUE:
331
- # But this might not be it! This could be just a missing comma
331
+ i = self.skip_whitespaces_at(idx=i + 1, move_main_index=False)
332
+ if self.get_char_at(i) == ",":
333
+ # So we found a comma, this could be a case of a single quote like "va"lue",
334
+ # Search if it's followed by another key, starting with the first delimiter
335
+ i = self.skip_to_character(character=lstring_delimiter, idx=i + 1)
336
+ i += 1
337
+ i = self.skip_to_character(character=rstring_delimiter, idx=i + 1)
338
+ i += 1
339
+ i = self.skip_whitespaces_at(idx=i, move_main_index=False)
340
+ next_c = self.get_char_at(i)
341
+ if next_c == ":":
342
+ self.log(
343
+ "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
344
+ )
345
+ string_acc += str(char)
346
+ self.index += 1
347
+ char = self.get_char_at()
348
+ continue
332
349
  # We found a delimiter and we need to check if this is a key
333
350
  # so find a rstring_delimiter and a colon after
334
351
  i = self.skip_to_character(character=rstring_delimiter, idx=i + 1)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: json_repair
3
- Version: 0.47.5
3
+ Version: 0.47.6
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -22,5 +22,12 @@ src/json_repair.egg-info/dependency_links.txt
22
22
  src/json_repair.egg-info/entry_points.txt
23
23
  src/json_repair.egg-info/top_level.txt
24
24
  tests/test_json_repair.py
25
+ tests/test_parse_array.py
26
+ tests/test_parse_boolean_or_null.py
25
27
  tests/test_parse_comment.py
26
- tests/test_performance.py
28
+ tests/test_parse_number.py
29
+ tests/test_parse_object.py
30
+ tests/test_parse_string.py
31
+ tests/test_performance.py
32
+ tests/test_repair_json_cli.py
33
+ tests/test_repair_json_from_file.py
@@ -0,0 +1,160 @@
1
+ from src.json_repair.json_repair import loads, repair_json
2
+
3
+
4
+ def test_valid_json():
5
+ assert (
6
+ repair_json('{"name": "John", "age": 30, "city": "New York"}')
7
+ == '{"name": "John", "age": 30, "city": "New York"}'
8
+ )
9
+ assert repair_json('{"employees":["John", "Anna", "Peter"]} ') == '{"employees": ["John", "Anna", "Peter"]}'
10
+ assert repair_json('{"key": "value:value"}') == '{"key": "value:value"}'
11
+ assert repair_json('{"text": "The quick brown fox,"}') == '{"text": "The quick brown fox,"}'
12
+ assert repair_json('{"text": "The quick brown fox won\'t jump"}') == '{"text": "The quick brown fox won\'t jump"}'
13
+ assert repair_json('{"key": ""') == '{"key": ""}'
14
+ assert repair_json('{"key1": {"key2": [1, 2, 3]}}') == '{"key1": {"key2": [1, 2, 3]}}'
15
+ assert repair_json('{"key": 12345678901234567890}') == '{"key": 12345678901234567890}'
16
+ assert repair_json('{"key": "value\u263a"}') == '{"key": "value\\u263a"}'
17
+ assert repair_json('{"key": "value\\nvalue"}') == '{"key": "value\\nvalue"}'
18
+
19
+
20
+ def test_multiple_jsons():
21
+ assert repair_json("[]{}") == "[[], {}]"
22
+ assert repair_json("{}[]{}") == "[{}, [], {}]"
23
+ assert repair_json('{"key":"value"}[1,2,3,True]') == '[{"key": "value"}, [1, 2, 3, true]]'
24
+ assert (
25
+ repair_json('lorem ```json {"key":"value"} ``` ipsum ```json [1,2,3,True] ``` 42')
26
+ == '[{"key": "value"}, [1, 2, 3, true]]'
27
+ )
28
+ assert repair_json('[{"key":"value"}][{"key":"value_after"}]') == '[{"key": "value_after"}]'
29
+
30
+
31
+ def test_repair_json_with_objects():
32
+ # Test with valid JSON strings
33
+ assert repair_json("[]", return_objects=True) == []
34
+ assert repair_json("{}", return_objects=True) == {}
35
+ assert repair_json('{"key": true, "key2": false, "key3": null}', return_objects=True) == {
36
+ "key": True,
37
+ "key2": False,
38
+ "key3": None,
39
+ }
40
+ assert repair_json('{"name": "John", "age": 30, "city": "New York"}', return_objects=True) == {
41
+ "name": "John",
42
+ "age": 30,
43
+ "city": "New York",
44
+ }
45
+ assert repair_json("[1, 2, 3, 4]", return_objects=True) == [1, 2, 3, 4]
46
+ assert repair_json('{"employees":["John", "Anna", "Peter"]} ', return_objects=True) == {
47
+ "employees": ["John", "Anna", "Peter"]
48
+ }
49
+ assert repair_json(
50
+ """
51
+ {
52
+ "resourceType": "Bundle",
53
+ "id": "1",
54
+ "type": "collection",
55
+ "entry": [
56
+ {
57
+ "resource": {
58
+ "resourceType": "Patient",
59
+ "id": "1",
60
+ "name": [
61
+ {"use": "official", "family": "Corwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."},
62
+ {"use": "maiden", "family": "Goodwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}
63
+ ]
64
+ }
65
+ }
66
+ ]
67
+ }
68
+ """,
69
+ return_objects=True,
70
+ ) == {
71
+ "resourceType": "Bundle",
72
+ "id": "1",
73
+ "type": "collection",
74
+ "entry": [
75
+ {
76
+ "resource": {
77
+ "resourceType": "Patient",
78
+ "id": "1",
79
+ "name": [
80
+ {
81
+ "use": "official",
82
+ "family": "Corwin",
83
+ "given": ["Keisha", "Sunny"],
84
+ "prefix": ["Mrs."],
85
+ },
86
+ {
87
+ "use": "maiden",
88
+ "family": "Goodwin",
89
+ "given": ["Keisha", "Sunny"],
90
+ "prefix": ["Mrs."],
91
+ },
92
+ ],
93
+ }
94
+ }
95
+ ],
96
+ }
97
+ assert repair_json(
98
+ '{\n"html": "<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>"}',
99
+ return_objects=True,
100
+ ) == {"html": '<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>'}
101
+ assert repair_json(
102
+ """
103
+ [
104
+ {
105
+ "foo": "Foo bar baz",
106
+ "tag": "#foo-bar-baz"
107
+ },
108
+ {
109
+ "foo": "foo bar "foobar" foo bar baz.",
110
+ "tag": "#foo-bar-foobar"
111
+ }
112
+ ]
113
+ """,
114
+ return_objects=True,
115
+ ) == [
116
+ {"foo": "Foo bar baz", "tag": "#foo-bar-baz"},
117
+ {"foo": 'foo bar "foobar" foo bar baz.', "tag": "#foo-bar-foobar"},
118
+ ]
119
+
120
+
121
+ def test_repair_json_skip_json_loads():
122
+ assert (
123
+ repair_json('{"key": true, "key2": false, "key3": null}', skip_json_loads=True)
124
+ == '{"key": true, "key2": false, "key3": null}'
125
+ )
126
+ assert repair_json(
127
+ '{"key": true, "key2": false, "key3": null}',
128
+ return_objects=True,
129
+ skip_json_loads=True,
130
+ ) == {"key": True, "key2": False, "key3": None}
131
+ assert (
132
+ repair_json('{"key": true, "key2": false, "key3": }', skip_json_loads=True)
133
+ == '{"key": true, "key2": false, "key3": ""}'
134
+ )
135
+ assert loads('{"key": true, "key2": false, "key3": }', skip_json_loads=True) == {
136
+ "key": True,
137
+ "key2": False,
138
+ "key3": "",
139
+ }
140
+
141
+
142
+ def test_ensure_ascii():
143
+ assert repair_json("{'test_中国人_ascii':'统一码'}", ensure_ascii=False) == '{"test_中国人_ascii": "统一码"}'
144
+
145
+
146
+ def test_stream_stable():
147
+ # default: stream_stable = False
148
+ # When the json to be repaired is the accumulation of streaming json at a certain moment.
149
+ # The default repair result is unstable.
150
+ assert repair_json('{"key": "val\\', stream_stable=False) == '{"key": "val\\\\"}'
151
+ assert repair_json('{"key": "val\\n', stream_stable=False) == '{"key": "val"}'
152
+ assert (
153
+ repair_json('{"key": "val\\n123,`key2:value2', stream_stable=False) == '{"key": "val\\n123", "key2": "value2"}'
154
+ )
155
+ assert repair_json('{"key": "val\\n123,`key2:value2`"}', stream_stable=True) == '{"key": "val\\n123,`key2:value2`"}'
156
+ # stream_stable = True
157
+ assert repair_json('{"key": "val\\', stream_stable=True) == '{"key": "val"}'
158
+ assert repair_json('{"key": "val\\n', stream_stable=True) == '{"key": "val\\n"}'
159
+ assert repair_json('{"key": "val\\n123,`key2:value2', stream_stable=True) == '{"key": "val\\n123,`key2:value2"}'
160
+ assert repair_json('{"key": "val\\n123,`key2:value2`"}', stream_stable=True) == '{"key": "val\\n123,`key2:value2`"}'
@@ -0,0 +1,37 @@
1
+ from src.json_repair.json_repair import repair_json
2
+
3
+
4
+ def test_parse_array():
5
+ assert repair_json("[]", return_objects=True) == []
6
+ assert repair_json("[1, 2, 3, 4]", return_objects=True) == [1, 2, 3, 4]
7
+ assert repair_json("[", return_objects=True) == []
8
+ assert repair_json("[[1\n\n]") == "[[1]]"
9
+
10
+
11
+ def test_parse_array_edge_cases():
12
+ assert repair_json("[{]") == "[{}]"
13
+ assert repair_json("[") == "[]"
14
+ assert repair_json('["') == "[]"
15
+ assert repair_json("]") == ""
16
+ assert repair_json("[1, 2, 3,") == "[1, 2, 3]"
17
+ assert repair_json("[1, 2, 3, ...]") == "[1, 2, 3]"
18
+ assert repair_json("[1, 2, ... , 3]") == "[1, 2, 3]"
19
+ assert repair_json("[1, 2, '...', 3]") == '[1, 2, "...", 3]'
20
+ assert repair_json("[true, false, null, ...]") == "[true, false, null]"
21
+ assert repair_json('["a" "b" "c" 1') == '["a", "b", "c", 1]'
22
+ assert repair_json('{"employees":["John", "Anna",') == '{"employees": ["John", "Anna"]}'
23
+ assert repair_json('{"employees":["John", "Anna", "Peter') == '{"employees": ["John", "Anna", "Peter"]}'
24
+ assert repair_json('{"key1": {"key2": [1, 2, 3') == '{"key1": {"key2": [1, 2, 3]}}'
25
+ assert repair_json('{"key": ["value]}') == '{"key": ["value"]}'
26
+ assert repair_json('["lorem "ipsum" sic"]') == '["lorem \\"ipsum\\" sic"]'
27
+ assert (
28
+ repair_json('{"key1": ["value1", "value2"}, "key2": ["value3", "value4"]}')
29
+ == '{"key1": ["value1", "value2"], "key2": ["value3", "value4"]}'
30
+ )
31
+ assert repair_json('{"key": ["value" "value1" "value2"]}') == '{"key": ["value", "value1", "value2"]}'
32
+ assert (
33
+ repair_json('{"key": ["lorem "ipsum" dolor "sit" amet, "consectetur" ", "lorem "ipsum" dolor", "lorem"]}')
34
+ == '{"key": ["lorem \\"ipsum\\" dolor \\"sit\\" amet, \\"consectetur\\" ", "lorem \\"ipsum\\" dolor", "lorem"]}'
35
+ )
36
+ assert repair_json('{"k"e"y": "value"}') == '{"k\\"e\\"y": "value"}'
37
+ assert repair_json('["key":"value"}]') == '[{"key": "value"}]'
@@ -0,0 +1,12 @@
1
+ from src.json_repair.json_repair import repair_json
2
+
3
+
4
+ def test_parse_boolean_or_null():
5
+ assert repair_json("True", return_objects=True) == ""
6
+ assert repair_json("False", return_objects=True) == ""
7
+ assert repair_json("Null", return_objects=True) == ""
8
+ assert repair_json("true", return_objects=True)
9
+ assert not repair_json("false", return_objects=True)
10
+ assert repair_json("null", return_objects=True) is None
11
+ assert repair_json(' {"key": true, "key2": false, "key3": null}') == '{"key": true, "key2": false, "key3": null}'
12
+ assert repair_json('{"key": TRUE, "key2": FALSE, "key3": Null} ') == '{"key": true, "key2": false, "key3": null}'
@@ -2,6 +2,7 @@ from src.json_repair.json_repair import repair_json
2
2
 
3
3
 
4
4
  def test_parse_comment():
5
+ assert repair_json("/") == ""
5
6
  assert repair_json('/* comment */ {"key": "value"}')
6
7
  assert (
7
8
  repair_json('{ "key": { "key2": "value2" // comment }, "key3": "value3" }')
@@ -0,0 +1,27 @@
1
+ from src.json_repair.json_repair import repair_json
2
+
3
+
4
+ def test_parse_number():
5
+ assert repair_json("1", return_objects=True) == 1
6
+ assert repair_json("1.2", return_objects=True) == 1.2
7
+
8
+
9
+ def test_parse_number_edge_cases():
10
+ assert (
11
+ repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
12
+ )
13
+ assert repair_json('{"key": 1/3}') == '{"key": "1/3"}'
14
+ assert repair_json('{"key": .25}') == '{"key": 0.25}'
15
+ assert repair_json('{"here": "now", "key": 1/3, "foo": "bar"}') == '{"here": "now", "key": "1/3", "foo": "bar"}'
16
+ assert repair_json('{"key": 12345/67890}') == '{"key": "12345/67890"}'
17
+ assert repair_json("[105,12") == "[105, 12]"
18
+ assert repair_json('{"key", 105,12,') == '{"key": "105,12"}'
19
+ assert repair_json('{"key": 1/3, "foo": "bar"}') == '{"key": "1/3", "foo": "bar"}'
20
+ assert repair_json('{"key": 10-20}') == '{"key": "10-20"}'
21
+ assert repair_json('{"key": 1.1.1}') == '{"key": "1.1.1"}'
22
+ assert repair_json("[- ") == "[]"
23
+ assert repair_json('{"key": 1. }') == '{"key": 1.0}'
24
+ assert repair_json('{"key": 1e10 }') == '{"key": 10000000000.0}'
25
+ assert repair_json('{"key": 1e }') == '{"key": 1}'
26
+ assert repair_json('{"key": 1notanumber }') == '{"key": "1notanumber"}'
27
+ assert repair_json("[1, 2notanumber]") == '[1, "2notanumber"]'
@@ -0,0 +1,85 @@
1
+ from src.json_repair.json_repair import repair_json
2
+
3
+
4
+ def test_parse_object():
5
+ assert repair_json("{}", return_objects=True) == {}
6
+ assert repair_json('{ "key": "value", "key2": 1, "key3": True }', return_objects=True) == {
7
+ "key": "value",
8
+ "key2": 1,
9
+ "key3": True,
10
+ }
11
+ assert repair_json("{", return_objects=True) == {}
12
+ assert repair_json('{ "key": value, "key2": 1 "key3": null }', return_objects=True) == {
13
+ "key": "value",
14
+ "key2": 1,
15
+ "key3": None,
16
+ }
17
+ assert repair_json(" { } ") == "{}"
18
+ assert repair_json("{") == "{}"
19
+ assert repair_json("}") == ""
20
+ assert repair_json('{"') == "{}"
21
+
22
+
23
+ def test_parse_object_edge_cases():
24
+ assert repair_json("{foo: [}") == '{"foo": []}'
25
+ assert repair_json("{ ") == "{}"
26
+ assert repair_json('{"": "value"') == '{"": "value"}'
27
+ assert repair_json('{"value_1": true, COMMENT "value_2": "data"}') == '{"value_1": true, "value_2": "data"}'
28
+ assert (
29
+ repair_json('{"value_1": true, SHOULD_NOT_EXIST "value_2": "data" AAAA }')
30
+ == '{"value_1": true, "value_2": "data"}'
31
+ )
32
+ assert repair_json('{"" : true, "key2": "value2"}') == '{"": true, "key2": "value2"}'
33
+ assert (
34
+ repair_json("""{""answer"":[{""traits"":''Female aged 60+'',""answer1"":""5""}]}""")
35
+ == '{"answer": [{"traits": "Female aged 60+", "answer1": "5"}]}'
36
+ )
37
+ assert (
38
+ repair_json('{ "words": abcdef", "numbers": 12345", "words2": ghijkl" }')
39
+ == '{"words": "abcdef", "numbers": 12345, "words2": "ghijkl"}'
40
+ )
41
+ assert (
42
+ repair_json("""{"number": 1,"reason": "According...""ans": "YES"}""")
43
+ == '{"number": 1, "reason": "According...", "ans": "YES"}'
44
+ )
45
+ assert repair_json("""{ "a" : "{ b": {} }" }""") == '{"a": "{ b"}'
46
+ assert repair_json("""{"b": "xxxxx" true}""") == '{"b": "xxxxx"}'
47
+ assert repair_json('{"key": "Lorem "ipsum" s,"}') == '{"key": "Lorem \\"ipsum\\" s,"}'
48
+ assert repair_json('{"lorem": ipsum, sic, datum.",}') == '{"lorem": "ipsum, sic, datum."}'
49
+ assert (
50
+ repair_json('{"lorem": sic tamet. "ipsum": sic tamet, quick brown fox. "sic": ipsum}')
51
+ == '{"lorem": "sic tamet.", "ipsum": "sic tamet", "sic": "ipsum"}'
52
+ )
53
+ assert (
54
+ repair_json('{"lorem_ipsum": "sic tamet, quick brown fox. }')
55
+ == '{"lorem_ipsum": "sic tamet, quick brown fox."}'
56
+ )
57
+ assert repair_json('{"key":value, " key2":"value2" }') == '{"key": "value", " key2": "value2"}'
58
+ assert repair_json('{"key":value "key2":"value2" }') == '{"key": "value", "key2": "value2"}'
59
+ assert (
60
+ repair_json("{'text': 'words{words in brackets}more words'}")
61
+ == '{"text": "words{words in brackets}more words"}'
62
+ )
63
+ assert repair_json("{text:words{words in brackets}}") == '{"text": "words{words in brackets}"}'
64
+ assert repair_json("{text:words{words in brackets}m}") == '{"text": "words{words in brackets}m"}'
65
+ assert repair_json('{"key": "value, value2"```') == '{"key": "value, value2"}'
66
+ assert repair_json("{key:value,key2:value2}") == '{"key": "value", "key2": "value2"}'
67
+ assert repair_json('{"key:"value"}') == '{"key": "value"}'
68
+ assert repair_json('{"key:value}') == '{"key": "value"}'
69
+ assert (
70
+ repair_json('[{"lorem": {"ipsum": "sic"}, """" "lorem": {"ipsum": "sic"}]')
71
+ == '[{"lorem": {"ipsum": "sic"}}, {"lorem": {"ipsum": "sic"}}]'
72
+ )
73
+ assert (
74
+ repair_json('{ "key": ["arrayvalue"], ["arrayvalue1"], ["arrayvalue2"], "key3": "value3" }')
75
+ == '{"key": ["arrayvalue", "arrayvalue1", "arrayvalue2"], "key3": "value3"}'
76
+ )
77
+ assert (
78
+ repair_json('{ "key": ["arrayvalue"], "key3": "value3", ["arrayvalue1"] }')
79
+ == '{"key": ["arrayvalue"], "key3": "value3", "arrayvalue1": ""}'
80
+ )
81
+ assert (
82
+ repair_json('{"key": "{\\\\"key\\\\\\":[\\"value\\\\\\"],\\"key2\\":"value2"}"}')
83
+ == '{"key": "{\\"key\\":[\\"value\\"],\\"key2\\":\\"value2\\"}"}'
84
+ )
85
+ assert repair_json('{"key": , "key2": "value2"}') == '{"key": "", "key2": "value2"}'
@@ -0,0 +1,99 @@
1
+ from src.json_repair.json_repair import repair_json
2
+
3
+
4
+ def test_parse_string():
5
+ assert repair_json('"') == ""
6
+ assert repair_json("\n") == ""
7
+ assert repair_json(" ") == ""
8
+ assert repair_json("string") == ""
9
+ assert repair_json("stringbeforeobject {}") == "{}"
10
+
11
+
12
+ def test_missing_and_mixed_quotes():
13
+ assert (
14
+ repair_json("{'key': 'string', 'key2': false, \"key3\": null, \"key4\": unquoted}")
15
+ == '{"key": "string", "key2": false, "key3": null, "key4": "unquoted"}'
16
+ )
17
+ assert (
18
+ repair_json('{"name": "John", "age": 30, "city": "New York')
19
+ == '{"name": "John", "age": 30, "city": "New York"}'
20
+ )
21
+ assert (
22
+ repair_json('{"name": "John", "age": 30, city: "New York"}')
23
+ == '{"name": "John", "age": 30, "city": "New York"}'
24
+ )
25
+ assert (
26
+ repair_json('{"name": "John", "age": 30, "city": New York}')
27
+ == '{"name": "John", "age": 30, "city": "New York"}'
28
+ )
29
+ assert (
30
+ repair_json('{"name": John, "age": 30, "city": "New York"}')
31
+ == '{"name": "John", "age": 30, "city": "New York"}'
32
+ )
33
+ assert repair_json('{“slanted_delimiter”: "value"}') == '{"slanted_delimiter": "value"}'
34
+ assert repair_json('{"name": "John", "age": 30, "city": "New') == '{"name": "John", "age": 30, "city": "New"}'
35
+ assert (
36
+ repair_json('{"name": "John", "age": 30, "city": "New York, "gender": "male"}')
37
+ == '{"name": "John", "age": 30, "city": "New York", "gender": "male"}'
38
+ )
39
+
40
+ assert (
41
+ repair_json('[{"key": "value", COMMENT "notes": "lorem "ipsum", sic." }]')
42
+ == '[{"key": "value", "notes": "lorem \\"ipsum\\", sic."}]'
43
+ )
44
+ assert repair_json('{"key": ""value"}') == '{"key": "value"}'
45
+ assert repair_json('{"key": "value", 5: "value"}') == '{"key": "value", "5": "value"}'
46
+ assert repair_json('{"foo": "\\"bar\\""') == '{"foo": "\\"bar\\""}'
47
+ assert repair_json('{"" key":"val"') == '{" key": "val"}'
48
+ assert repair_json('{"key": value "key2" : "value2" ') == '{"key": "value", "key2": "value2"}'
49
+ assert (
50
+ repair_json('{"key": "lorem ipsum ... "sic " tamet. ...}') == '{"key": "lorem ipsum ... \\"sic \\" tamet. ..."}'
51
+ )
52
+ assert repair_json('{"key": value , }') == '{"key": "value"}'
53
+ assert (
54
+ repair_json('{"comment": "lorem, "ipsum" sic "tamet". To improve"}')
55
+ == '{"comment": "lorem, \\"ipsum\\" sic \\"tamet\\". To improve"}'
56
+ )
57
+ assert repair_json('{"key": "v"alu"e"} key:') == '{"key": "v\\"alu\\"e"}'
58
+ assert repair_json('{"key": "v"alue", "key2": "value2"}') == '{"key": "v\\"alue", "key2": "value2"}'
59
+
60
+
61
+ def test_escaping():
62
+ assert repair_json("'\"'") == ""
63
+ assert repair_json('{"key": \'string"\n\t\\le\'') == '{"key": "string\\"\\n\\t\\\\le"}'
64
+ assert (
65
+ repair_json(
66
+ r'{"real_content": "Some string: Some other string \t Some string <a href=\"https://domain.com\">Some link</a>"'
67
+ )
68
+ == r'{"real_content": "Some string: Some other string \t Some string <a href=\"https://domain.com\">Some link</a>"}'
69
+ )
70
+ assert repair_json('{"key_1\n": "value"}') == '{"key_1": "value"}'
71
+ assert repair_json('{"key\t_": "value"}') == '{"key\\t_": "value"}'
72
+ assert repair_json("{\"key\": '\u0076\u0061\u006c\u0075\u0065'}") == '{"key": "value"}'
73
+ assert repair_json('{"key": "\\u0076\\u0061\\u006C\\u0075\\u0065"}', skip_json_loads=True) == '{"key": "value"}'
74
+
75
+
76
+ def test_markdown():
77
+ assert (
78
+ repair_json('{ "content": "[LINK]("https://google.com")" }')
79
+ == '{"content": "[LINK](\\"https://google.com\\")"}'
80
+ )
81
+ assert repair_json('{ "content": "[LINK](" }') == '{"content": "[LINK]("}'
82
+ assert repair_json('{ "content": "[LINK](", "key": true }') == '{"content": "[LINK](", "key": true}'
83
+
84
+
85
+ def test_leading_trailing_characters():
86
+ assert repair_json('````{ "key": "value" }```') == '{"key": "value"}'
87
+ assert repair_json("""{ "a": "", "b": [ { "c": 1} ] \n}```""") == '{"a": "", "b": [{"c": 1}]}'
88
+ assert (
89
+ repair_json("Based on the information extracted, here is the filled JSON output: ```json { 'a': 'b' } ```")
90
+ == '{"a": "b"}'
91
+ )
92
+ assert (
93
+ repair_json("""
94
+ The next 64 elements are:
95
+ ```json
96
+ { "key": "value" }
97
+ ```""")
98
+ == '{"key": "value"}'
99
+ )
@@ -0,0 +1,67 @@
1
+ import io
2
+ import os
3
+ import tempfile
4
+ from unittest.mock import patch
5
+
6
+ import pytest
7
+
8
+ from src.json_repair.json_repair import cli
9
+
10
+
11
+ def test_cli(capsys):
12
+ # Create a temporary file
13
+ temp_fd, temp_path = tempfile.mkstemp(suffix=".json")
14
+ try:
15
+ # Write content to the temporary file
16
+ with os.fdopen(temp_fd, "w") as tmp:
17
+ tmp.write("{key:value")
18
+ cli(inline_args=[temp_path, "--indent", 0, "--ensure_ascii"])
19
+ captured = capsys.readouterr()
20
+ assert captured.out == '{\n"key": "value"\n}\n'
21
+
22
+ # Test the output option
23
+ tempout_fd, tempout_path = tempfile.mkstemp(suffix=".json")
24
+ cli(inline_args=[temp_path, "--indent", 0, "-o", tempout_path])
25
+ with open(tempout_path) as tmp:
26
+ out = tmp.read()
27
+ assert out == '{\n"key": "value"\n}'
28
+
29
+ # Test the inline option
30
+ cli(inline_args=[temp_path, "--indent", 0, "-i"])
31
+ with open(temp_path) as tmp:
32
+ out = tmp.read()
33
+ assert out == '{\n"key": "value"\n}'
34
+
35
+ finally:
36
+ # Clean up - delete the temporary file
37
+ os.remove(temp_path)
38
+ os.remove(tempout_path)
39
+
40
+ # Prepare a JSON string that needs to be repaired.
41
+ test_input = "{key:value"
42
+ # Expected output when running cli with --indent 0.
43
+ expected_output = '{\n"key": "value"\n}\n'
44
+ # Patch sys.stdin so that cli() reads from it instead of a file.
45
+ with patch("sys.stdin", new=io.StringIO(test_input)):
46
+ cli(inline_args=["--indent", 0])
47
+ captured = capsys.readouterr()
48
+ assert captured.out == expected_output
49
+
50
+
51
+ def test_cli_inline_requires_filename(capsys):
52
+ """cli() should exit with an error when --inline is passed without a filename."""
53
+ with pytest.raises(SystemExit) as exc:
54
+ cli(inline_args=["--inline"])
55
+ captured = capsys.readouterr()
56
+ assert captured.err.strip() == "Error: Inline mode requires a filename"
57
+ assert exc.value.code != 0
58
+
59
+
60
+ def test_cli_inline_and_output_error(tmp_path, capsys):
61
+ """cli() should exit with an error when --inline and --output are used together."""
62
+ outfile = tmp_path / "out.json"
63
+ with pytest.raises(SystemExit) as exc:
64
+ cli(inline_args=["dummy.json", "--inline", "--output", str(outfile)])
65
+ captured = capsys.readouterr()
66
+ assert captured.err.strip() == "Error: You cannot pass both --inline and --output"
67
+ assert exc.value.code != 0
@@ -1,406 +1,8 @@
1
- import io
2
1
  import os.path
3
2
  import pathlib
4
3
  import tempfile
5
- from unittest.mock import patch
6
4
 
7
- import pytest
8
-
9
- from src.json_repair.json_repair import cli, from_file, loads, repair_json
10
-
11
-
12
- def test_basic_types_valid():
13
- assert repair_json("True", return_objects=True) == ""
14
- assert repair_json("False", return_objects=True) == ""
15
- assert repair_json("Null", return_objects=True) == ""
16
- assert repair_json("1", return_objects=True) == 1
17
- assert repair_json("[]", return_objects=True) == []
18
- assert repair_json("[1, 2, 3, 4]", return_objects=True) == [1, 2, 3, 4]
19
- assert repair_json("{}", return_objects=True) == {}
20
- assert repair_json('{ "key": "value", "key2": 1, "key3": True }', return_objects=True) == {
21
- "key": "value",
22
- "key2": 1,
23
- "key3": True,
24
- }
25
-
26
-
27
- def test_basic_types_invalid():
28
- assert repair_json("true", return_objects=True)
29
- assert not repair_json("false", return_objects=True)
30
- assert repair_json("null", return_objects=True) is None
31
- assert repair_json("1.2", return_objects=True) == 1.2
32
- assert repair_json("[", return_objects=True) == []
33
- assert repair_json("[1, 2, 3, 4", return_objects=True) == [1, 2, 3, 4]
34
- assert repair_json("{", return_objects=True) == {}
35
- assert repair_json('{ "key": value, "key2": 1 "key3": null }', return_objects=True) == {
36
- "key": "value",
37
- "key2": 1,
38
- "key3": None,
39
- }
40
-
41
-
42
- def test_valid_json():
43
- assert (
44
- repair_json('{"name": "John", "age": 30, "city": "New York"}')
45
- == '{"name": "John", "age": 30, "city": "New York"}'
46
- )
47
- assert repair_json('{"employees":["John", "Anna", "Peter"]} ') == '{"employees": ["John", "Anna", "Peter"]}'
48
- assert repair_json('{"key": "value:value"}') == '{"key": "value:value"}'
49
- assert repair_json('{"text": "The quick brown fox,"}') == '{"text": "The quick brown fox,"}'
50
- assert repair_json('{"text": "The quick brown fox won\'t jump"}') == '{"text": "The quick brown fox won\'t jump"}'
51
- assert repair_json('{"key": ""') == '{"key": ""}'
52
- assert repair_json('{"key1": {"key2": [1, 2, 3]}}') == '{"key1": {"key2": [1, 2, 3]}}'
53
- assert repair_json('{"key": 12345678901234567890}') == '{"key": 12345678901234567890}'
54
- assert repair_json('{"key": "value\u263a"}') == '{"key": "value\\u263a"}'
55
- assert repair_json('{"key": "value\\nvalue"}') == '{"key": "value\\nvalue"}'
56
-
57
-
58
- def test_brackets_edge_cases():
59
- assert repair_json("[{]") == "[{}]"
60
- assert repair_json(" { } ") == "{}"
61
- assert repair_json("[") == "[]"
62
- assert repair_json("]") == ""
63
- assert repair_json("{") == "{}"
64
- assert repair_json("}") == ""
65
- assert repair_json('{"') == "{}"
66
- assert repair_json('["') == "[]"
67
- assert repair_json("{foo: [}") == '{"foo": []}'
68
-
69
-
70
- def test_general_edge_cases():
71
- assert repair_json('"') == ""
72
- assert repair_json("\n") == ""
73
- assert repair_json(" ") == ""
74
- assert repair_json("[[1\n\n]") == "[[1]]"
75
- assert repair_json("string") == ""
76
- assert repair_json("stringbeforeobject {}") == "{}"
77
- assert repair_json("/") == ""
78
-
79
-
80
- def test_mixed_data_types():
81
- assert repair_json(' {"key": true, "key2": false, "key3": null}') == '{"key": true, "key2": false, "key3": null}'
82
- assert repair_json('{"key": TRUE, "key2": FALSE, "key3": Null} ') == '{"key": true, "key2": false, "key3": null}'
83
-
84
-
85
- def test_missing_and_mixed_quotes():
86
- assert (
87
- repair_json("{'key': 'string', 'key2': false, \"key3\": null, \"key4\": unquoted}")
88
- == '{"key": "string", "key2": false, "key3": null, "key4": "unquoted"}'
89
- )
90
- assert (
91
- repair_json('{"name": "John", "age": 30, "city": "New York')
92
- == '{"name": "John", "age": 30, "city": "New York"}'
93
- )
94
- assert (
95
- repair_json('{"name": "John", "age": 30, city: "New York"}')
96
- == '{"name": "John", "age": 30, "city": "New York"}'
97
- )
98
- assert (
99
- repair_json('{"name": "John", "age": 30, "city": New York}')
100
- == '{"name": "John", "age": 30, "city": "New York"}'
101
- )
102
- assert (
103
- repair_json('{"name": John, "age": 30, "city": "New York"}')
104
- == '{"name": "John", "age": 30, "city": "New York"}'
105
- )
106
- assert repair_json('{“slanted_delimiter”: "value"}') == '{"slanted_delimiter": "value"}'
107
- assert repair_json('{"name": "John", "age": 30, "city": "New') == '{"name": "John", "age": 30, "city": "New"}'
108
- assert (
109
- repair_json('{"name": "John", "age": 30, "city": "New York, "gender": "male"}')
110
- == '{"name": "John", "age": 30, "city": "New York", "gender": "male"}'
111
- )
112
-
113
- assert (
114
- repair_json('[{"key": "value", COMMENT "notes": "lorem "ipsum", sic." }]')
115
- == '[{"key": "value", "notes": "lorem \\"ipsum\\", sic."}]'
116
- )
117
- assert repair_json('{"key": ""value"}') == '{"key": "value"}'
118
- assert repair_json('{"key": "value", 5: "value"}') == '{"key": "value", "5": "value"}'
119
- assert repair_json('{"foo": "\\"bar\\""') == '{"foo": "\\"bar\\""}'
120
- assert repair_json('{"" key":"val"') == '{" key": "val"}'
121
- assert repair_json('{"key": value "key2" : "value2" ') == '{"key": "value", "key2": "value2"}'
122
- assert (
123
- repair_json('{"key": "lorem ipsum ... "sic " tamet. ...}') == '{"key": "lorem ipsum ... \\"sic \\" tamet. ..."}'
124
- )
125
- assert repair_json('{"key": value , }') == '{"key": "value"}'
126
- assert (
127
- repair_json('{"comment": "lorem, "ipsum" sic "tamet". To improve"}')
128
- == '{"comment": "lorem, \\"ipsum\\" sic \\"tamet\\". To improve"}'
129
- )
130
- assert repair_json('{"key": "v"alu"e"} key:') == '{"key": "v\\"alu\\"e"}'
131
-
132
-
133
- def test_array_edge_cases():
134
- assert repair_json("[1, 2, 3,") == "[1, 2, 3]"
135
- assert repair_json("[1, 2, 3, ...]") == "[1, 2, 3]"
136
- assert repair_json("[1, 2, ... , 3]") == "[1, 2, 3]"
137
- assert repair_json("[1, 2, '...', 3]") == '[1, 2, "...", 3]'
138
- assert repair_json("[true, false, null, ...]") == "[true, false, null]"
139
- assert repair_json('["a" "b" "c" 1') == '["a", "b", "c", 1]'
140
- assert repair_json('{"employees":["John", "Anna",') == '{"employees": ["John", "Anna"]}'
141
- assert repair_json('{"employees":["John", "Anna", "Peter') == '{"employees": ["John", "Anna", "Peter"]}'
142
- assert repair_json('{"key1": {"key2": [1, 2, 3') == '{"key1": {"key2": [1, 2, 3]}}'
143
- assert repair_json('{"key": ["value]}') == '{"key": ["value"]}'
144
- assert repair_json('["lorem "ipsum" sic"]') == '["lorem \\"ipsum\\" sic"]'
145
- assert (
146
- repair_json('{"key1": ["value1", "value2"}, "key2": ["value3", "value4"]}')
147
- == '{"key1": ["value1", "value2"], "key2": ["value3", "value4"]}'
148
- )
149
- assert repair_json('{"key": ["value" "value1" "value2"]}') == '{"key": ["value", "value1", "value2"]}'
150
- assert (
151
- repair_json('{"key": ["lorem "ipsum" dolor "sit" amet, "consectetur" ", "lorem "ipsum" dolor", "lorem"]}')
152
- == '{"key": ["lorem \\"ipsum\\" dolor \\"sit\\" amet, \\"consectetur\\" ", "lorem \\"ipsum\\" dolor", "lorem"]}'
153
- )
154
- assert repair_json('{"k"e"y": "value"}') == '{"k\\"e\\"y": "value"}'
155
- assert repair_json('["key":"value"}]') == '[{"key": "value"}]'
156
-
157
-
158
- def test_escaping():
159
- assert repair_json("'\"'") == ""
160
- assert repair_json('{"key": \'string"\n\t\\le\'') == '{"key": "string\\"\\n\\t\\\\le"}'
161
- assert (
162
- repair_json(
163
- r'{"real_content": "Some string: Some other string \t Some string <a href=\"https://domain.com\">Some link</a>"'
164
- )
165
- == r'{"real_content": "Some string: Some other string \t Some string <a href=\"https://domain.com\">Some link</a>"}'
166
- )
167
- assert repair_json('{"key_1\n": "value"}') == '{"key_1": "value"}'
168
- assert repair_json('{"key\t_": "value"}') == '{"key\\t_": "value"}'
169
- assert repair_json("{\"key\": '\u0076\u0061\u006c\u0075\u0065'}") == '{"key": "value"}'
170
- assert repair_json('{"key": "\\u0076\\u0061\\u006C\\u0075\\u0065"}', skip_json_loads=True) == '{"key": "value"}'
171
-
172
-
173
- def test_object_edge_cases():
174
- assert repair_json("{ ") == "{}"
175
- assert repair_json('{"": "value"') == '{"": "value"}'
176
- assert repair_json('{"value_1": true, COMMENT "value_2": "data"}') == '{"value_1": true, "value_2": "data"}'
177
- assert (
178
- repair_json('{"value_1": true, SHOULD_NOT_EXIST "value_2": "data" AAAA }')
179
- == '{"value_1": true, "value_2": "data"}'
180
- )
181
- assert repair_json('{"" : true, "key2": "value2"}') == '{"": true, "key2": "value2"}'
182
- assert (
183
- repair_json("""{""answer"":[{""traits"":''Female aged 60+'',""answer1"":""5""}]}""")
184
- == '{"answer": [{"traits": "Female aged 60+", "answer1": "5"}]}'
185
- )
186
- assert (
187
- repair_json('{ "words": abcdef", "numbers": 12345", "words2": ghijkl" }')
188
- == '{"words": "abcdef", "numbers": 12345, "words2": "ghijkl"}'
189
- )
190
- assert (
191
- repair_json("""{"number": 1,"reason": "According...""ans": "YES"}""")
192
- == '{"number": 1, "reason": "According...", "ans": "YES"}'
193
- )
194
- assert repair_json("""{ "a" : "{ b": {} }" }""") == '{"a": "{ b"}'
195
- assert repair_json("""{"b": "xxxxx" true}""") == '{"b": "xxxxx"}'
196
- assert repair_json('{"key": "Lorem "ipsum" s,"}') == '{"key": "Lorem \\"ipsum\\" s,"}'
197
- assert repair_json('{"lorem": ipsum, sic, datum.",}') == '{"lorem": "ipsum, sic, datum."}'
198
- assert (
199
- repair_json('{"lorem": sic tamet. "ipsum": sic tamet, quick brown fox. "sic": ipsum}')
200
- == '{"lorem": "sic tamet.", "ipsum": "sic tamet", "sic": "ipsum"}'
201
- )
202
- assert (
203
- repair_json('{"lorem_ipsum": "sic tamet, quick brown fox. }')
204
- == '{"lorem_ipsum": "sic tamet, quick brown fox."}'
205
- )
206
- assert repair_json('{"key":value, " key2":"value2" }') == '{"key": "value", " key2": "value2"}'
207
- assert repair_json('{"key":value "key2":"value2" }') == '{"key": "value", "key2": "value2"}'
208
- assert (
209
- repair_json("{'text': 'words{words in brackets}more words'}")
210
- == '{"text": "words{words in brackets}more words"}'
211
- )
212
- assert repair_json("{text:words{words in brackets}}") == '{"text": "words{words in brackets}"}'
213
- assert repair_json("{text:words{words in brackets}m}") == '{"text": "words{words in brackets}m"}'
214
- assert repair_json('{"key": "value, value2"```') == '{"key": "value, value2"}'
215
- assert repair_json("{key:value,key2:value2}") == '{"key": "value", "key2": "value2"}'
216
- assert repair_json('{"key:"value"}') == '{"key": "value"}'
217
- assert repair_json('{"key:value}') == '{"key": "value"}'
218
- assert (
219
- repair_json('[{"lorem": {"ipsum": "sic"}, """" "lorem": {"ipsum": "sic"}]')
220
- == '[{"lorem": {"ipsum": "sic"}}, {"lorem": {"ipsum": "sic"}}]'
221
- )
222
- assert (
223
- repair_json('{ "key": ["arrayvalue"], ["arrayvalue1"], ["arrayvalue2"], "key3": "value3" }')
224
- == '{"key": ["arrayvalue", "arrayvalue1", "arrayvalue2"], "key3": "value3"}'
225
- )
226
- assert (
227
- repair_json('{ "key": ["arrayvalue"], "key3": "value3", ["arrayvalue1"] }')
228
- == '{"key": ["arrayvalue"], "key3": "value3", "arrayvalue1": ""}'
229
- )
230
- assert (
231
- repair_json('{"key": "{\\\\"key\\\\\\":[\\"value\\\\\\"],\\"key2\\":"value2"}"}')
232
- == '{"key": "{\\"key\\":[\\"value\\"],\\"key2\\":\\"value2\\"}"}'
233
- )
234
- assert repair_json('{"key": , "key2": "value2"}') == '{"key": "", "key2": "value2"}'
235
-
236
-
237
- def test_number_edge_cases():
238
- assert (
239
- repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
240
- )
241
- assert repair_json('{"key": 1/3}') == '{"key": "1/3"}'
242
- assert repair_json('{"key": .25}') == '{"key": 0.25}'
243
- assert repair_json('{"here": "now", "key": 1/3, "foo": "bar"}') == '{"here": "now", "key": "1/3", "foo": "bar"}'
244
- assert repair_json('{"key": 12345/67890}') == '{"key": "12345/67890"}'
245
- assert repair_json("[105,12") == "[105, 12]"
246
- assert repair_json('{"key", 105,12,') == '{"key": "105,12"}'
247
- assert repair_json('{"key": 1/3, "foo": "bar"}') == '{"key": "1/3", "foo": "bar"}'
248
- assert repair_json('{"key": 10-20}') == '{"key": "10-20"}'
249
- assert repair_json('{"key": 1.1.1}') == '{"key": "1.1.1"}'
250
- assert repair_json("[- ") == "[]"
251
- assert repair_json('{"key": 1. }') == '{"key": 1.0}'
252
- assert repair_json('{"key": 1e10 }') == '{"key": 10000000000.0}'
253
- assert repair_json('{"key": 1e }') == '{"key": 1}'
254
- assert repair_json('{"key": 1notanumber }') == '{"key": "1notanumber"}'
255
- assert repair_json("[1, 2notanumber]") == '[1, "2notanumber"]'
256
-
257
-
258
- def test_markdown():
259
- assert (
260
- repair_json('{ "content": "[LINK]("https://google.com")" }')
261
- == '{"content": "[LINK](\\"https://google.com\\")"}'
262
- )
263
- assert repair_json('{ "content": "[LINK](" }') == '{"content": "[LINK]("}'
264
- assert repair_json('{ "content": "[LINK](", "key": true }') == '{"content": "[LINK](", "key": true}'
265
-
266
-
267
- def test_leading_trailing_characters():
268
- assert repair_json('````{ "key": "value" }```') == '{"key": "value"}'
269
- assert repair_json("""{ "a": "", "b": [ { "c": 1} ] \n}```""") == '{"a": "", "b": [{"c": 1}]}'
270
- assert (
271
- repair_json("Based on the information extracted, here is the filled JSON output: ```json { 'a': 'b' } ```")
272
- == '{"a": "b"}'
273
- )
274
- assert (
275
- repair_json("""
276
- The next 64 elements are:
277
- ```json
278
- { "key": "value" }
279
- ```""")
280
- == '{"key": "value"}'
281
- )
282
-
283
-
284
- def test_multiple_jsons():
285
- assert repair_json("[]{}") == "[[], {}]"
286
- assert repair_json("{}[]{}") == "[{}, [], {}]"
287
- assert repair_json('{"key":"value"}[1,2,3,True]') == '[{"key": "value"}, [1, 2, 3, true]]'
288
- assert (
289
- repair_json('lorem ```json {"key":"value"} ``` ipsum ```json [1,2,3,True] ``` 42')
290
- == '[{"key": "value"}, [1, 2, 3, true]]'
291
- )
292
- assert repair_json('[{"key":"value"}][{"key":"value_after"}]') == '[{"key": "value_after"}]'
293
-
294
-
295
- def test_repair_json_with_objects():
296
- # Test with valid JSON strings
297
- assert repair_json("[]", return_objects=True) == []
298
- assert repair_json("{}", return_objects=True) == {}
299
- assert repair_json('{"key": true, "key2": false, "key3": null}', return_objects=True) == {
300
- "key": True,
301
- "key2": False,
302
- "key3": None,
303
- }
304
- assert repair_json('{"name": "John", "age": 30, "city": "New York"}', return_objects=True) == {
305
- "name": "John",
306
- "age": 30,
307
- "city": "New York",
308
- }
309
- assert repair_json("[1, 2, 3, 4]", return_objects=True) == [1, 2, 3, 4]
310
- assert repair_json('{"employees":["John", "Anna", "Peter"]} ', return_objects=True) == {
311
- "employees": ["John", "Anna", "Peter"]
312
- }
313
- assert repair_json(
314
- """
315
- {
316
- "resourceType": "Bundle",
317
- "id": "1",
318
- "type": "collection",
319
- "entry": [
320
- {
321
- "resource": {
322
- "resourceType": "Patient",
323
- "id": "1",
324
- "name": [
325
- {"use": "official", "family": "Corwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."},
326
- {"use": "maiden", "family": "Goodwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}
327
- ]
328
- }
329
- }
330
- ]
331
- }
332
- """,
333
- return_objects=True,
334
- ) == {
335
- "resourceType": "Bundle",
336
- "id": "1",
337
- "type": "collection",
338
- "entry": [
339
- {
340
- "resource": {
341
- "resourceType": "Patient",
342
- "id": "1",
343
- "name": [
344
- {
345
- "use": "official",
346
- "family": "Corwin",
347
- "given": ["Keisha", "Sunny"],
348
- "prefix": ["Mrs."],
349
- },
350
- {
351
- "use": "maiden",
352
- "family": "Goodwin",
353
- "given": ["Keisha", "Sunny"],
354
- "prefix": ["Mrs."],
355
- },
356
- ],
357
- }
358
- }
359
- ],
360
- }
361
- assert repair_json(
362
- '{\n"html": "<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>"}',
363
- return_objects=True,
364
- ) == {"html": '<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>'}
365
- assert repair_json(
366
- """
367
- [
368
- {
369
- "foo": "Foo bar baz",
370
- "tag": "#foo-bar-baz"
371
- },
372
- {
373
- "foo": "foo bar "foobar" foo bar baz.",
374
- "tag": "#foo-bar-foobar"
375
- }
376
- ]
377
- """,
378
- return_objects=True,
379
- ) == [
380
- {"foo": "Foo bar baz", "tag": "#foo-bar-baz"},
381
- {"foo": 'foo bar "foobar" foo bar baz.', "tag": "#foo-bar-foobar"},
382
- ]
383
-
384
-
385
- def test_repair_json_skip_json_loads():
386
- assert (
387
- repair_json('{"key": true, "key2": false, "key3": null}', skip_json_loads=True)
388
- == '{"key": true, "key2": false, "key3": null}'
389
- )
390
- assert repair_json(
391
- '{"key": true, "key2": false, "key3": null}',
392
- return_objects=True,
393
- skip_json_loads=True,
394
- ) == {"key": True, "key2": False, "key3": None}
395
- assert (
396
- repair_json('{"key": true, "key2": false, "key3": }', skip_json_loads=True)
397
- == '{"key": true, "key2": false, "key3": ""}'
398
- )
399
- assert loads('{"key": true, "key2": false, "key3": }', skip_json_loads=True) == {
400
- "key": True,
401
- "key2": False,
402
- "key3": "",
403
- }
5
+ from src.json_repair.json_repair import from_file
404
6
 
405
7
 
406
8
  def test_repair_json_from_file():
@@ -891,83 +493,3 @@ def test_repair_json_from_file():
891
493
  finally:
892
494
  # Clean up - delete the temporary file
893
495
  os.remove(temp_path)
894
-
895
-
896
- def test_ensure_ascii():
897
- assert repair_json("{'test_中国人_ascii':'统一码'}", ensure_ascii=False) == '{"test_中国人_ascii": "统一码"}'
898
-
899
-
900
- def test_stream_stable():
901
- # default: stream_stable = False
902
- # When the json to be repaired is the accumulation of streaming json at a certain moment.
903
- # The default repair result is unstable.
904
- assert repair_json('{"key": "val\\', stream_stable=False) == '{"key": "val\\\\"}'
905
- assert repair_json('{"key": "val\\n', stream_stable=False) == '{"key": "val"}'
906
- assert (
907
- repair_json('{"key": "val\\n123,`key2:value2', stream_stable=False) == '{"key": "val\\n123", "key2": "value2"}'
908
- )
909
- assert repair_json('{"key": "val\\n123,`key2:value2`"}', stream_stable=True) == '{"key": "val\\n123,`key2:value2`"}'
910
- # stream_stable = True
911
- assert repair_json('{"key": "val\\', stream_stable=True) == '{"key": "val"}'
912
- assert repair_json('{"key": "val\\n', stream_stable=True) == '{"key": "val\\n"}'
913
- assert repair_json('{"key": "val\\n123,`key2:value2', stream_stable=True) == '{"key": "val\\n123,`key2:value2"}'
914
- assert repair_json('{"key": "val\\n123,`key2:value2`"}', stream_stable=True) == '{"key": "val\\n123,`key2:value2`"}'
915
-
916
-
917
- def test_cli(capsys):
918
- # Create a temporary file
919
- temp_fd, temp_path = tempfile.mkstemp(suffix=".json")
920
- try:
921
- # Write content to the temporary file
922
- with os.fdopen(temp_fd, "w") as tmp:
923
- tmp.write("{key:value")
924
- cli(inline_args=[temp_path, "--indent", 0, "--ensure_ascii"])
925
- captured = capsys.readouterr()
926
- assert captured.out == '{\n"key": "value"\n}\n'
927
-
928
- # Test the output option
929
- tempout_fd, tempout_path = tempfile.mkstemp(suffix=".json")
930
- cli(inline_args=[temp_path, "--indent", 0, "-o", tempout_path])
931
- with open(tempout_path) as tmp:
932
- out = tmp.read()
933
- assert out == '{\n"key": "value"\n}'
934
-
935
- # Test the inline option
936
- cli(inline_args=[temp_path, "--indent", 0, "-i"])
937
- with open(temp_path) as tmp:
938
- out = tmp.read()
939
- assert out == '{\n"key": "value"\n}'
940
-
941
- finally:
942
- # Clean up - delete the temporary file
943
- os.remove(temp_path)
944
- os.remove(tempout_path)
945
-
946
- # Prepare a JSON string that needs to be repaired.
947
- test_input = "{key:value"
948
- # Expected output when running cli with --indent 0.
949
- expected_output = '{\n"key": "value"\n}\n'
950
- # Patch sys.stdin so that cli() reads from it instead of a file.
951
- with patch("sys.stdin", new=io.StringIO(test_input)):
952
- cli(inline_args=["--indent", 0])
953
- captured = capsys.readouterr()
954
- assert captured.out == expected_output
955
-
956
-
957
- def test_cli_inline_requires_filename(capsys):
958
- """cli() should exit with an error when --inline is passed without a filename."""
959
- with pytest.raises(SystemExit) as exc:
960
- cli(inline_args=["--inline"])
961
- captured = capsys.readouterr()
962
- assert captured.err.strip() == "Error: Inline mode requires a filename"
963
- assert exc.value.code != 0
964
-
965
-
966
- def test_cli_inline_and_output_error(tmp_path, capsys):
967
- """cli() should exit with an error when --inline and --output are used together."""
968
- outfile = tmp_path / "out.json"
969
- with pytest.raises(SystemExit) as exc:
970
- cli(inline_args=["dummy.json", "--inline", "--output", str(outfile)])
971
- captured = capsys.readouterr()
972
- assert captured.err.strip() == "Error: You cannot pass both --inline and --output"
973
- assert exc.value.code != 0
File without changes
File without changes
File without changes