json-repair 0.17.0__tar.gz → 0.17.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.17.0
3
+ Version: 0.17.2
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -39,6 +39,12 @@ This simple package can be used to fix an invalid json string. To know all cases
39
39
 
40
40
  Inspired by https://github.com/josdejong/jsonrepair
41
41
 
42
+ ---
43
+ # Offer me a beer
44
+ If you find this library useful, you can help me by donating toward my monthly beer budget here: https://github.com/sponsors/mangiucugna
45
+
46
+ ---
47
+
42
48
  # Motivation
43
49
  Some LLMs are a bit iffy when it comes to returning well-formed JSON data: sometimes they skip a parenthesis and sometimes they add some words to it, because that's what an LLM does.
44
50
  Luckily, the mistakes LLMs make are simple enough to be fixed without destroying the content.
@@ -154,10 +160,6 @@ You will need owner access to this repository
154
160
  # Repair JSON in other programming languages
155
161
  - Typescript: https://github.com/josdejong/jsonrepair
156
162
  - Go: https://github.com/RealAlexandreAI/json-repair
157
- ---
158
- # Bonus Content
159
- If you need some good Custom Instructions (System Message) to improve your chatbot responses try https://gist.github.com/mangiucugna/7ec015c4266df11be8aa510be0110fe4
160
-
161
163
  ---
162
164
  ## Star History
163
165
 
@@ -2,6 +2,12 @@ This simple package can be used to fix an invalid json string. To know all cases
2
2
 
3
3
  Inspired by https://github.com/josdejong/jsonrepair
4
4
 
5
+ ---
6
+ # Offer me a beer
7
+ If you find this library useful, you can help me by donating toward my monthly beer budget here: https://github.com/sponsors/mangiucugna
8
+
9
+ ---
10
+
5
11
  # Motivation
6
12
  Some LLMs are a bit iffy when it comes to returning well-formed JSON data: sometimes they skip a parenthesis and sometimes they add some words to it, because that's what an LLM does.
7
13
  Luckily, the mistakes LLMs make are simple enough to be fixed without destroying the content.
@@ -117,10 +123,6 @@ You will need owner access to this repository
117
123
  # Repair JSON in other programming languages
118
124
  - Typescript: https://github.com/josdejong/jsonrepair
119
125
  - Go: https://github.com/RealAlexandreAI/json-repair
120
- ---
121
- # Bonus Content
122
- If you need some good Custom Instructions (System Message) to improve your chatbot responses try https://gist.github.com/mangiucugna/7ec015c4266df11be8aa510be0110fe4
123
-
124
126
  ---
125
127
  ## Star History
126
128
 
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
3
3
  build-backend = "setuptools.build_meta"
4
4
  [project]
5
5
  name = "json_repair"
6
- version = "0.17.0"
6
+ version = "0.17.2"
7
7
  license = {file = "LICENSE"}
8
8
  authors = [
9
9
  { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
@@ -227,6 +227,10 @@ class JSONParser:
227
227
  self.index += 1
228
228
  char = self.get_char_at()
229
229
 
230
+ if not char:
231
+ # This is an empty string
232
+ return ""
233
+
230
234
  # Ensuring we use the right delimiter
231
235
  if char == "'":
232
236
  lstring_delimiter = rstring_delimiter = "'"
@@ -298,16 +302,6 @@ class JSONParser:
298
302
  string_acc += char
299
303
  self.index += 1
300
304
  char = self.get_char_at()
301
- # If the string contains an escaped character we should respect that or remove the escape
302
- if self.get_char_at(-1) == "\\":
303
- if char in [rstring_delimiter, "t", "n", "r", "b", "\\"]:
304
- string_acc += char
305
- self.index += 1
306
- char = self.get_char_at()
307
- else:
308
- # Remove this character from the final output
309
- string_acc = string_acc[:-2] + string_acc[-1:]
310
- self.index -= 1
311
305
  # ChatGPT sometimes forget to quote stuff in html tags or markdown, so we do this whole thing here
312
306
  if char == rstring_delimiter:
313
307
  # Special case here, in case of double quotes one after another
@@ -431,28 +425,40 @@ class JSONParser:
431
425
  return ""
432
426
 
433
427
  def get_char_at(self, count: int = 0) -> Union[str, bool]:
434
- if self.json_fd:
435
- self.json_fd.seek(self.index + count)
436
- char = self.json_fd.read(1)
437
- if char == "":
438
- return False
439
- return char
440
- else:
441
- # Why not use something simpler? Because we might be out of bounds and doing this check all the time is annoying
442
- try:
443
- return self.json_str[self.index + count]
444
- except IndexError:
428
+ # Why not use something simpler? Because try/except in python is a faster alternative to an "if" statement that is often True
429
+ try:
430
+ return self.json_str[self.index + count]
431
+ except IndexError:
432
+ if self.json_fd:
433
+ self.json_fd.seek(self.index + count)
434
+ char = self.json_fd.read(1)
435
+ if char == "":
436
+ return False
437
+ return char
438
+ else:
445
439
  return False
446
440
 
447
441
  def skip_whitespaces_at(self) -> None:
448
442
  """
449
443
  This function quickly iterates on whitespaces, syntactic sugar to make the code more concise
450
444
  """
451
-
452
- char = self.get_char_at()
453
- while char and char.isspace():
454
- self.index += 1
445
+ if self.json_fd:
455
446
  char = self.get_char_at()
447
+ while char and char.isspace():
448
+ self.index += 1
449
+ char = self.get_char_at()
450
+ else:
451
+ # If this is not a file stream, we do this monster here to make this function much much faster
452
+ try:
453
+ char = self.json_str[self.index]
454
+ except IndexError:
455
+ return
456
+ while char.isspace():
457
+ self.index += 1
458
+ try:
459
+ char = self.json_str[self.index]
460
+ except IndexError:
461
+ return
456
462
 
457
463
  def set_context(self, value: str) -> None:
458
464
  # If a value is provided update the context variable and save in stack
@@ -479,11 +485,17 @@ class JSONParser:
479
485
  context = self.json_fd.read(self.logger["window"] * 2)
480
486
  self.json_fd.seek(self.index)
481
487
  else:
482
- context = self.json_str[
483
- self.index
484
- - self.logger["window"] : self.index
485
- + self.logger["window"]
486
- ]
488
+ start = (
489
+ self.index - self.logger["window"]
490
+ if (self.index - self.logger["window"]) >= 0
491
+ else 0
492
+ )
493
+ end = (
494
+ self.index + self.logger["window"]
495
+ if (self.index + self.logger["window"]) <= len(self.json_str)
496
+ else len(self.json_str)
497
+ )
498
+ context = self.json_str[start:end]
487
499
  self.logger["log"].append(
488
500
  {
489
501
  "text": text,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.17.0
3
+ Version: 0.17.2
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -39,6 +39,12 @@ This simple package can be used to fix an invalid json string. To know all cases
39
39
 
40
40
  Inspired by https://github.com/josdejong/jsonrepair
41
41
 
42
+ ---
43
+ # Offer me a beer
44
+ If you find this library useful, you can help me by donating toward my monthly beer budget here: https://github.com/sponsors/mangiucugna
45
+
46
+ ---
47
+
42
48
  # Motivation
43
49
  Some LLMs are a bit iffy when it comes to returning well-formed JSON data: sometimes they skip a parenthesis and sometimes they add some words to it, because that's what an LLM does.
44
50
  Luckily, the mistakes LLMs make are simple enough to be fixed without destroying the content.
@@ -154,10 +160,6 @@ You will need owner access to this repository
154
160
  # Repair JSON in other programming languages
155
161
  - Typescript: https://github.com/josdejong/jsonrepair
156
162
  - Go: https://github.com/RealAlexandreAI/json-repair
157
- ---
158
- # Bonus Content
159
- If you need some good Custom Instructions (System Message) to improve your chatbot responses try https://gist.github.com/mangiucugna/7ec015c4266df11be8aa510be0110fe4
160
-
161
163
  ---
162
164
  ## Star History
163
165
 
@@ -0,0 +1,295 @@
1
+ from src.json_repair.json_repair import from_file, repair_json, loads
2
+
3
+
4
+ def test_repair_json():
5
+ # Test with valid JSON strings
6
+ assert repair_json("[]") == "[]"
7
+ assert repair_json("[{]") == "[]"
8
+ assert repair_json(" { } ") == "{}"
9
+ assert repair_json("\"") == '""'
10
+ assert repair_json("\n") == '""'
11
+ assert repair_json(' {"key": true, "key2": false, "key3": null}') == '{"key": true, "key2": false, "key3": null}'
12
+ assert repair_json('{"key": TRUE, "key2": FALSE, "key3": Null} ') == '{"key": true, "key2": false, "key3": null}'
13
+ assert repair_json("{'key': 'string', 'key2': false, \"key3\": null, \"key4\": unquoted}") == '{"key": "string", "key2": false, "key3": null, "key4": "unquoted"}'
14
+ assert (
15
+ repair_json('{"name": "John", "age": 30, "city": "New York"}')
16
+ == '{"name": "John", "age": 30, "city": "New York"}'
17
+ )
18
+ assert repair_json("[1, 2, 3, 4]") == "[1, 2, 3, 4]"
19
+ assert (
20
+ repair_json('{"employees":["John", "Anna", "Peter"]} ')
21
+ == '{"employees": ["John", "Anna", "Peter"]}'
22
+ )
23
+
24
+ # Test with invalid JSON strings
25
+ assert (
26
+ repair_json('{"name": "John", "age": 30, "city": "New York')
27
+ == '{"name": "John", "age": 30, "city": "New York"}'
28
+ )
29
+ assert (
30
+ repair_json('{"name": "John", "age": 30, city: "New York"}')
31
+ == '{"name": "John", "age": 30, "city": "New York"}'
32
+ )
33
+ assert (
34
+ repair_json('{"name": "John", "age": 30, "city": New York}')
35
+ == '{"name": "John", "age": 30, "city": "New York"}'
36
+ )
37
+ assert (
38
+ repair_json('{"name": John, "age": 30, "city": "New York"}')
39
+ == '{"name": "John", "age": 30, "city": "New York"}'
40
+ )
41
+ assert repair_json("[1, 2, 3,") == "[1, 2, 3]"
42
+ assert (
43
+ repair_json('{"employees":["John", "Anna",')
44
+ == '{"employees": ["John", "Anna"]}'
45
+ )
46
+
47
+ # Test with edge cases
48
+ assert repair_json(" ") == '""'
49
+ assert repair_json("[") == "[]"
50
+ assert repair_json("]") == '""'
51
+ assert repair_json("[[1\n\n]") == "[[1]]"
52
+ assert repair_json("{") == "{}"
53
+ assert repair_json("}") == '""'
54
+ assert repair_json("string") == '""'
55
+ assert repair_json("stringbeforeobject {}") == '{}'
56
+ assert repair_json('{"') == '{}'
57
+ assert repair_json('["') == '[]'
58
+ assert repair_json("'\"'") == '"\\\""'
59
+ assert repair_json("'string\"\n\t\le") == '"string\\\"\\n\\t\\\\le"'
60
+ assert repair_json('{foo: [}') == '{"foo": []}'
61
+ assert repair_json('''{ "a": "{ b": {} }" }''') == '{"a": "{ b"}'
62
+ assert repair_json('{"key": "value:value"}') == '{"key": "value:value"}'
63
+ assert repair_json('{“slanted_delimiter”: "value"}') == '{"slanted_delimiter": "value"}'
64
+ assert (
65
+ repair_json('{"name": "John", "age": 30, "city": "New')
66
+ == '{"name": "John", "age": 30, "city": "New"}'
67
+ )
68
+ assert (
69
+ repair_json('{"employees":["John", "Anna", "Peter')
70
+ == '{"employees": ["John", "Anna", "Peter"]}'
71
+ )
72
+ assert (
73
+ repair_json('{"employees":["John", "Anna", "Peter"]}')
74
+ == '{"employees": ["John", "Anna", "Peter"]}'
75
+ )
76
+ assert (
77
+ repair_json('{"text": "The quick brown fox,"}')
78
+ == '{"text": "The quick brown fox,"}'
79
+ )
80
+ assert (
81
+ repair_json('{"text": "The quick brown fox won\'t jump"}')
82
+ == '{"text": "The quick brown fox won\'t jump"}'
83
+ )
84
+ assert {
85
+ repair_json('{"value_1": "value_2": "data"}') == '{"value_1": "value_2", "data": ""}'
86
+ }
87
+ assert {
88
+ repair_json('{"value_1": true, COMMENT "value_2": "data"}') == '{"value_1": "value_2", "": "data"}'
89
+ }
90
+ # Test with garbage comments
91
+ assert repair_json('{"value_1": true, SHOULD_NOT_EXIST "value_2": "data" AAAA }') == '{"value_1": true, "value_2": "data"}'
92
+ assert {
93
+ repair_json('{"" : true, "key2": "value2"}') == '{" ": true, "key2": "value_2"}'
94
+ }
95
+ assert {
96
+ repair_json('{"": true, "key2": "value2"}') == '{"empty_placeholder": true, "key2": "value_2"}'
97
+ }
98
+ # Test a nasty corner case
99
+ assert repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
100
+
101
+ #Test markdown stupidities from ChatGPT
102
+ assert repair_json('{ "content": "[LINK]("https://google.com")" }') == '{"content": "[LINK](\\"https://google.com\\")"}'
103
+ assert repair_json('{ "content": "[LINK](" }') == '{"content": "[LINK]("}'
104
+ assert repair_json('{ "content": "[LINK](", "key": true }') == '{"content": "[LINK](", "key": true}'
105
+ assert repair_json("""
106
+ ```json
107
+ { "key": "value" }
108
+ ```""") == '{"key": "value"}'
109
+ assert repair_json('````{ "key": "value" }```') == '{"key": "value"}'
110
+ assert repair_json(r'{"real_content": "Some string: Some other string Some string <a href=\"https://domain.com\">Some link</a>"') == r'{"real_content": "Some string: Some other string Some string <a href=\\\"https://domain.com\\\">Some link</a>"}'
111
+ assert repair_json('{"key_1\n": "value"}') == '{"key_1": "value"}'
112
+ assert repair_json('{"key\t_": "value"}') == '{"key\\t_": "value"}'
113
+ assert repair_json('{""answer"":[{""traits"":''Female aged 60+'',""answer1"":""5""}]}') == '{"answer": [{"traits": "Female aged 60+", "answer1": "5"}]}'
114
+ assert repair_json('{"key":""') == '{"key": ""}'
115
+ assert repair_json('{ "words": abcdef", "numbers": 12345", "words2": ghijkl" }') == '{"words": "abcdef", "numbers": 12345, "words2": "ghijkl"}'
116
+ assert repair_json('{"key": 1/3}') == '{"key": "1/3"}'
117
+ assert repair_json('{"key": .25}') == '{"key": 0.25}'
118
+ assert repair_json("""{ "a": "", "b": [ { "c": 1} ] \n}```""") == '{"a": "", "b": [{"c": 1}]}'
119
+ assert repair_json("Based on the information extracted, here is the filled JSON output: ```json { 'a': 'b' } ```") == '{"a": "b"}'
120
+ assert repair_json('''{"number": 1,"reason": "According...""ans": "YES"}''') == '{"number": 1, "reason": "According...", "ans": "YES"}'
121
+ assert repair_json('{"key": 1/3, "foo": "bar"}') == '{"key": "1/3", "foo": "bar"}'
122
+ assert repair_json('{"here": "now", "key": 1/3, "foo": "bar"}') == '{"here": "now", "key": "1/3", "foo": "bar"}'
123
+ assert repair_json('{"key": 12345/67890}') == '{"key": "12345/67890"}'
124
+
125
+
126
+ def test_repair_json_with_objects():
127
+ # Test with valid JSON strings
128
+ assert repair_json("[]", return_objects=True) == []
129
+ assert repair_json("{}", return_objects=True) == {}
130
+ assert repair_json('{"key": true, "key2": false, "key3": null}', return_objects=True) == {"key": True, "key2": False, "key3": None}
131
+ assert repair_json('{"name": "John", "age": 30, "city": "New York"}', return_objects=True) == {
132
+ "name": "John",
133
+ "age": 30,
134
+ "city": "New York",
135
+ }
136
+ assert repair_json("[1, 2, 3, 4]", return_objects=True) == [1, 2, 3, 4]
137
+ assert repair_json('{"employees":["John", "Anna", "Peter"]} ', return_objects=True) == {
138
+ "employees": ["John", "Anna", "Peter"]
139
+ }
140
+
141
+ # Test with invalid JSON strings
142
+ assert repair_json('{"name": "John", "age": 30, "city": "New York', return_objects=True) == {
143
+ "name": "John",
144
+ "age": 30,
145
+ "city": "New York",
146
+ }
147
+ assert repair_json('{"name": "John", "age": 30, city: "New York"}', return_objects=True) == {
148
+ "name": "John",
149
+ "age": 30,
150
+ "city": "New York",
151
+ }
152
+ assert repair_json('{"name": "John", "age": 30, "city": New York}', return_objects=True) == {
153
+ "name": "John",
154
+ "age": 30,
155
+ "city": "New York",
156
+ }
157
+ assert repair_json("[1, 2, 3,", return_objects=True) == [1, 2, 3]
158
+ assert repair_json('{"employees":["John", "Anna",', return_objects=True) == {
159
+ "employees": ["John", "Anna"]
160
+ }
161
+
162
+ # Test with edge cases
163
+ assert repair_json(" ", return_objects=True) == ""
164
+ assert repair_json("[", return_objects=True) == []
165
+ assert repair_json("{", return_objects=True) == {}
166
+ assert repair_json('{"key": "value:value"}', return_objects=True) == {"key": "value:value"}
167
+ assert repair_json("{'key': 'string', 'key2': false, \"key3\": null, \"key4\": unquoted}", return_objects=True) == {"key": "string", "key2": False, "key3": None, "key4": "unquoted"}
168
+ assert repair_json('{"name": "John", "age": 30, "city": "New', return_objects=True) == {
169
+ "name": "John",
170
+ "age": 30,
171
+ "city": "New",
172
+ }
173
+ assert repair_json('{"employees":["John", "Anna", "Peter', return_objects=True) == {
174
+ "employees": ["John", "Anna", "Peter"]
175
+ }
176
+
177
+ #Test with garbage comments
178
+ assert repair_json('{"value_1": true, SHOULD_NOT_EXIST "value_2": "data" AAAA }', return_objects=True) == {'value_1': True, 'value_2': 'data'}
179
+
180
+ #Test markdown stupidities from ChatGPT
181
+ assert repair_json('{ "content": "[LINK]("https://google.com")" }', return_objects=True) == { "content": "[LINK](\"https://google.com\")"}
182
+ assert repair_json('''
183
+ {
184
+ "resourceType": "Bundle",
185
+ "id": "1",
186
+ "type": "collection",
187
+ "entry": [
188
+ {
189
+ "resource": {
190
+ "resourceType": "Patient",
191
+ "id": "1",
192
+ "name": [
193
+ {"use": "official", "family": "Corwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."},
194
+ {"use": "maiden", "family": "Goodwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}
195
+ ]
196
+ }
197
+ }
198
+ ]
199
+ }
200
+ ''', return_objects=True) == {"resourceType": "Bundle", "id": "1", "type": "collection", "entry": [{"resource": {"resourceType": "Patient", "id": "1", "name": [{"use": "official", "family": "Corwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}, {"use": "maiden", "family": "Goodwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}]}}]}
201
+ assert repair_json('{\n"html": "<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>"}', return_objects=True) == {'html': '<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>'}
202
+ assert repair_json("""
203
+ [
204
+ {
205
+ "foo": "Foo bar baz",
206
+ "tag": "#foo-bar-baz"
207
+ },
208
+ {
209
+ "foo": "foo bar "foobar" foo bar baz.",
210
+ "tag": "#foo-bar-foobar"
211
+ }
212
+ ]
213
+ """, return_objects=True) == [{"foo": "Foo bar baz", "tag": "#foo-bar-baz"},{"foo": "foo bar \"foobar\" foo bar baz.", "tag": "#foo-bar-foobar" }]
214
+
215
+
216
def test_repair_json_corner_cases_generate_by_gpt():
    """Check the JSON text returned by repair_json for assorted corner cases."""
    # Each entry pairs a raw input with the exact string expected back.
    cases = [
        # Nested JSON, complete and truncated
        ('{"key1": {"key2": [1, 2, 3]}}', '{"key1": {"key2": [1, 2, 3]}}'),
        ('{"key1": {"key2": [1, 2, 3', '{"key1": {"key2": [1, 2, 3]}}'),
        # Empty key
        ('{"": "value"}', '{"": "value"}'),
        # Unicode character
        ('{"key": "value\u263A"}', '{"key": "value\\u263a"}'),
        # Escaped special character
        ('{"key": "value\\nvalue"}', '{"key": "value\\nvalue"}'),
        # Large number
        ('{"key": 12345678901234567890}', '{"key": 12345678901234567890}'),
        # Surrounding and inner whitespace
        (' { "key" : "value" } ', '{"key": "value"}'),
        # Null value
        ('{"key": null}', '{"key": null}'),
        # Numeric-like values that are expected to come back as strings
        ('{"key": 10-20}', '{"key": "10-20"}'),
        ('{"key": 1.1.1}', '{"key": "1.1.1"}'),
    ]
    for raw, expected in cases:
        assert repair_json(raw) == expected
246
+
247
+
248
def test_repair_json_corner_cases_generate_by_gpt_with_objects():
    """Check the Python objects returned by repair_json(..., return_objects=True)."""
    # Each entry pairs a raw input with the exact object expected back.
    cases = [
        # Nested JSON, complete and truncated
        ('{"key1": {"key2": [1, 2, 3]}}', {"key1": {"key2": [1, 2, 3]}}),
        ('{"key1": {"key2": [1, 2, 3', {"key1": {"key2": [1, 2, 3]}}),
        # Empty key
        ('{"": "value"}', {"": "value"}),
        # Unicode character
        ('{"key": "value\u263A"}', {"key": "value☺"}),
        # Escaped special character
        ('{"key": "value\\nvalue"}', {"key": "value\nvalue"}),
        # Large number
        ('{"key": 12345678901234567890}', {"key": 12345678901234567890}),
        # Surrounding and inner whitespace
        (' { "key" : "value" } ', {"key": "value"}),
        # Null value
        ('{"key": null}', {"key": None}),
        # Numeric-like values that are expected to come back as strings
        ('{"key": 10-20}', {"key": "10-20"}),
        ('{"key": 1.1.1}', {"key": "1.1.1"}),
    ]
    for raw, expected in cases:
        assert repair_json(raw, return_objects=True) == expected
280
+
281
+
282
def test_repair_json_skip_json_loads():
    """Exercise repair_json and loads with skip_json_loads=True."""
    well_formed = '{"key": true, "key2": false, "key3": null}'
    missing_value = '{"key": true, "key2": false, "key3": }'

    # Valid input: string output, then object output.
    as_text = repair_json(well_formed, skip_json_loads=True)
    assert as_text == '{"key": true, "key2": false, "key3": null}'
    as_object = repair_json(well_formed, return_objects=True, skip_json_loads=True)
    assert as_object == {"key": True, "key2": False, "key3": None}

    # Input with a missing value: an empty string is expected in its place.
    repaired_text = repair_json(missing_value, skip_json_loads=True)
    assert repaired_text == '{"key": true, "key2": false, "key3": ""}'
    repaired_object = loads(missing_value, skip_json_loads=True)
    assert repaired_object == {"key": True, "key2": False, "key3": ""}
287
+
288
+
289
+ def test_repair_json_from_file():
290
+
291
+ import os.path
292
+ import pathlib
293
+ path = pathlib.Path(__file__).parent.resolve()
294
+
295
+ assert(from_file(os.path.join(path,"invalid.json"))) == '[{"_id": "655b66256574f09bdae8abe8", "index": 0, "guid": "31082ae3-b0f3-4406-90f4-cc450bd4379d", "isActive": false, "balance": "$2,562.78", "picture": "http://placehold.it/32x32", "age": 32, "eyeColor": "brown", "name": "Glover Rivas", "gender": "male", "company": "EMPIRICA", "email": "gloverrivas@empirica.com", "phone": "+1 (842) 507-3063", "address": "536 Montague Terrace, Jenkinsville, Kentucky, 2235", "about": "Mollit consectetur excepteur voluptate tempor dolore ullamco enim irure ullamco non enim officia. Voluptate occaecat proident laboris ea Lorem cupidatat reprehenderit nisi nisi aliqua. Amet nulla ipsum deserunt excepteur amet ad aute aute ex. Et enim minim sit veniam est quis dolor nisi sunt quis eiusmod in. Amet eiusmod cillum sunt occaecat dolor laboris voluptate in eiusmod irure aliqua duis.", "registered": "2023-11-18T09:32:36 -01:00", "latitude": 36.26102, "longitude": -91.304608, "tags": ["non", "tempor", "do", "ullamco", "dolore", "sunt", "ipsum"], "friends": [{"id": 0, "name": "Cara Shepherd"}, {"id": 1, "name": "Mason Farley"}, {"id": 2, "name": "Harriet Cochran"}], "greeting": "Hello, Glover Rivas! You have 7 unread messages.", "favoriteFruit": "strawberry"}, {"_id": "655b662585364bc57278bb6f", "index": 1, "guid": "0dea7a3a-f812-4dde-b78d-7a9b58e5da05", "isActive": true, "balance": "$1,359.48", "picture": "http://placehold.it/32x32", "age": 38, "eyeColor": "brown", "name": "Brandi Moreno", "gender": "female", "company": "MARQET", "email": "brandimoreno@marqet.com", "phone": "+1 (850) 434-2077", "address": "537 Doone Court, Waiohinu, Michigan, 3215", "about": "Irure proident adipisicing do Lorem do incididunt in laborum in eiusmod eiusmod ad elit proident. Eiusmod dolor ex magna magna occaecat. Nulla deserunt velit ex exercitation et irure sunt. Cupidatat ut excepteur ea quis labore sint cupidatat incididunt amet eu consectetur cillum ipsum proident. 
Occaecat exercitation aute laborum dolor proident reprehenderit laborum in voluptate culpa. Exercitation nulla adipisicing culpa aute est deserunt ea nisi deserunt consequat occaecat ut et non. Incididunt ex exercitation dolor dolor anim cillum dolore.", "registered": "2015-09-03T11:47:15 -02:00", "latitude": -19.768953, "longitude": 8.948458, "tags": ["laboris", "occaecat", "laborum", "laborum", "ex", "cillum", "occaecat"], "friends": [{"id": 0, "name": "Erna Kelly"}, {"id": 1, "name": "Black Mays"}, {"id": 2, "name": "Davis Buck"}], "greeting": "Hello, Brandi Moreno! You have 1 unread messages.", "favoriteFruit": "apple"}, {"_id": "655b6625870da431bcf5e0c2", "index": 2, "guid": "b17f6e3f-c898-4334-abbf-05cf222f143b", "isActive": false, "balance": "$1,493.77", "picture": "http://placehold.it/32x32", "age": 20, "eyeColor": "brown", "name": "Moody Meadows", "gender": "male", "company": "OPTIQUE", "email": "moodymeadows@optique.com", "phone": "+1 (993) 566-3041", "address": "766 Osborn Street, Bath, Maine, 7666", "about": "Non commodo excepteur nostrud qui adipisicing aliquip dolor minim nulla culpa proident. In ad cupidatat ea mollit ex est do deserunt proident nostrud. Cillum id id eiusmod amet exercitation nostrud cillum sunt deserunt dolore deserunt eiusmod mollit. Ut ex tempor ad laboris voluptate labore id officia fugiat exercitation amet.", "registered": "2015-01-16T02:48:28 -01:00", "latitude": -25.847327, "longitude": 63.95991, "tags": ["aute", "commodo", "adipisicing", "nostrud", "duis", "mollit", "ut"], "friends": [{"id": 0, "name": "Lacey Cash"}, {"id": 1, "name": "Gabrielle Harmon"}, {"id": 2, "name": "Ellis Lambert"}], "greeting": "Hello, Moody Meadows! 
You have 4 unread messages.", "favoriteFruit": "strawberry"}, {"_id": "655b6625f3e1bf422220854e", "index": 3, "guid": "92229883-2bfd-4974-a08c-1b506b372e46", "isActive": false, "balance": "$2,215.34", "picture": "http://placehold.it/32x32", "age": 22, "eyeColor": "brown", "name": "Heath Nguyen", "gender": "male", "company": "BLEENDOT", "email": "heathnguyen@bleendot.com", "phone": "+1 (989) 512-2797", "address": "135 Milton Street, Graniteville, Nebraska, 276", "about": "Consequat aliquip irure Lorem cupidatat nulla magna ullamco nulla voluptate adipisicing anim consectetur tempor aliquip. Magna aliqua nulla eu tempor esse proident. Proident fugiat ad ex Lorem reprehenderit dolor aliquip labore labore aliquip. Deserunt aute enim ea minim officia anim culpa sint commodo. Cillum consectetur excepteur aliqua exercitation Lorem veniam voluptate.", "registered": "2016-07-06T01:31:07 -02:00", "latitude": -60.997048, "longitude": -102.397885, "tags": ["do", "ad", "consequat", "irure", "tempor", "elit", "minim"], "friends": [{"id": 0, "name": "Walker Hernandez"}, {"id": 1, "name": "Maria Lane"}, {"id": 2, "name": "Mcknight Barron"}], "greeting": "Hello, Heath Nguyen! You have 4 unread messages.", "favoriteFruit": "apple"}, {"_id": "655b6625519a5b5e4b6742bf", "index": 4, "guid": "c5dc685f-6d0d-4173-b4cf-f5df29a1e8ef", "isActive": true, "balance": "$1,358.90", "picture": "http://placehold.it/32x32", "age": 33, "eyeColor": "brown", "name": "Deidre Duke", "gender": "female", "company": "OATFARM", "email": "deidreduke@oatfarm.com", "phone": "+1 (875) 587-3256", "address": "487 Schaefer Street, Wattsville, West Virginia, 4506", "about": "Laboris eu nulla esse magna sit eu deserunt non est aliqua exercitation commodo. Ad occaecat qui qui laborum dolore anim Lorem. Est qui occaecat irure enim deserunt enim aliqua ex deserunt incididunt esse. Quis in minim laboris proident non mollit. Magna ea do labore commodo. 
Et elit esse esse occaecat officia ipsum nisi.", "registered": "2021-09-12T04:17:08 -02:00", "latitude": 68.609781, "longitude": -87.509134, "tags": ["mollit", "cupidatat", "irure", "sit", "consequat", "anim", "fugiat"], "friends": [{"id": 0, "name": "Bean Paul"}, {"id": 1, "name": "Cochran Hubbard"}, {"id": 2, "name": "Rodgers Atkinson"}], "greeting": "Hello, Deidre Duke! You have 6 unread messages.", "favoriteFruit": "apple"}, {"_id": "655b6625a19b3f7e5f82f0ea", "index": 5, "guid": "75f3c264-baa1-47a0-b21c-4edac23d9935", "isActive": true, "balance": "$3,554.36", "picture": "http://placehold.it/32x32", "age": 26, "eyeColor": "blue", "name": "Lydia Holland", "gender": "female", "company": "ESCENTA", "email": "lydiaholland@escenta.com", "phone": "+1 (927) 482-3436", "address": "554 Rockaway Parkway, Kohatk, Montana, 6316", "about": "Consectetur ea est labore commodo laborum mollit pariatur non enim. Est dolore et non laboris tempor. Ea incididunt ut adipisicing cillum labore officia tempor eiusmod commodo. Cillum fugiat ex consectetur ut nostrud anim nostrud exercitation ut duis in ea. Eu et id fugiat est duis eiusmod ullamco quis officia minim sint ea nisi in.", "registered": "2018-03-13T01:48:56 -01:00", "latitude": -88.495799, "longitude": 71.840667, "tags": ["veniam", "minim", "consequat", "consequat", "incididunt", "consequat", "elit"], "friends": [{"id": 0, "name": "Debra Massey"}, {"id": 1, "name": "Weiss Savage"}, {"id": 2, "name": "Shannon Guerra"}], "greeting": "Hello, Lydia Holland! You have 5 unread messages.", "favoriteFruit": "banana"}]'
@@ -0,0 +1,103 @@
1
+ from src.json_repair import repair_json
2
+
3
+ import os.path
4
+ import pathlib
5
+ path = pathlib.Path(__file__).parent.resolve()
6
+
7
# Load both benchmark inputs once at import time. The context managers make
# sure each handle is closed even if the read raises.
with open(os.path.join(path, "valid.json")) as fd:
    correct_json = fd.read()

with open(os.path.join(path, "invalid.json")) as fd:
    incorrect_json = fd.read()
14
+
15
def test_true_true_correct(benchmark):
    """Benchmark repair_json on correct_json (return_objects=True, skip_json_loads=True).

    Fails when the "median" statistic is not below the 1.3 ms budget.
    """
    benchmark(repair_json, correct_json, return_objects=True, skip_json_loads=True)

    # The check is made on the median, not the mean.
    median_time = benchmark.stats.get("median")

    # Time budget in seconds
    max_time = 13 / 10 ** 4  # 1.3 milliseconds

    # Six decimals keep sub-millisecond values visible in the failure message.
    assert median_time < max_time, f"Benchmark exceeded threshold: {median_time:.6f}s > {max_time:.6f}s"
26
+
27
def test_true_true_incorrect(benchmark):
    """Benchmark repair_json on incorrect_json (return_objects=True, skip_json_loads=True).

    Fails when the "median" statistic is not below the 1.3 ms budget.
    """
    benchmark(repair_json, incorrect_json, return_objects=True, skip_json_loads=True)

    # The check is made on the median, not the mean.
    median_time = benchmark.stats.get("median")

    # Time budget in seconds
    max_time = 13 / 10 ** 4  # 1.3 milliseconds

    # Six decimals keep sub-millisecond values visible in the failure message.
    assert median_time < max_time, f"Benchmark exceeded threshold: {median_time:.6f}s > {max_time:.6f}s"
38
+
39
def test_true_false_correct(benchmark):
    """Benchmark repair_json on correct_json (return_objects=True, skip_json_loads=False).

    Fails when the "median" statistic is not below the 20 microsecond budget.
    """
    benchmark(repair_json, correct_json, return_objects=True, skip_json_loads=False)

    # The check is made on the median, not the mean.
    median_time = benchmark.stats.get("median")

    # Time budget in seconds
    max_time = 20 * (1 / 10 ** 6)  # 20 microseconds

    # Six decimals keep microsecond values visible in the failure message.
    assert median_time < max_time, f"Benchmark exceeded threshold: {median_time:.6f}s > {max_time:.6f}s"
49
+
50
def test_true_false_incorrect(benchmark):
    """Benchmark repair_json on incorrect_json (return_objects=True, skip_json_loads=False).

    Fails when the "median" statistic is not below the 1.3 ms budget.
    """
    benchmark(repair_json, incorrect_json, return_objects=True, skip_json_loads=False)

    # The check is made on the median, not the mean.
    median_time = benchmark.stats.get("median")

    # Time budget in seconds
    max_time = 13 / 10 ** 4  # 1.3 milliseconds

    # Six decimals keep sub-millisecond values visible in the failure message.
    assert median_time < max_time, f"Benchmark exceeded threshold: {median_time:.6f}s > {max_time:.6f}s"
60
+
61
def test_false_true_correct(benchmark):
    """Benchmark repair_json on correct_json (return_objects=False, skip_json_loads=True).

    Fails when the "median" statistic is not below the 1.3 ms budget.
    """
    benchmark(repair_json, correct_json, return_objects=False, skip_json_loads=True)

    # The check is made on the median, not the mean.
    median_time = benchmark.stats.get("median")

    # Time budget in seconds
    max_time = 13 / 10 ** 4  # 1.3 milliseconds

    # Six decimals keep sub-millisecond values visible in the failure message.
    assert median_time < max_time, f"Benchmark exceeded threshold: {median_time:.6f}s > {max_time:.6f}s"
71
+
72
def test_false_true_incorrect(benchmark):
    """Benchmark repair_json on incorrect_json (return_objects=False, skip_json_loads=True).

    Fails when the "median" statistic is not below the 1.3 ms budget.
    """
    benchmark(repair_json, incorrect_json, return_objects=False, skip_json_loads=True)

    # The check is made on the median, not the mean.
    median_time = benchmark.stats.get("median")

    # Time budget in seconds
    max_time = 13 / 10 ** 4  # 1.3 milliseconds

    # Six decimals keep sub-millisecond values visible in the failure message.
    assert median_time < max_time, f"Benchmark exceeded threshold: {median_time:.6f}s > {max_time:.6f}s"
82
+
83
def test_false_false_correct(benchmark):
    """Benchmark repair_json on correct_json (return_objects=False, skip_json_loads=False).

    Fails when the "median" statistic is not below the 50 microsecond budget.
    """
    benchmark(repair_json, correct_json, return_objects=False, skip_json_loads=False)

    # The check is made on the median, not the mean.
    median_time = benchmark.stats.get("median")

    # Time budget in seconds
    max_time = 50 / 10 ** 6  # 50 microseconds

    # Six decimals keep microsecond values visible in the failure message.
    assert median_time < max_time, f"Benchmark exceeded threshold: {median_time:.6f}s > {max_time:.6f}s"
93
+
94
def test_false_false_incorrect(benchmark):
    """Benchmark repair_json on incorrect_json (return_objects=False, skip_json_loads=False).

    Fails when the "median" statistic is not below the 1.4 ms budget.
    """
    benchmark(repair_json, incorrect_json, return_objects=False, skip_json_loads=False)

    # The check is made on the median, not the mean.
    median_time = benchmark.stats.get("median")

    # Time budget in seconds
    max_time = 14 / 10 ** 4  # 1.4 milliseconds

    # Six decimals keep sub-millisecond values visible in the failure message.
    assert median_time < max_time, f"Benchmark exceeded threshold: {median_time:.6f}s > {max_time:.6f}s"