json-repair 0.5.0__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {json_repair-0.5.0/src/json_repair.egg-info → json_repair-0.6.0}/PKG-INFO +1 -1
- {json_repair-0.5.0 → json_repair-0.6.0}/pyproject.toml +1 -1
- {json_repair-0.5.0 → json_repair-0.6.0}/src/json_repair/json_repair.py +17 -0
- {json_repair-0.5.0 → json_repair-0.6.0/src/json_repair.egg-info}/PKG-INFO +1 -1
- {json_repair-0.5.0 → json_repair-0.6.0}/tests/test_json_repair.py +14 -0
- {json_repair-0.5.0 → json_repair-0.6.0}/tests/test_performance.py +4 -4
- {json_repair-0.5.0 → json_repair-0.6.0}/LICENSE +0 -0
- {json_repair-0.5.0 → json_repair-0.6.0}/README.md +0 -0
- {json_repair-0.5.0 → json_repair-0.6.0}/setup.cfg +0 -0
- {json_repair-0.5.0 → json_repair-0.6.0}/src/json_repair/__init__.py +0 -0
- {json_repair-0.5.0 → json_repair-0.6.0}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.5.0 → json_repair-0.6.0}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.5.0 → json_repair-0.6.0}/src/json_repair.egg-info/top_level.txt +0 -0
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.5.0"
|
6
|
+
version = "0.6.0"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -111,6 +111,11 @@ class JSONParser:
|
|
111
111
|
use_single_quotes=(self.json_str[self.index] == "'")
|
112
112
|
)
|
113
113
|
|
114
|
+
# This can happen sometimes like { "": "value" }
|
115
|
+
if key == "" and self.get_char_at() == ":":
|
116
|
+
key = "empty_placeholder"
|
117
|
+
break
|
118
|
+
|
114
119
|
# We reached the end here
|
115
120
|
if key == "}":
|
116
121
|
continue
|
@@ -197,6 +202,7 @@ class JSONParser:
|
|
197
202
|
# * It iterated over the entire sequence
|
198
203
|
# * If we are fixing missing quotes in an object, when it finds the special terminators
|
199
204
|
char = self.get_char_at()
|
205
|
+
fix_broken_markdown_link = False
|
200
206
|
while char and char != string_terminator:
|
201
207
|
if fixed_quotes:
|
202
208
|
if self.context == "object_key" and (char == ":" or char.isspace()):
|
@@ -205,6 +211,17 @@ class JSONParser:
|
|
205
211
|
break
|
206
212
|
self.index += 1
|
207
213
|
char = self.get_char_at()
|
214
|
+
# ChatGPT sometimes forget to quote links in markdown like: { "content": "[LINK]("https://google.com")" }
|
215
|
+
if char == string_terminator and (
|
216
|
+
fix_broken_markdown_link
|
217
|
+
or (
|
218
|
+
self.index - 2 > 0
|
219
|
+
and self.json_str[self.index - 2 : self.index] == "]("
|
220
|
+
)
|
221
|
+
):
|
222
|
+
fix_broken_markdown_link = not fix_broken_markdown_link
|
223
|
+
self.index += 1
|
224
|
+
char = self.get_char_at()
|
208
225
|
|
209
226
|
if char and fixed_quotes and self.context == "object_key" and char.isspace():
|
210
227
|
self.skip_whitespaces_at()
|
@@ -49,6 +49,8 @@ def test_repair_json():
|
|
49
49
|
assert repair_json("{") == "{}"
|
50
50
|
assert repair_json('{"') == '{"": ""}'
|
51
51
|
assert repair_json('["') == '[]'
|
52
|
+
assert repair_json("'\"'") == '"\\\""'
|
53
|
+
assert repair_json("'string\"") == '"string\\\""'
|
52
54
|
assert repair_json('{foo: [}') == '{"foo": []}'
|
53
55
|
assert repair_json('{"key": "value:value"}') == '{"key": "value:value"}'
|
54
56
|
assert (
|
@@ -79,6 +81,15 @@ def test_repair_json():
|
|
79
81
|
}
|
80
82
|
# Test with garbage comments
|
81
83
|
assert repair_json('{"value_1": true, SHOULD_NOT_EXIST "value_2": "data" AAAA }') == '{"value_1": true, "value_2": "data"}'
|
84
|
+
assert {
|
85
|
+
repair_json('{"" : true, "key2": "value2"}') == '{" ": true, "key2": "value_2"}'
|
86
|
+
}
|
87
|
+
assert {
|
88
|
+
repair_json('{"": true, "key2": "value2"}') == '{"empty_placeholder": true, "key2": "value_2"}'
|
89
|
+
}
|
90
|
+
|
91
|
+
#Test markdown stupidities from ChatGPT
|
92
|
+
assert repair_json('{ "content": "[LINK]("https://google.com")" }') == '{"content": "[LINK](\\"https://google.com\\")"}'
|
82
93
|
|
83
94
|
|
84
95
|
|
@@ -137,6 +148,9 @@ def test_repair_json_with_objects():
|
|
137
148
|
#Test with garbage comments
|
138
149
|
assert repair_json('{"value_1": true, SHOULD_NOT_EXIST "value_2": "data" AAAA }', True) == {'value_1': True, 'value_2': 'data'}
|
139
150
|
|
151
|
+
#Test markdown stupidities from ChatGPT
|
152
|
+
assert repair_json('{ "content": "[LINK]("https://google.com")" }', True) == { "content": "[LINK](\"https://google.com\")"}
|
153
|
+
|
140
154
|
|
141
155
|
def test_repair_json_corner_cases_generate_by_gpt():
|
142
156
|
# Test with nested JSON
|
@@ -282,7 +282,7 @@ def test_true_true(benchmark):
|
|
282
282
|
mean_time = benchmark.stats.get("median")
|
283
283
|
|
284
284
|
# Define your time threshold in seconds (100ms in this case)
|
285
|
-
max_time = 1 / 10 ** 6 # 1 microsecond
|
285
|
+
max_time = 1.1 / 10 ** 6 # 1.1 microsecond
|
286
286
|
|
287
287
|
# Assert that the average time is below the threshold
|
288
288
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -293,7 +293,7 @@ def test_true_false(benchmark):
|
|
293
293
|
mean_time = benchmark.stats.get("median")
|
294
294
|
|
295
295
|
# Define your time threshold in seconds (100ms in this case)
|
296
|
-
max_time =
|
296
|
+
max_time = 180 * (1 / 10 ** 6) # 180 microsecond
|
297
297
|
|
298
298
|
# Assert that the average time is below the threshold
|
299
299
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -304,7 +304,7 @@ def test_false_true(benchmark):
|
|
304
304
|
mean_time = benchmark.stats.get("median")
|
305
305
|
|
306
306
|
# Define your time threshold in seconds (ms in this case)
|
307
|
-
max_time =
|
307
|
+
max_time = 1 / 10 ** 3 # 1 millisecond
|
308
308
|
|
309
309
|
# Assert that the average time is below the threshold
|
310
310
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -315,7 +315,7 @@ def test_false_false(benchmark):
|
|
315
315
|
mean_time = benchmark.stats.get("median")
|
316
316
|
|
317
317
|
# Define your time threshold in seconds (100ms in this case)
|
318
|
-
max_time =
|
318
|
+
max_time = 210 * (1 / 10 ** 6) # 210 microsecond
|
319
319
|
|
320
320
|
# Assert that the average time is below the threshold
|
321
321
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|