json-repair 0.23.1__tar.gz → 0.25.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {json_repair-0.23.1/src/json_repair.egg-info → json_repair-0.25.0}/PKG-INFO +7 -1
- {json_repair-0.23.1 → json_repair-0.25.0}/README.md +6 -0
- {json_repair-0.23.1 → json_repair-0.25.0}/pyproject.toml +1 -1
- {json_repair-0.23.1 → json_repair-0.25.0}/src/json_repair/json_repair.py +64 -4
- {json_repair-0.23.1 → json_repair-0.25.0/src/json_repair.egg-info}/PKG-INFO +7 -1
- {json_repair-0.23.1 → json_repair-0.25.0}/tests/test_json_repair.py +6 -4
- {json_repair-0.23.1 → json_repair-0.25.0}/tests/test_performance.py +1 -1
- {json_repair-0.23.1 → json_repair-0.25.0}/LICENSE +0 -0
- {json_repair-0.23.1 → json_repair-0.25.0}/setup.cfg +0 -0
- {json_repair-0.23.1 → json_repair-0.25.0}/src/json_repair/__init__.py +0 -0
- {json_repair-0.23.1 → json_repair-0.25.0}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.23.1 → json_repair-0.25.0}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.23.1 → json_repair-0.25.0}/src/json_repair.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.23.1
|
3
|
+
Version: 0.25.0
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -45,6 +45,11 @@ If you find this library useful, you can help me by donating toward my monthly b
|
|
45
45
|
|
46
46
|
---
|
47
47
|
|
48
|
+
# Demo
|
49
|
+
If you are unsure if this library will fix your specific problem, or simply want your json validated online, you can visit the demo site on GitHub pages: https://mangiucugna.github.io/json_repair/
|
50
|
+
|
51
|
+
---
|
52
|
+
|
48
53
|
# Motivation
|
49
54
|
Some LLMs are a bit iffy when it comes to returning well formed JSON data, sometimes they skip a parentheses and sometimes they add some words in it, because that's what an LLM does.
|
50
55
|
Luckily, the mistakes LLMs make are simple enough to be fixed without destroying the content.
|
@@ -160,6 +165,7 @@ You will need owner access to this repository
|
|
160
165
|
# Repair JSON in other programming languages
|
161
166
|
- Typescript: https://github.com/josdejong/jsonrepair
|
162
167
|
- Go: https://github.com/RealAlexandreAI/json-repair
|
168
|
+
- Ruby: https://github.com/sashazykov/json-repair-rb
|
163
169
|
---
|
164
170
|
## Star History
|
165
171
|
|
@@ -8,6 +8,11 @@ If you find this library useful, you can help me by donating toward my monthly b
|
|
8
8
|
|
9
9
|
---
|
10
10
|
|
11
|
+
# Demo
|
12
|
+
If you are unsure if this library will fix your specific problem, or simply want your json validated online, you can visit the demo site on GitHub pages: https://mangiucugna.github.io/json_repair/
|
13
|
+
|
14
|
+
---
|
15
|
+
|
11
16
|
# Motivation
|
12
17
|
Some LLMs are a bit iffy when it comes to returning well formed JSON data, sometimes they skip a parentheses and sometimes they add some words in it, because that's what an LLM does.
|
13
18
|
Luckily, the mistakes LLMs make are simple enough to be fixed without destroying the content.
|
@@ -123,6 +128,7 @@ You will need owner access to this repository
|
|
123
128
|
# Repair JSON in other programming languages
|
124
129
|
- Typescript: https://github.com/josdejong/jsonrepair
|
125
130
|
- Go: https://github.com/RealAlexandreAI/json-repair
|
131
|
+
- Ruby: https://github.com/sashazykov/json-repair-rb
|
126
132
|
---
|
127
133
|
## Star History
|
128
134
|
|
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.23.1"
|
6
|
+
version = "0.25.0"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -91,6 +91,10 @@ class JSONParser:
|
|
91
91
|
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
|
92
92
|
json = self.parse_json()
|
93
93
|
if self.index < len(self.json_str):
|
94
|
+
self.log(
|
95
|
+
"The parser returned early, checking if there's more json elements",
|
96
|
+
"info",
|
97
|
+
)
|
94
98
|
json = [json]
|
95
99
|
last_index = self.index
|
96
100
|
while self.index < len(self.json_str):
|
@@ -100,10 +104,13 @@ class JSONParser:
|
|
100
104
|
if self.index == last_index:
|
101
105
|
self.index += 1
|
102
106
|
last_index = self.index
|
107
|
+
# If nothing extra was found, don't return an array
|
103
108
|
if len(json) == 1:
|
109
|
+
self.log(
|
110
|
+
"There were no more elements, returning the element without the array",
|
111
|
+
"info",
|
112
|
+
)
|
104
113
|
json = json[0]
|
105
|
-
elif len(json) == 0:
|
106
|
-
json = ""
|
107
114
|
if self.logger.log_level == "none":
|
108
115
|
return json
|
109
116
|
else:
|
@@ -363,9 +370,34 @@ class JSONParser:
|
|
363
370
|
if self.get_context() == "object_key" and (
|
364
371
|
char == ":" or char.isspace()
|
365
372
|
):
|
373
|
+
self.log(
|
374
|
+
"While parsing a string missing the left delimiter in object key context, we found a :, stopping here",
|
375
|
+
"info",
|
376
|
+
)
|
366
377
|
break
|
367
378
|
elif self.get_context() == "object_value" and char in [",", "}"]:
|
368
|
-
|
379
|
+
rstring_delimiter_missing = True
|
380
|
+
# check if this is a case in which the closing comma is NOT missing instead
|
381
|
+
i = 1
|
382
|
+
next_c = self.get_char_at(i)
|
383
|
+
while next_c and next_c != rstring_delimiter:
|
384
|
+
i += 1
|
385
|
+
next_c = self.get_char_at(i)
|
386
|
+
if next_c:
|
387
|
+
i += 1
|
388
|
+
next_c = self.get_char_at(i)
|
389
|
+
# found a delimiter, now we need to check that is followed strictly by a comma or brace
|
390
|
+
while next_c and next_c.isspace():
|
391
|
+
i += 1
|
392
|
+
next_c = self.get_char_at(i)
|
393
|
+
if next_c and next_c in [",", "}"]:
|
394
|
+
rstring_delimiter_missing = False
|
395
|
+
if rstring_delimiter_missing:
|
396
|
+
self.log(
|
397
|
+
"While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn't determine that a right delimiter was present. Stopping here",
|
398
|
+
"info",
|
399
|
+
)
|
400
|
+
break
|
369
401
|
string_acc += char
|
370
402
|
self.index += 1
|
371
403
|
char = self.get_char_at()
|
@@ -386,6 +418,33 @@ class JSONParser:
|
|
386
418
|
"While parsing a string, we found a doubled quote, ignoring it",
|
387
419
|
"info",
|
388
420
|
)
|
421
|
+
elif missing_quotes and self.get_context() == "object_value":
|
422
|
+
# In case of missing starting quote I need to check if the delimeter is the end or the beginning of a key
|
423
|
+
i = 1
|
424
|
+
next_c = self.get_char_at(i)
|
425
|
+
while next_c and next_c not in [
|
426
|
+
rstring_delimiter,
|
427
|
+
lstring_delimiter,
|
428
|
+
]:
|
429
|
+
i += 1
|
430
|
+
next_c = self.get_char_at(i)
|
431
|
+
if next_c:
|
432
|
+
# We found a quote, now let's make sure there's a ":" following
|
433
|
+
i += 1
|
434
|
+
next_c = self.get_char_at(i)
|
435
|
+
# found a delimiter, now we need to check that is followed strictly by a comma or brace
|
436
|
+
while next_c and next_c.isspace():
|
437
|
+
i += 1
|
438
|
+
next_c = self.get_char_at(i)
|
439
|
+
if next_c and next_c == ":":
|
440
|
+
# Reset the cursor
|
441
|
+
self.index -= 1
|
442
|
+
char = self.get_char_at()
|
443
|
+
self.log(
|
444
|
+
"In a string with missing quotes and object value context, I found a delimeter but it turns out it was the beginning on the next key. Stopping here.",
|
445
|
+
"info",
|
446
|
+
)
|
447
|
+
break
|
389
448
|
else:
|
390
449
|
# Check if eventually there is a rstring delimiter, otherwise we bail
|
391
450
|
i = 1
|
@@ -496,7 +555,8 @@ class JSONParser:
|
|
496
555
|
number_str = ""
|
497
556
|
number_chars = set("0123456789-.eE/,")
|
498
557
|
char = self.get_char_at()
|
499
|
-
|
558
|
+
is_array = self.get_context() == "array"
|
559
|
+
while char and char in number_chars and (char != "," or not is_array):
|
500
560
|
number_str += char
|
501
561
|
self.index += 1
|
502
562
|
char = self.get_char_at()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.23.1
|
3
|
+
Version: 0.25.0
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -45,6 +45,11 @@ If you find this library useful, you can help me by donating toward my monthly b
|
|
45
45
|
|
46
46
|
---
|
47
47
|
|
48
|
+
# Demo
|
49
|
+
If you are unsure if this library will fix your specific problem, or simply want your json validated online, you can visit the demo site on GitHub pages: https://mangiucugna.github.io/json_repair/
|
50
|
+
|
51
|
+
---
|
52
|
+
|
48
53
|
# Motivation
|
49
54
|
Some LLMs are a bit iffy when it comes to returning well formed JSON data, sometimes they skip a parentheses and sometimes they add some words in it, because that's what an LLM does.
|
50
55
|
Luckily, the mistakes LLMs make are simple enough to be fixed without destroying the content.
|
@@ -160,6 +165,7 @@ You will need owner access to this repository
|
|
160
165
|
# Repair JSON in other programming languages
|
161
166
|
- Typescript: https://github.com/josdejong/jsonrepair
|
162
167
|
- Go: https://github.com/RealAlexandreAI/json-repair
|
168
|
+
- Ruby: https://github.com/sashazykov/json-repair-rb
|
163
169
|
---
|
164
170
|
## Star History
|
165
171
|
|
@@ -141,6 +141,8 @@ def test_object_edge_cases():
|
|
141
141
|
assert repair_json('''{ "a" : "{ b": {} }" }''') == '{"a": "{ b"}'
|
142
142
|
assert repair_json("""{"b": "xxxxx" true}""") == '{"b": "xxxxx"}'
|
143
143
|
assert repair_json('{"key": "Lorem "ipsum" s,"}') == '{"key": "Lorem \\"ipsum\\" s,"}'
|
144
|
+
assert repair_json('{"lorem": ipsum, sic, datum.",}') == '{"lorem": "ipsum, sic, datum."}'
|
145
|
+
assert repair_json('{"lorem": sic tamet. "ipsum": sic tamet, quick brown fox. "sic": ipsum}') == '{"lorem": "sic tamet.", "ipsum": "sic tamet", "sic": "ipsum"}'
|
144
146
|
|
145
147
|
def test_number_edge_cases():
|
146
148
|
assert repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
|
@@ -148,7 +150,7 @@ def test_number_edge_cases():
|
|
148
150
|
assert repair_json('{"key": .25}') == '{"key": 0.25}'
|
149
151
|
assert repair_json('{"here": "now", "key": 1/3, "foo": "bar"}') == '{"here": "now", "key": "1/3", "foo": "bar"}'
|
150
152
|
assert repair_json('{"key": 12345/67890}') == '{"key": "12345/67890"}'
|
151
|
-
assert repair_json('[105,12') == '[
|
153
|
+
assert repair_json('[105,12') == '[105, 12]'
|
152
154
|
assert repair_json('{"key", 105,12,') == '{"key": "105,12"}'
|
153
155
|
assert repair_json('{"key": 1/3, "foo": "bar"}') == '{"key": "1/3", "foo": "bar"}'
|
154
156
|
assert repair_json('{"key": 10-20}') == '{"key": "10-20"}'
|
@@ -171,8 +173,8 @@ def test_leading_trailing_characters():
|
|
171
173
|
def test_multiple_jsons():
|
172
174
|
assert repair_json("[]{}") == "[[], {}]"
|
173
175
|
assert repair_json("{}[]{}") == "[{}, [], {}]"
|
174
|
-
assert repair_json('{"key":"value"}[1,2,3,True]') == '[{"key": "value"}, [
|
175
|
-
assert repair_json('lorem ```json {"key":"value"} ``` ipsum ```json [1,2,3,True] ``` 42') == '[{"key": "value"}, [
|
176
|
+
assert repair_json('{"key":"value"}[1,2,3,True]') == '[{"key": "value"}, [1, 2, 3, true]]'
|
177
|
+
assert repair_json('lorem ```json {"key":"value"} ``` ipsum ```json [1,2,3,True] ``` 42') == '[{"key": "value"}, [1, 2, 3, true]]'
|
176
178
|
|
177
179
|
def test_repair_json_with_objects():
|
178
180
|
# Test with valid JSON strings
|
@@ -242,7 +244,7 @@ def test_repair_json_from_file():
|
|
242
244
|
# Write content to the temporary file
|
243
245
|
with os.fdopen(temp_fd, 'w') as tmp:
|
244
246
|
tmp.write("{key:value}")
|
245
|
-
assert(from_file(temp_path, logging=True)) == ({'key': 'value'}, [{'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'text': 'While parsing a string, we found no starting quote. Will add the quote back', 'context': '{key:value}'}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}, {'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'text': 'While parsing a string, we found no starting quote. Will add the quote back', 'context': '{key:value}'}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}])
|
247
|
+
assert(from_file(temp_path, logging=True)) == ({'key': 'value'}, [{'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'text': 'While parsing a string, we found no starting quote. Will add the quote back', 'context': '{key:value}'}, {'context': '{key:value}', 'text': 'While parsing a string missing the left delimiter in object key context, we found a :, stopping here',}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}, {'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'text': 'While parsing a string, we found no starting quote. Will add the quote back', 'context': '{key:value}'}, {'context': '{key:value}', 'text': 'While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn\'t determine that a right delimiter was present. Stopping here'}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}])
|
246
248
|
finally:
|
247
249
|
# Clean up - delete the temporary file
|
248
250
|
os.remove(temp_path)
|
@@ -97,7 +97,7 @@ def test_false_false_incorrect(benchmark):
|
|
97
97
|
mean_time = benchmark.stats.get("median")
|
98
98
|
|
99
99
|
# Define your time threshold in seconds
|
100
|
-
max_time =
|
100
|
+
max_time = 15 / 10 ** 4 # 1.5 millisecond
|
101
101
|
|
102
102
|
# Assert that the average time is below the threshold
|
103
103
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|