json-repair 0.27.1__tar.gz → 0.28.0__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {json_repair-0.27.1/src/json_repair.egg-info → json_repair-0.28.0}/PKG-INFO +3 -2
- {json_repair-0.27.1 → json_repair-0.28.0}/README.md +1 -1
- {json_repair-0.27.1 → json_repair-0.28.0}/pyproject.toml +6 -2
- {json_repair-0.27.1 → json_repair-0.28.0}/src/json_repair/json_repair.py +24 -63
- json_repair-0.28.0/src/json_repair/py.typed +0 -0
- {json_repair-0.27.1 → json_repair-0.28.0/src/json_repair.egg-info}/PKG-INFO +3 -2
- {json_repair-0.27.1 → json_repair-0.28.0}/src/json_repair.egg-info/SOURCES.txt +2 -0
- json_repair-0.28.0/tests/test_coverage.py +18 -0
- {json_repair-0.27.1 → json_repair-0.28.0}/tests/test_json_repair.py +13 -23
- {json_repair-0.27.1 → json_repair-0.28.0}/LICENSE +0 -0
- {json_repair-0.27.1 → json_repair-0.28.0}/setup.cfg +0 -0
- {json_repair-0.27.1 → json_repair-0.28.0}/src/json_repair/__init__.py +0 -0
- {json_repair-0.27.1 → json_repair-0.28.0}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.27.1 → json_repair-0.28.0}/src/json_repair.egg-info/top_level.txt +0 -0
- {json_repair-0.27.1 → json_repair-0.28.0}/tests/test_performance.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.27.1
|
3
|
+
Version: 0.28.0
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -27,6 +27,7 @@ License: MIT License
|
|
27
27
|
|
28
28
|
Project-URL: Homepage, https://github.com/mangiucugna/json_repair/
|
29
29
|
Project-URL: Bug Tracker, https://github.com/mangiucugna/json_repair/issues
|
30
|
+
Project-URL: Live demo, https://mangiucugna.github.io/json_repair/
|
30
31
|
Keywords: JSON,REPAIR,LLM,PARSER
|
31
32
|
Classifier: Programming Language :: Python :: 3
|
32
33
|
Classifier: License :: OSI Approved :: MIT License
|
@@ -38,7 +39,7 @@ License-File: LICENSE
|
|
38
39
|
[data:image/s3,"s3://crabby-images/24cc2/24cc27e92d35c47c6c4cb73f8131696d29065dd2" alt="PyPI"](https://pypi.org/project/json-repair/)
|
39
40
|
data:image/s3,"s3://crabby-images/7c195/7c195ec0ce8f4bd0be15fa58d3802cbebdbf1b37" alt="Python version"
|
40
41
|
[data:image/s3,"s3://crabby-images/8b55b/8b55be8502970b06016864be9ce046f1c0a7ec33" alt="PyPI downloads"](https://pypi.org/project/json-repair/)
|
41
|
-
|
42
|
+
[data:image/s3,"s3://crabby-images/bd5f7/bd5f772be6c859b9655a69ddd1f9967aefd77400" alt="Github Sponsors"](https://github.com/sponsors/mangiucugna)
|
42
43
|
|
43
44
|
This simple package can be used to fix an invalid json string. To know all cases in which this package will work, check out the unit test.
|
44
45
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
[data:image/s3,"s3://crabby-images/24cc2/24cc27e92d35c47c6c4cb73f8131696d29065dd2" alt="PyPI"](https://pypi.org/project/json-repair/)
|
2
2
|
data:image/s3,"s3://crabby-images/7c195/7c195ec0ce8f4bd0be15fa58d3802cbebdbf1b37" alt="Python version"
|
3
3
|
[data:image/s3,"s3://crabby-images/8b55b/8b55be8502970b06016864be9ce046f1c0a7ec33" alt="PyPI downloads"](https://pypi.org/project/json-repair/)
|
4
|
-
|
4
|
+
[data:image/s3,"s3://crabby-images/bd5f7/bd5f772be6c859b9655a69ddd1f9967aefd77400" alt="Github Sponsors"](https://github.com/sponsors/mangiucugna)
|
5
5
|
|
6
6
|
This simple package can be used to fix an invalid json string. To know all cases in which this package will work, check out the unit test.
|
7
7
|
|
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.27.1"
|
6
|
+
version = "0.28.0"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -17,11 +17,15 @@ classifiers = [
|
|
17
17
|
"License :: OSI Approved :: MIT License",
|
18
18
|
"Operating System :: OS Independent",
|
19
19
|
]
|
20
|
-
|
21
20
|
[project.urls]
|
22
21
|
"Homepage" = "https://github.com/mangiucugna/json_repair/"
|
23
22
|
"Bug Tracker" = "https://github.com/mangiucugna/json_repair/issues"
|
23
|
+
"Live demo" = "https://mangiucugna.github.io/json_repair/"
|
24
24
|
[tool.pytest.ini_options]
|
25
25
|
pythonpath = [
|
26
26
|
"."
|
27
27
|
]
|
28
|
+
[tool.setuptools.package-data]
|
29
|
+
"pkgname" = ["py.typed"]
|
30
|
+
[tool.setuptools.packages.find]
|
31
|
+
where = ["src"]
|
@@ -51,9 +51,6 @@ class StringFileWrapper:
|
|
51
51
|
self.fd.seek(current_position)
|
52
52
|
return self.length
|
53
53
|
|
54
|
-
def __setitem__(self) -> None:
|
55
|
-
raise Exception("This is read-only!")
|
56
|
-
|
57
54
|
|
58
55
|
class LoggerConfig:
|
59
56
|
# This is a type class to simplify the declaration
|
@@ -179,21 +176,12 @@ class JSONParser:
|
|
179
176
|
|
180
177
|
# <member> starts with a <string>
|
181
178
|
key = ""
|
182
|
-
while
|
183
|
-
current_index = self.index
|
179
|
+
while self.get_char_at():
|
184
180
|
key = self.parse_string()
|
185
181
|
|
186
|
-
|
187
|
-
|
188
|
-
key = "empty_placeholder"
|
189
|
-
self.log(
|
190
|
-
"While parsing an object we found an empty key, replacing with empty_placeholder",
|
191
|
-
"info",
|
192
|
-
)
|
182
|
+
if key != "" or (key == "" and self.get_char_at() == ":"):
|
183
|
+
# If the string is empty but there is a object divider, we are done here
|
193
184
|
break
|
194
|
-
elif key == "" and self.index == current_index:
|
195
|
-
# Sometimes the string search might not move the index at all, that might lead us to an infinite loop
|
196
|
-
self.index += 1
|
197
185
|
|
198
186
|
self.skip_whitespaces_at()
|
199
187
|
|
@@ -226,13 +214,6 @@ class JSONParser:
|
|
226
214
|
# Remove trailing spaces
|
227
215
|
self.skip_whitespaces_at()
|
228
216
|
|
229
|
-
# Especially at the end of an LLM generated json you might miss the last "}"
|
230
|
-
if (self.get_char_at() or "}") != "}":
|
231
|
-
self.log(
|
232
|
-
"While parsing an object, we couldn't find the closing }, ignoring",
|
233
|
-
"info",
|
234
|
-
)
|
235
|
-
|
236
217
|
self.index += 1
|
237
218
|
return obj
|
238
219
|
|
@@ -261,13 +242,6 @@ class JSONParser:
|
|
261
242
|
while char and (char.isspace() or char == ","):
|
262
243
|
self.index += 1
|
263
244
|
char = self.get_char_at()
|
264
|
-
# If this is the right value of an object and we are closing the object, it means the array is over
|
265
|
-
if self.get_context() == "object_value" and char == "}":
|
266
|
-
self.log(
|
267
|
-
"While parsing an array inside an object, we got to the end without finding a ]. Stopped parsing",
|
268
|
-
"info",
|
269
|
-
)
|
270
|
-
break
|
271
245
|
|
272
246
|
# Especially at the end of an LLM generated json you might miss the last "]"
|
273
247
|
char = self.get_char_at()
|
@@ -275,14 +249,6 @@ class JSONParser:
|
|
275
249
|
self.log(
|
276
250
|
"While parsing an array we missed the closing ], adding it back", "info"
|
277
251
|
)
|
278
|
-
# Sometimes when you fix a missing "]" you'll have a trailing "," there that makes the JSON invalid
|
279
|
-
if char == ",":
|
280
|
-
# Remove trailing "," before adding the "]"
|
281
|
-
self.log(
|
282
|
-
"While parsing an array, found a trailing , before adding ]",
|
283
|
-
"info",
|
284
|
-
)
|
285
|
-
|
286
252
|
self.index -= 1
|
287
253
|
|
288
254
|
self.index += 1
|
@@ -337,7 +303,11 @@ class JSONParser:
|
|
337
303
|
|
338
304
|
# There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
|
339
305
|
if self.get_char_at() == lstring_delimiter:
|
340
|
-
#
|
306
|
+
# If it's an empty key, this was easy
|
307
|
+
if self.get_context() == "object_key" and self.get_char_at(1) == ":":
|
308
|
+
self.index += 1
|
309
|
+
return ""
|
310
|
+
# Find the next delimiter
|
341
311
|
i = 1
|
342
312
|
next_c = self.get_char_at(i)
|
343
313
|
while next_c and next_c != rstring_delimiter:
|
@@ -429,6 +399,7 @@ class JSONParser:
|
|
429
399
|
"While parsing a string, we found a doubled quote, ignoring it",
|
430
400
|
"info",
|
431
401
|
)
|
402
|
+
self.index += 1
|
432
403
|
elif missing_quotes and self.get_context() == "object_value":
|
433
404
|
# In case of missing starting quote I need to check if the delimeter is the end or the beginning of a key
|
434
405
|
i = 1
|
@@ -467,7 +438,7 @@ class JSONParser:
|
|
467
438
|
]:
|
468
439
|
# This is a bit of a weird workaround, essentially in object_value context we don't always break on commas
|
469
440
|
# This is because the routine after will make sure to correct any bad guess and this solves a corner case
|
470
|
-
if next_c.isalpha():
|
441
|
+
if check_comma_in_object_value and next_c.isalpha():
|
471
442
|
check_comma_in_object_value = False
|
472
443
|
# If we are in an object context, let's check for the right delimiters
|
473
444
|
if (
|
@@ -575,22 +546,18 @@ class JSONParser:
|
|
575
546
|
# The number ends with a non valid character for a number/currency, rolling back one
|
576
547
|
number_str = number_str[:-1]
|
577
548
|
self.index -= 1
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
return number_str
|
591
|
-
else:
|
592
|
-
# If nothing works, let's skip and keep parsing
|
593
|
-
return self.parse_json()
|
549
|
+
try:
|
550
|
+
if "," in number_str:
|
551
|
+
return str(number_str)
|
552
|
+
if "." in number_str or "e" in number_str or "E" in number_str:
|
553
|
+
return float(number_str)
|
554
|
+
elif number_str == "-":
|
555
|
+
# If there is a stray "-" this will throw an exception, throw away this character
|
556
|
+
return self.parse_json()
|
557
|
+
else:
|
558
|
+
return int(number_str)
|
559
|
+
except ValueError:
|
560
|
+
return number_str
|
594
561
|
|
595
562
|
def parse_boolean_or_null(self) -> Union[bool, str, None]:
|
596
563
|
# <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
|
@@ -644,16 +611,10 @@ class JSONParser:
|
|
644
611
|
self.context.append(value)
|
645
612
|
|
646
613
|
def reset_context(self) -> None:
|
647
|
-
|
648
|
-
self.context.pop()
|
649
|
-
except Exception:
|
650
|
-
return
|
614
|
+
self.context.pop()
|
651
615
|
|
652
616
|
def get_context(self) -> str:
|
653
|
-
|
654
|
-
return self.context[-1]
|
655
|
-
except Exception:
|
656
|
-
return ""
|
617
|
+
return self.context[-1]
|
657
618
|
|
658
619
|
def log(self, text: str, level: str) -> None:
|
659
620
|
if level == self.logger.log_level:
|
File without changes
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.27.1
|
3
|
+
Version: 0.28.0
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -27,6 +27,7 @@ License: MIT License
|
|
27
27
|
|
28
28
|
Project-URL: Homepage, https://github.com/mangiucugna/json_repair/
|
29
29
|
Project-URL: Bug Tracker, https://github.com/mangiucugna/json_repair/issues
|
30
|
+
Project-URL: Live demo, https://mangiucugna.github.io/json_repair/
|
30
31
|
Keywords: JSON,REPAIR,LLM,PARSER
|
31
32
|
Classifier: Programming Language :: Python :: 3
|
32
33
|
Classifier: License :: OSI Approved :: MIT License
|
@@ -38,7 +39,7 @@ License-File: LICENSE
|
|
38
39
|
[data:image/s3,"s3://crabby-images/24cc2/24cc27e92d35c47c6c4cb73f8131696d29065dd2" alt="PyPI"](https://pypi.org/project/json-repair/)
|
39
40
|
data:image/s3,"s3://crabby-images/7c195/7c195ec0ce8f4bd0be15fa58d3802cbebdbf1b37" alt="Python version"
|
40
41
|
[data:image/s3,"s3://crabby-images/8b55b/8b55be8502970b06016864be9ce046f1c0a7ec33" alt="PyPI downloads"](https://pypi.org/project/json-repair/)
|
41
|
-
|
42
|
+
[data:image/s3,"s3://crabby-images/bd5f7/bd5f772be6c859b9655a69ddd1f9967aefd77400" alt="Github Sponsors"](https://github.com/sponsors/mangiucugna)
|
42
43
|
|
43
44
|
This simple package can be used to fix an invalid json string. To know all cases in which this package will work, check out the unit test.
|
44
45
|
|
@@ -3,9 +3,11 @@ README.md
|
|
3
3
|
pyproject.toml
|
4
4
|
src/json_repair/__init__.py
|
5
5
|
src/json_repair/json_repair.py
|
6
|
+
src/json_repair/py.typed
|
6
7
|
src/json_repair.egg-info/PKG-INFO
|
7
8
|
src/json_repair.egg-info/SOURCES.txt
|
8
9
|
src/json_repair.egg-info/dependency_links.txt
|
9
10
|
src/json_repair.egg-info/top_level.txt
|
11
|
+
tests/test_coverage.py
|
10
12
|
tests/test_json_repair.py
|
11
13
|
tests/test_performance.py
|
@@ -0,0 +1,18 @@
|
|
1
|
+
import coverage
|
2
|
+
import sys
|
3
|
+
|
4
|
+
COVERAGE_THRESHOLD = 100
|
5
|
+
|
6
|
+
cov = coverage.Coverage()
|
7
|
+
cov.start()
|
8
|
+
|
9
|
+
import pytest
|
10
|
+
retcode = pytest.main(["./tests/test_json_repair.py", "--cov-config=.coveragerc"])
|
11
|
+
|
12
|
+
cov.stop()
|
13
|
+
cov.save()
|
14
|
+
coverage_percent = cov.report(show_missing=True)
|
15
|
+
|
16
|
+
if coverage_percent < COVERAGE_THRESHOLD:
|
17
|
+
print(f"ERROR: Coverage {coverage_percent:.2f}% is below the threshold of {COVERAGE_THRESHOLD}%")
|
18
|
+
sys.exit(1) # This will prevent the commit/push
|
@@ -94,10 +94,12 @@ def test_missing_and_mixed_quotes():
|
|
94
94
|
repair_json('{"name": "John", "age": 30, "city": "New')
|
95
95
|
== '{"name": "John", "age": 30, "city": "New"}'
|
96
96
|
)
|
97
|
-
assert repair_json('[{"key": "value", COMMENT "notes": "lorem "ipsum", sic."}]') == '[{"key": "value", "notes": "lorem \\"ipsum\\", sic."}]'
|
97
|
+
assert repair_json('[{"key": "value", COMMENT "notes": "lorem "ipsum", sic." }]') == '[{"key": "value", "notes": "lorem \\"ipsum\\", sic."}]'
|
98
98
|
assert repair_json('{"key": ""value"}') == '{"key": "value"}'
|
99
99
|
assert repair_json('{"key": "value", 5: "value"}') == '{"key": "value", "5": "value"}'
|
100
100
|
assert repair_json('{"foo": "\\"bar\\""') == '{"foo": "\\"bar\\""}'
|
101
|
+
assert repair_json('{"" key":"val"') == '{" key": "val"}'
|
102
|
+
assert repair_json('{"key": value "key2" : "value2" ') == '{"key": "value", "key2": "value2"}'
|
101
103
|
|
102
104
|
def test_array_edge_cases():
|
103
105
|
assert repair_json("[1, 2, 3,") == "[1, 2, 3]"
|
@@ -106,16 +108,9 @@ def test_array_edge_cases():
|
|
106
108
|
assert repair_json("[1, 2, '...', 3]") == '[1, 2, "...", 3]'
|
107
109
|
assert repair_json("[true, false, null, ...]") == '[true, false, null]'
|
108
110
|
assert repair_json('["a" "b" "c" 1') == '["a", "b", "c", 1]'
|
109
|
-
assert (
|
110
|
-
|
111
|
-
== '{"employees": ["John", "Anna"]}'
|
112
|
-
)
|
113
|
-
assert (
|
114
|
-
repair_json('{"employees":["John", "Anna", "Peter')
|
115
|
-
== '{"employees": ["John", "Anna", "Peter"]}'
|
116
|
-
)
|
111
|
+
assert repair_json('{"employees":["John", "Anna",') == '{"employees": ["John", "Anna"]}'
|
112
|
+
assert repair_json('{"employees":["John", "Anna", "Peter') == '{"employees": ["John", "Anna", "Peter"]}'
|
117
113
|
assert repair_json('{"key1": {"key2": [1, 2, 3') == '{"key1": {"key2": [1, 2, 3]}}'
|
118
|
-
|
119
114
|
|
120
115
|
def test_escaping():
|
121
116
|
assert repair_json("'\"'") == '""'
|
@@ -125,20 +120,12 @@ def test_escaping():
|
|
125
120
|
assert repair_json('{"key\t_": "value"}') == '{"key\\t_": "value"}'
|
126
121
|
|
127
122
|
|
128
|
-
def test_object_edge_cases():
|
129
|
-
assert {
|
130
|
-
|
131
|
-
}
|
132
|
-
assert {
|
133
|
-
repair_json('{"value_1": true, COMMENT "value_2": "data"}') == '{"value_1": "value_2", "": "data"}'
|
134
|
-
}
|
123
|
+
def test_object_edge_cases():
|
124
|
+
assert repair_json('{ ') == '{}'
|
125
|
+
assert repair_json('{"": "value"') == '{"": "value"}'
|
126
|
+
assert repair_json('{"value_1": true, COMMENT "value_2": "data"}') == '{"value_1": true, "value_2": "data"}'
|
135
127
|
assert repair_json('{"value_1": true, SHOULD_NOT_EXIST "value_2": "data" AAAA }') == '{"value_1": true, "value_2": "data"}'
|
136
|
-
assert {
|
137
|
-
repair_json('{"" : true, "key2": "value2"}') == '{" ": true, "key2": "value_2"}'
|
138
|
-
}
|
139
|
-
assert {
|
140
|
-
repair_json('{"": true, "key2": "value2"}') == '{"empty_placeholder": true, "key2": "value_2"}'
|
141
|
-
}
|
128
|
+
assert repair_json('{"" : true, "key2": "value2"}') == '{"": true, "key2": "value2"}'
|
142
129
|
assert repair_json('{""answer"":[{""traits"":''Female aged 60+'',""answer1"":""5""}]}') == '{"answer": [{"traits": "Female aged 60+", "answer1": "5"}]}'
|
143
130
|
assert repair_json('{ "words": abcdef", "numbers": 12345", "words2": ghijkl" }') == '{"words": "abcdef", "numbers": 12345, "words2": "ghijkl"}'
|
144
131
|
assert repair_json('''{"number": 1,"reason": "According...""ans": "YES"}''') == '{"number": 1, "reason": "According...", "ans": "YES"}'
|
@@ -147,6 +134,8 @@ def test_object_edge_cases():
|
|
147
134
|
assert repair_json('{"key": "Lorem "ipsum" s,"}') == '{"key": "Lorem \\"ipsum\\" s,"}'
|
148
135
|
assert repair_json('{"lorem": ipsum, sic, datum.",}') == '{"lorem": "ipsum, sic, datum."}'
|
149
136
|
assert repair_json('{"lorem": sic tamet. "ipsum": sic tamet, quick brown fox. "sic": ipsum}') == '{"lorem": "sic tamet.", "ipsum": "sic tamet", "sic": "ipsum"}'
|
137
|
+
assert repair_json('{"key":value, " key2":"value2" }') == '{"key": "value", " key2": "value2"}'
|
138
|
+
assert repair_json('{"key":value "key2":"value2" }') == '{"key": "value", "key2": "value2"}'
|
150
139
|
|
151
140
|
def test_number_edge_cases():
|
152
141
|
assert repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
|
@@ -159,6 +148,7 @@ def test_number_edge_cases():
|
|
159
148
|
assert repair_json('{"key": 1/3, "foo": "bar"}') == '{"key": "1/3", "foo": "bar"}'
|
160
149
|
assert repair_json('{"key": 10-20}') == '{"key": "10-20"}'
|
161
150
|
assert repair_json('{"key": 1.1.1}') == '{"key": "1.1.1"}'
|
151
|
+
assert repair_json('[- ') == '[]'
|
162
152
|
|
163
153
|
def test_markdown():
|
164
154
|
assert repair_json('{ "content": "[LINK]("https://google.com")" }') == '{"content": "[LINK](\\"https://google.com\\")"}'
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|