json-repair 0.27.0__tar.gz → 0.27.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {json_repair-0.27.0/src/json_repair.egg-info → json_repair-0.27.2}/PKG-INFO +2 -2
- {json_repair-0.27.0 → json_repair-0.27.2}/README.md +1 -1
- {json_repair-0.27.0 → json_repair-0.27.2}/pyproject.toml +1 -1
- {json_repair-0.27.0 → json_repair-0.27.2}/src/json_repair/json_repair.py +22 -51
- {json_repair-0.27.0 → json_repair-0.27.2/src/json_repair.egg-info}/PKG-INFO +2 -2
- {json_repair-0.27.0 → json_repair-0.27.2}/tests/test_json_repair.py +6 -13
- {json_repair-0.27.0 → json_repair-0.27.2}/LICENSE +0 -0
- {json_repair-0.27.0 → json_repair-0.27.2}/setup.cfg +0 -0
- {json_repair-0.27.0 → json_repair-0.27.2}/src/json_repair/__init__.py +0 -0
- {json_repair-0.27.0 → json_repair-0.27.2}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.27.0 → json_repair-0.27.2}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.27.0 → json_repair-0.27.2}/src/json_repair.egg-info/top_level.txt +0 -0
- {json_repair-0.27.0 → json_repair-0.27.2}/tests/test_performance.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.27.0
|
3
|
+
Version: 0.27.2
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -38,7 +38,7 @@ License-File: LICENSE
|
|
38
38
|
[data:image/s3,"s3://crabby-images/24cc2/24cc27e92d35c47c6c4cb73f8131696d29065dd2" alt="PyPI"](https://pypi.org/project/json-repair/)
|
39
39
|
data:image/s3,"s3://crabby-images/7c195/7c195ec0ce8f4bd0be15fa58d3802cbebdbf1b37" alt="Python version"
|
40
40
|
[data:image/s3,"s3://crabby-images/8b55b/8b55be8502970b06016864be9ce046f1c0a7ec33" alt="PyPI downloads"](https://pypi.org/project/json-repair/)
|
41
|
-
|
41
|
+
[data:image/s3,"s3://crabby-images/bd5f7/bd5f772be6c859b9655a69ddd1f9967aefd77400" alt="Github Sponsors"](https://github.com/sponsors/mangiucugna)
|
42
42
|
|
43
43
|
This simple package can be used to fix an invalid json string. To know all cases in which this package will work, check out the unit test.
|
44
44
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
[data:image/s3,"s3://crabby-images/24cc2/24cc27e92d35c47c6c4cb73f8131696d29065dd2" alt="PyPI"](https://pypi.org/project/json-repair/)
|
2
2
|
data:image/s3,"s3://crabby-images/7c195/7c195ec0ce8f4bd0be15fa58d3802cbebdbf1b37" alt="Python version"
|
3
3
|
[data:image/s3,"s3://crabby-images/8b55b/8b55be8502970b06016864be9ce046f1c0a7ec33" alt="PyPI downloads"](https://pypi.org/project/json-repair/)
|
4
|
-
|
4
|
+
[data:image/s3,"s3://crabby-images/bd5f7/bd5f772be6c859b9655a69ddd1f9967aefd77400" alt="Github Sponsors"](https://github.com/sponsors/mangiucugna)
|
5
5
|
|
6
6
|
This simple package can be used to fix an invalid json string. To know all cases in which this package will work, check out the unit test.
|
7
7
|
|
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.27.0"
|
6
|
+
version = "0.27.2"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -179,21 +179,12 @@ class JSONParser:
|
|
179
179
|
|
180
180
|
# <member> starts with a <string>
|
181
181
|
key = ""
|
182
|
-
while
|
183
|
-
current_index = self.index
|
182
|
+
while self.get_char_at():
|
184
183
|
key = self.parse_string()
|
185
184
|
|
186
|
-
|
187
|
-
|
188
|
-
key = "empty_placeholder"
|
189
|
-
self.log(
|
190
|
-
"While parsing an object we found an empty key, replacing with empty_placeholder",
|
191
|
-
"info",
|
192
|
-
)
|
185
|
+
if key != "" or (key == "" and self.get_char_at() == ":"):
|
186
|
+
# If the string is empty but there is a object divider, we are done here
|
193
187
|
break
|
194
|
-
elif key == "" and self.index == current_index:
|
195
|
-
# Sometimes the string search might not move the index at all, that might lead us to an infinite loop
|
196
|
-
self.index += 1
|
197
188
|
|
198
189
|
self.skip_whitespaces_at()
|
199
190
|
|
@@ -226,13 +217,6 @@ class JSONParser:
|
|
226
217
|
# Remove trailing spaces
|
227
218
|
self.skip_whitespaces_at()
|
228
219
|
|
229
|
-
# Especially at the end of an LLM generated json you might miss the last "}"
|
230
|
-
if (self.get_char_at() or "}") != "}":
|
231
|
-
self.log(
|
232
|
-
"While parsing an object, we couldn't find the closing }, ignoring",
|
233
|
-
"info",
|
234
|
-
)
|
235
|
-
|
236
220
|
self.index += 1
|
237
221
|
return obj
|
238
222
|
|
@@ -261,13 +245,6 @@ class JSONParser:
|
|
261
245
|
while char and (char.isspace() or char == ","):
|
262
246
|
self.index += 1
|
263
247
|
char = self.get_char_at()
|
264
|
-
# If this is the right value of an object and we are closing the object, it means the array is over
|
265
|
-
if self.get_context() == "object_value" and char == "}":
|
266
|
-
self.log(
|
267
|
-
"While parsing an array inside an object, we got to the end without finding a ]. Stopped parsing",
|
268
|
-
"info",
|
269
|
-
)
|
270
|
-
break
|
271
248
|
|
272
249
|
# Especially at the end of an LLM generated json you might miss the last "]"
|
273
250
|
char = self.get_char_at()
|
@@ -275,14 +252,6 @@ class JSONParser:
|
|
275
252
|
self.log(
|
276
253
|
"While parsing an array we missed the closing ], adding it back", "info"
|
277
254
|
)
|
278
|
-
# Sometimes when you fix a missing "]" you'll have a trailing "," there that makes the JSON invalid
|
279
|
-
if char == ",":
|
280
|
-
# Remove trailing "," before adding the "]"
|
281
|
-
self.log(
|
282
|
-
"While parsing an array, found a trailing , before adding ]",
|
283
|
-
"info",
|
284
|
-
)
|
285
|
-
|
286
255
|
self.index -= 1
|
287
256
|
|
288
257
|
self.index += 1
|
@@ -337,6 +306,11 @@ class JSONParser:
|
|
337
306
|
|
338
307
|
# There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
|
339
308
|
if self.get_char_at() == lstring_delimiter:
|
309
|
+
# If it's an empty key, this was easy
|
310
|
+
if self.get_context() == "object_key" and self.get_char_at(1) == ":":
|
311
|
+
self.index += 1
|
312
|
+
return ""
|
313
|
+
|
340
314
|
# This is a valid exception only if it's closed by a double delimiter again
|
341
315
|
i = 1
|
342
316
|
next_c = self.get_char_at(i)
|
@@ -412,7 +386,7 @@ class JSONParser:
|
|
412
386
|
string_acc += char
|
413
387
|
self.index += 1
|
414
388
|
char = self.get_char_at()
|
415
|
-
if len(string_acc) >
|
389
|
+
if len(string_acc) > 0 and string_acc[-1] == "\\":
|
416
390
|
# This is a special case, if people use real strings this might happen
|
417
391
|
self.log("Found a stray escape sequence, normalizing it", "info")
|
418
392
|
string_acc = string_acc[:-1]
|
@@ -429,6 +403,7 @@ class JSONParser:
|
|
429
403
|
"While parsing a string, we found a doubled quote, ignoring it",
|
430
404
|
"info",
|
431
405
|
)
|
406
|
+
self.index += 1
|
432
407
|
elif missing_quotes and self.get_context() == "object_value":
|
433
408
|
# In case of missing starting quote I need to check if the delimeter is the end or the beginning of a key
|
434
409
|
i = 1
|
@@ -575,22 +550,18 @@ class JSONParser:
|
|
575
550
|
# The number ends with a non valid character for a number/currency, rolling back one
|
576
551
|
number_str = number_str[:-1]
|
577
552
|
self.index -= 1
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
return number_str
|
591
|
-
else:
|
592
|
-
# If nothing works, let's skip and keep parsing
|
593
|
-
return self.parse_json()
|
553
|
+
try:
|
554
|
+
if "," in number_str:
|
555
|
+
return str(number_str)
|
556
|
+
if "." in number_str or "e" in number_str or "E" in number_str:
|
557
|
+
return float(number_str)
|
558
|
+
elif number_str == "-":
|
559
|
+
# If there is a stray "-" this will throw an exception, throw away this character
|
560
|
+
return self.parse_json()
|
561
|
+
else:
|
562
|
+
return int(number_str)
|
563
|
+
except ValueError:
|
564
|
+
return number_str
|
594
565
|
|
595
566
|
def parse_boolean_or_null(self) -> Union[bool, str, None]:
|
596
567
|
# <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.27.0
|
3
|
+
Version: 0.27.2
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -38,7 +38,7 @@ License-File: LICENSE
|
|
38
38
|
[data:image/s3,"s3://crabby-images/24cc2/24cc27e92d35c47c6c4cb73f8131696d29065dd2" alt="PyPI"](https://pypi.org/project/json-repair/)
|
39
39
|
data:image/s3,"s3://crabby-images/7c195/7c195ec0ce8f4bd0be15fa58d3802cbebdbf1b37" alt="Python version"
|
40
40
|
[data:image/s3,"s3://crabby-images/8b55b/8b55be8502970b06016864be9ce046f1c0a7ec33" alt="PyPI downloads"](https://pypi.org/project/json-repair/)
|
41
|
-
|
41
|
+
[data:image/s3,"s3://crabby-images/bd5f7/bd5f772be6c859b9655a69ddd1f9967aefd77400" alt="Github Sponsors"](https://github.com/sponsors/mangiucugna)
|
42
42
|
|
43
43
|
This simple package can be used to fix an invalid json string. To know all cases in which this package will work, check out the unit test.
|
44
44
|
|
@@ -97,6 +97,7 @@ def test_missing_and_mixed_quotes():
|
|
97
97
|
assert repair_json('[{"key": "value", COMMENT "notes": "lorem "ipsum", sic."}]') == '[{"key": "value", "notes": "lorem \\"ipsum\\", sic."}]'
|
98
98
|
assert repair_json('{"key": ""value"}') == '{"key": "value"}'
|
99
99
|
assert repair_json('{"key": "value", 5: "value"}') == '{"key": "value", "5": "value"}'
|
100
|
+
assert repair_json('{"foo": "\\"bar\\""') == '{"foo": "\\"bar\\""}'
|
100
101
|
|
101
102
|
def test_array_edge_cases():
|
102
103
|
assert repair_json("[1, 2, 3,") == "[1, 2, 3]"
|
@@ -124,20 +125,11 @@ def test_escaping():
|
|
124
125
|
assert repair_json('{"key\t_": "value"}') == '{"key\\t_": "value"}'
|
125
126
|
|
126
127
|
|
127
|
-
def test_object_edge_cases():
|
128
|
-
assert {
|
129
|
-
|
130
|
-
}
|
131
|
-
assert {
|
132
|
-
repair_json('{"value_1": true, COMMENT "value_2": "data"}') == '{"value_1": "value_2", "": "data"}'
|
133
|
-
}
|
128
|
+
def test_object_edge_cases():
|
129
|
+
assert repair_json('{ ') == '{}'
|
130
|
+
assert repair_json('{"value_1": true, COMMENT "value_2": "data"}') == '{"value_1": true, "value_2": "data"}'
|
134
131
|
assert repair_json('{"value_1": true, SHOULD_NOT_EXIST "value_2": "data" AAAA }') == '{"value_1": true, "value_2": "data"}'
|
135
|
-
assert {
|
136
|
-
repair_json('{"" : true, "key2": "value2"}') == '{" ": true, "key2": "value_2"}'
|
137
|
-
}
|
138
|
-
assert {
|
139
|
-
repair_json('{"": true, "key2": "value2"}') == '{"empty_placeholder": true, "key2": "value_2"}'
|
140
|
-
}
|
132
|
+
assert repair_json('{"" : true, "key2": "value2"}') == '{"": true, "key2": "value2"}'
|
141
133
|
assert repair_json('{""answer"":[{""traits"":''Female aged 60+'',""answer1"":""5""}]}') == '{"answer": [{"traits": "Female aged 60+", "answer1": "5"}]}'
|
142
134
|
assert repair_json('{ "words": abcdef", "numbers": 12345", "words2": ghijkl" }') == '{"words": "abcdef", "numbers": 12345, "words2": "ghijkl"}'
|
143
135
|
assert repair_json('''{"number": 1,"reason": "According...""ans": "YES"}''') == '{"number": 1, "reason": "According...", "ans": "YES"}'
|
@@ -158,6 +150,7 @@ def test_number_edge_cases():
|
|
158
150
|
assert repair_json('{"key": 1/3, "foo": "bar"}') == '{"key": "1/3", "foo": "bar"}'
|
159
151
|
assert repair_json('{"key": 10-20}') == '{"key": "10-20"}'
|
160
152
|
assert repair_json('{"key": 1.1.1}') == '{"key": "1.1.1"}'
|
153
|
+
assert repair_json('[- ') == '[]'
|
161
154
|
|
162
155
|
def test_markdown():
|
163
156
|
assert repair_json('{ "content": "[LINK]("https://google.com")" }') == '{"content": "[LINK](\\"https://google.com\\")"}'
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|