json-repair 0.25.3__tar.gz → 0.27.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {json_repair-0.25.3/src/json_repair.egg-info → json_repair-0.27.0}/PKG-INFO +3 -3
- {json_repair-0.25.3 → json_repair-0.27.0}/README.md +1 -1
- {json_repair-0.25.3 → json_repair-0.27.0}/pyproject.toml +2 -2
- {json_repair-0.25.3 → json_repair-0.27.0}/src/json_repair/json_repair.py +17 -7
- {json_repair-0.25.3 → json_repair-0.27.0/src/json_repair.egg-info}/PKG-INFO +3 -3
- {json_repair-0.25.3 → json_repair-0.27.0}/tests/test_json_repair.py +7 -3
- {json_repair-0.25.3 → json_repair-0.27.0}/LICENSE +0 -0
- {json_repair-0.25.3 → json_repair-0.27.0}/setup.cfg +0 -0
- {json_repair-0.25.3 → json_repair-0.27.0}/src/json_repair/__init__.py +0 -0
- {json_repair-0.25.3 → json_repair-0.27.0}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.25.3 → json_repair-0.27.0}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.25.3 → json_repair-0.27.0}/src/json_repair.egg-info/top_level.txt +0 -0
- {json_repair-0.25.3 → json_repair-0.27.0}/tests/test_performance.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.25.3
|
3
|
+
Version: 0.27.0
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -31,12 +31,12 @@ Keywords: JSON,REPAIR,LLM,PARSER
|
|
31
31
|
Classifier: Programming Language :: Python :: 3
|
32
32
|
Classifier: License :: OSI Approved :: MIT License
|
33
33
|
Classifier: Operating System :: OS Independent
|
34
|
-
Requires-Python: >=3.
|
34
|
+
Requires-Python: >=3.8
|
35
35
|
Description-Content-Type: text/markdown
|
36
36
|
License-File: LICENSE
|
37
37
|
|
38
38
|
[data:image/s3,"s3://crabby-images/24cc2/24cc27e92d35c47c6c4cb73f8131696d29065dd2" alt="PyPI"](https://pypi.org/project/json-repair/)
|
39
|
-
data:image/s3,"s3://crabby-images/6e94e/6e94e525a1db169e4a3a48dc0fc237713eb1772e" alt="Python version"
|
40
40
|
[data:image/s3,"s3://crabby-images/8b55b/8b55be8502970b06016864be9ce046f1c0a7ec33" alt="PyPI downloads"](https://pypi.org/project/json-repair/)
|
41
41
|
|
42
42
|
|
@@ -1,5 +1,5 @@
|
|
1
1
|
[data:image/s3,"s3://crabby-images/24cc2/24cc27e92d35c47c6c4cb73f8131696d29065dd2" alt="PyPI"](https://pypi.org/project/json-repair/)
|
2
|
-
data:image/s3,"s3://crabby-images/6c057/6c057c73a0e33a14d477bfc3426d2a8612dea3a1" alt="Python version"
|
3
3
|
[data:image/s3,"s3://crabby-images/8b55b/8b55be8502970b06016864be9ce046f1c0a7ec33" alt="PyPI downloads"](https://pypi.org/project/json-repair/)
|
4
4
|
|
5
5
|
|
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.25.3"
|
6
|
+
version = "0.27.0"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -11,7 +11,7 @@ authors = [
|
|
11
11
|
description = "A package to repair broken json strings"
|
12
12
|
keywords = ["JSON", "REPAIR", "LLM", "PARSER"]
|
13
13
|
readme = "README.md"
|
14
|
-
requires-python = ">=3.
|
14
|
+
requires-python = ">=3.8"
|
15
15
|
classifiers = [
|
16
16
|
"Programming Language :: Python :: 3",
|
17
17
|
"License :: OSI Approved :: MIT License",
|
@@ -301,7 +301,7 @@ class JSONParser:
|
|
301
301
|
|
302
302
|
char = self.get_char_at()
|
303
303
|
# A valid string can only start with a valid quote or, in our case, with a literal
|
304
|
-
while char and char not in ['"', "'", "“"] and not char.
|
304
|
+
while char and char not in ['"', "'", "“"] and not char.isalnum():
|
305
305
|
self.index += 1
|
306
306
|
char = self.get_char_at()
|
307
307
|
|
@@ -315,7 +315,7 @@ class JSONParser:
|
|
315
315
|
elif char == "“":
|
316
316
|
lstring_delimiter = "“"
|
317
317
|
rstring_delimiter = "”"
|
318
|
-
elif char.
|
318
|
+
elif char.isalnum():
|
319
319
|
# This could be a <boolean> and not a string. Because (T)rue or (F)alse or (N)ull are valid
|
320
320
|
# But remember, object keys are only of type string
|
321
321
|
if char.lower() in ["t", "f", "n"] and self.get_context() != "object_key":
|
@@ -675,6 +675,7 @@ def repair_json(
|
|
675
675
|
skip_json_loads: Optional[bool] = False,
|
676
676
|
logging: Optional[bool] = False,
|
677
677
|
json_fd: Optional[TextIO] = None,
|
678
|
+
ensure_ascii: Optional[bool] = True,
|
678
679
|
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
|
679
680
|
"""
|
680
681
|
Given a json formatted string, it will try to decode it and, if it fails, it will try to fix it.
|
@@ -697,11 +698,13 @@ def repair_json(
|
|
697
698
|
# It's useful to return the actual object instead of the json string, it allows this lib to be a replacement of the json library
|
698
699
|
if return_objects or logging:
|
699
700
|
return parsed_json
|
700
|
-
return json.dumps(parsed_json)
|
701
|
+
return json.dumps(parsed_json, ensure_ascii=ensure_ascii)
|
701
702
|
|
702
703
|
|
703
704
|
def loads(
|
704
|
-
json_str: str,
|
705
|
+
json_str: str,
|
706
|
+
skip_json_loads: Optional[bool] = False,
|
707
|
+
logging: Optional[bool] = False,
|
705
708
|
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
|
706
709
|
"""
|
707
710
|
This function works like `json.loads()` except that it will fix your JSON in the process.
|
@@ -716,17 +719,24 @@ def loads(
|
|
716
719
|
|
717
720
|
|
718
721
|
def load(
|
719
|
-
fd: TextIO, skip_json_loads: bool = False, logging: bool = False
|
722
|
+
fd: TextIO, skip_json_loads: Optional[bool] = False, logging: Optional[bool] = False
|
720
723
|
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
|
721
724
|
"""
|
722
725
|
This function works like `json.load()` except that it will fix your JSON in the process.
|
723
726
|
It is a wrapper around the `repair_json()` function with `json_fd=fd` and `return_objects=True`.
|
724
727
|
"""
|
725
|
-
return repair_json(
|
728
|
+
return repair_json(
|
729
|
+
json_fd=fd,
|
730
|
+
return_objects=True,
|
731
|
+
skip_json_loads=skip_json_loads,
|
732
|
+
logging=logging,
|
733
|
+
)
|
726
734
|
|
727
735
|
|
728
736
|
def from_file(
|
729
|
-
filename: str,
|
737
|
+
filename: str,
|
738
|
+
skip_json_loads: Optional[bool] = False,
|
739
|
+
logging: Optional[bool] = False,
|
730
740
|
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
|
731
741
|
"""
|
732
742
|
This function is a wrapper around `load()` so you can pass the filename as string
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.25.3
|
3
|
+
Version: 0.27.0
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -31,12 +31,12 @@ Keywords: JSON,REPAIR,LLM,PARSER
|
|
31
31
|
Classifier: Programming Language :: Python :: 3
|
32
32
|
Classifier: License :: OSI Approved :: MIT License
|
33
33
|
Classifier: Operating System :: OS Independent
|
34
|
-
Requires-Python: >=3.
|
34
|
+
Requires-Python: >=3.8
|
35
35
|
Description-Content-Type: text/markdown
|
36
36
|
License-File: LICENSE
|
37
37
|
|
38
38
|
[data:image/s3,"s3://crabby-images/24cc2/24cc27e92d35c47c6c4cb73f8131696d29065dd2" alt="PyPI"](https://pypi.org/project/json-repair/)
|
39
|
-
data:image/s3,"s3://crabby-images/6e94e/6e94e525a1db169e4a3a48dc0fc237713eb1772e" alt="Python version"
|
40
40
|
[data:image/s3,"s3://crabby-images/8b55b/8b55be8502970b06016864be9ce046f1c0a7ec33" alt="PyPI downloads"](https://pypi.org/project/json-repair/)
|
41
41
|
|
42
42
|
|
@@ -96,6 +96,7 @@ def test_missing_and_mixed_quotes():
|
|
96
96
|
)
|
97
97
|
assert repair_json('[{"key": "value", COMMENT "notes": "lorem "ipsum", sic."}]') == '[{"key": "value", "notes": "lorem \\"ipsum\\", sic."}]'
|
98
98
|
assert repair_json('{"key": ""value"}') == '{"key": "value"}'
|
99
|
+
assert repair_json('{"key": "value", 5: "value"}') == '{"key": "value", "5": "value"}'
|
99
100
|
|
100
101
|
def test_array_edge_cases():
|
101
102
|
assert repair_json("[1, 2, 3,") == "[1, 2, 3]"
|
@@ -237,7 +238,7 @@ def test_repair_json_from_file():
|
|
237
238
|
import pathlib
|
238
239
|
path = pathlib.Path(__file__).parent.resolve()
|
239
240
|
|
240
|
-
assert
|
241
|
+
assert from_file(os.path.join(path,"invalid.json")) == [{"_id": "655b66256574f09bdae8abe8", "index": 0, "guid": "31082ae3-b0f3-4406-90f4-cc450bd4379d", "isActive": False, "balance": "$2,562.78", "picture": "http://placehold.it/32x32", "age": 32, "eyeColor": "brown", "name": "Glover Rivas", "gender": "male", "company": "EMPIRICA", "email": "gloverrivas@empirica.com", "phone": "+1 (842) 507-3063", "address": "536 Montague Terrace, Jenkinsville, Kentucky, 2235", "about": "Mollit consectetur excepteur voluptate tempor dolore ullamco enim irure ullamco non enim officia. Voluptate occaecat proident laboris ea Lorem cupidatat reprehenderit nisi nisi aliqua. Amet nulla ipsum deserunt excepteur amet ad aute aute ex. Et enim minim sit veniam est quis dolor nisi sunt quis eiusmod in. Amet eiusmod cillum sunt occaecat dolor laboris voluptate in eiusmod irure aliqua duis.", "registered": "2023-11-18T09:32:36 -01:00", "latitude": 36.26102, "longitude": -91.304608, "tags": ["non", "tempor", "do", "ullamco", "dolore", "sunt", "ipsum"], "friends": [{"id": 0, "name": "Cara Shepherd"}, {"id": 1, "name": "Mason Farley"}, {"id": 2, "name": "Harriet Cochran"}], "greeting": "Hello, Glover Rivas! You have 7 unread messages.", "favoriteFruit": "strawberry"}, {"_id": "655b662585364bc57278bb6f", "index": 1, "guid": "0dea7a3a-f812-4dde-b78d-7a9b58e5da05", "isActive": True, "balance": "$1,359.48", "picture": "http://placehold.it/32x32", "age": 38, "eyeColor": "brown", "name": "Brandi Moreno", "gender": "female", "company": "MARQET", "email": "brandimoreno@marqet.com", "phone": "+1 (850) 434-2077", "address": "537 Doone Court, Waiohinu, Michigan, 3215", "about": "Irure proident adipisicing do Lorem do incididunt in laborum in eiusmod eiusmod ad elit proident. Eiusmod dolor ex magna magna occaecat. Nulla deserunt velit ex exercitation et irure sunt. Cupidatat ut excepteur ea quis labore sint cupidatat incididunt amet eu consectetur cillum ipsum proident. 
Occaecat exercitation aute laborum dolor proident reprehenderit laborum in voluptate culpa. Exercitation nulla adipisicing culpa aute est deserunt ea nisi deserunt consequat occaecat ut et non. Incididunt ex exercitation dolor dolor anim cillum dolore.", "registered": "2015-09-03T11:47:15 -02:00", "latitude": -19.768953, "longitude": 8.948458, "tags": ["laboris", "occaecat", "laborum", "laborum", "ex", "cillum", "occaecat"], "friends": [{"id": 0, "name": "Erna Kelly"}, {"id": 1, "name": "Black Mays"}, {"id": 2, "name": "Davis Buck"}], "greeting": "Hello, Brandi Moreno! You have 1 unread messages.", "favoriteFruit": "apple"}, {"_id": "655b6625870da431bcf5e0c2", "index": 2, "guid": "b17f6e3f-c898-4334-abbf-05cf222f143b", "isActive": False, "balance": "$1,493.77", "picture": "http://placehold.it/32x32", "age": 20, "eyeColor": "brown", "name": "Moody Meadows", "gender": "male", "company": "OPTIQUE", "email": "moodymeadows@optique.com", "phone": "+1 (993) 566-3041", "address": "766 Osborn Street, Bath, Maine, 7666", "about": "Non commodo excepteur nostrud qui adipisicing aliquip dolor minim nulla culpa proident. In ad cupidatat ea mollit ex est do deserunt proident nostrud. Cillum id id eiusmod amet exercitation nostrud cillum sunt deserunt dolore deserunt eiusmod mollit. Ut ex tempor ad laboris voluptate labore id officia fugiat exercitation amet.", "registered": "2015-01-16T02:48:28 -01:00", "latitude": -25.847327, "longitude": 63.95991, "tags": ["aute", "commodo", "adipisicing", "nostrud", "duis", "mollit", "ut"], "friends": [{"id": 0, "name": "Lacey Cash"}, {"id": 1, "name": "Gabrielle Harmon"}, {"id": 2, "name": "Ellis Lambert"}], "greeting": "Hello, Moody Meadows! 
You have 4 unread messages.", "favoriteFruit": "strawberry"}, {"_id": "655b6625f3e1bf422220854e", "index": 3, "guid": "92229883-2bfd-4974-a08c-1b506b372e46", "isActive": False, "balance": "$2,215.34", "picture": "http://placehold.it/32x32", "age": 22, "eyeColor": "brown", "name": "Heath Nguyen", "gender": "male", "company": "BLEENDOT", "email": "heathnguyen@bleendot.com", "phone": "+1 (989) 512-2797", "address": "135 Milton Street, Graniteville, Nebraska, 276", "about": "Consequat aliquip irure Lorem cupidatat nulla magna ullamco nulla voluptate adipisicing anim consectetur tempor aliquip. Magna aliqua nulla eu tempor esse proident. Proident fugiat ad ex Lorem reprehenderit dolor aliquip labore labore aliquip. Deserunt aute enim ea minim officia anim culpa sint commodo. Cillum consectetur excepteur aliqua exercitation Lorem veniam voluptate.", "registered": "2016-07-06T01:31:07 -02:00", "latitude": -60.997048, "longitude": -102.397885, "tags": ["do", "ad", "consequat", "irure", "tempor", "elit", "minim"], "friends": [{"id": 0, "name": "Walker Hernandez"}, {"id": 1, "name": "Maria Lane"}, {"id": 2, "name": "Mcknight Barron"}], "greeting": "Hello, Heath Nguyen! You have 4 unread messages.", "favoriteFruit": "apple"}, {"_id": "655b6625519a5b5e4b6742bf", "index": 4, "guid": "c5dc685f-6d0d-4173-b4cf-f5df29a1e8ef", "isActive": True, "balance": "$1,358.90", "picture": "http://placehold.it/32x32", "age": 33, "eyeColor": "brown", "name": "Deidre Duke", "gender": "female", "company": "OATFARM", "email": "deidreduke@oatfarm.com", "phone": "+1 (875) 587-3256", "address": "487 Schaefer Street, Wattsville, West Virginia, 4506", "about": "Laboris eu nulla esse magna sit eu deserunt non est aliqua exercitation commodo. Ad occaecat qui qui laborum dolore anim Lorem. Est qui occaecat irure enim deserunt enim aliqua ex deserunt incididunt esse. Quis in minim laboris proident non mollit. Magna ea do labore commodo. 
Et elit esse esse occaecat officia ipsum nisi.", "registered": "2021-09-12T04:17:08 -02:00", "latitude": 68.609781, "longitude": -87.509134, "tags": ["mollit", "cupidatat", "irure", "sit", "consequat", "anim", "fugiat"], "friends": [{"id": 0, "name": "Bean Paul"}, {"id": 1, "name": "Cochran Hubbard"}, {"id": 2, "name": "Rodgers Atkinson"}], "greeting": "Hello, Deidre Duke! You have 6 unread messages.", "favoriteFruit": "apple"}, {"_id": "655b6625a19b3f7e5f82f0ea", "index": 5, "guid": "75f3c264-baa1-47a0-b21c-4edac23d9935", "isActive": True, "balance": "$3,554.36", "picture": "http://placehold.it/32x32", "age": 26, "eyeColor": "blue", "name": "Lydia Holland", "gender": "female", "company": "ESCENTA", "email": "lydiaholland@escenta.com", "phone": "+1 (927) 482-3436", "address": "554 Rockaway Parkway, Kohatk, Montana, 6316", "about": "Consectetur ea est labore commodo laborum mollit pariatur non enim. Est dolore et non laboris tempor. Ea incididunt ut adipisicing cillum labore officia tempor eiusmod commodo. Cillum fugiat ex consectetur ut nostrud anim nostrud exercitation ut duis in ea. Eu et id fugiat est duis eiusmod ullamco quis officia minim sint ea nisi in.", "registered": "2018-03-13T01:48:56 -01:00", "latitude": -88.495799, "longitude": 71.840667, "tags": ["veniam", "minim", "consequat", "consequat", "incididunt", "consequat", "elit"], "friends": [{"id": 0, "name": "Debra Massey"}, {"id": 1, "name": "Weiss Savage"}, {"id": 2, "name": "Shannon Guerra"}], "greeting": "Hello, Lydia Holland! You have 5 unread messages.", "favoriteFruit": "banana"}]
|
241
242
|
|
242
243
|
import tempfile
|
243
244
|
# Create a temporary file
|
@@ -246,7 +247,10 @@ def test_repair_json_from_file():
|
|
246
247
|
# Write content to the temporary file
|
247
248
|
with os.fdopen(temp_fd, 'w') as tmp:
|
248
249
|
tmp.write("{key:value}")
|
249
|
-
assert
|
250
|
+
assert from_file(temp_path, logging=True) == ({'key': 'value'}, [{'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'text': 'While parsing a string, we found no starting quote. Will add the quote back', 'context': '{key:value}'}, {'context': '{key:value}', 'text': 'While parsing a string missing the left delimiter in object key context, we found a :, stopping here',}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}, {'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'text': 'While parsing a string, we found no starting quote. Will add the quote back', 'context': '{key:value}'}, {'context': '{key:value}', 'text': 'While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn\'t determine that a right delimiter was present. Stopping here'}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}])
|
250
251
|
finally:
|
251
252
|
# Clean up - delete the temporary file
|
252
|
-
os.remove(temp_path)
|
253
|
+
os.remove(temp_path)
|
254
|
+
|
255
|
+
def test_ensure_ascii():
|
256
|
+
assert repair_json("{'test_中国人_ascii':'统一码'}", ensure_ascii=False) == '{"test_中国人_ascii": "统一码"}'
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|