json-repair 0.30.0__py3-none-any.whl → 0.30.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- json_repair/json_context.py +3 -5
- json_repair/json_parser.py +10 -1
- json_repair/json_repair.py +68 -16
- {json_repair-0.30.0.dist-info → json_repair-0.30.1.dist-info}/METADATA +35 -4
- json_repair-0.30.1.dist-info/RECORD +13 -0
- {json_repair-0.30.0.dist-info → json_repair-0.30.1.dist-info}/WHEEL +1 -1
- json_repair-0.30.0.dist-info/RECORD +0 -13
- {json_repair-0.30.0.dist-info → json_repair-0.30.1.dist-info}/LICENSE +0 -0
- {json_repair-0.30.0.dist-info → json_repair-0.30.1.dist-info}/entry_points.txt +0 -0
- {json_repair-0.30.0.dist-info → json_repair-0.30.1.dist-info}/top_level.txt +0 -0
json_repair/json_context.py
CHANGED
@@ -24,11 +24,9 @@ class JsonContext:
|
|
24
24
|
Returns:
|
25
25
|
None
|
26
26
|
"""
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
self.current = value
|
31
|
-
self.empty = False
|
27
|
+
self.context.append(value)
|
28
|
+
self.current = value
|
29
|
+
self.empty = False
|
32
30
|
|
33
31
|
def reset(self) -> None:
|
34
32
|
"""
|
json_repair/json_parser.py
CHANGED
@@ -314,10 +314,19 @@ class JSONParser:
|
|
314
314
|
if next_c:
|
315
315
|
i += 1
|
316
316
|
# found a delimiter, now we need to check that it is followed strictly by a comma or brace
|
317
|
+
# or the string ended
|
317
318
|
i = self.skip_whitespaces_at(idx=i, move_main_index=False)
|
318
319
|
next_c = self.get_char_at(i)
|
319
|
-
if next_c
|
320
|
+
if not next_c or next_c in [",", "}"]:
|
320
321
|
rstring_delimiter_missing = False
|
322
|
+
else:
|
323
|
+
# OK but this could still be some garbage at the end of the string
|
324
|
+
# So we need to check if we find a new lstring_delimiter afterwards
|
325
|
+
# If we do, this is a missing delimiter
|
326
|
+
i = self.skip_to_character(character=lstring_delimiter, idx=i)
|
327
|
+
next_c = self.get_char_at(i)
|
328
|
+
if not next_c:
|
329
|
+
rstring_delimiter_missing = False
|
321
330
|
else:
|
322
331
|
# skip any whitespace first
|
323
332
|
i = self.skip_whitespaces_at(idx=1, move_main_index=False)
|
json_repair/json_repair.py
CHANGED
@@ -41,10 +41,18 @@ def repair_json(
|
|
41
41
|
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
|
42
42
|
"""
|
43
43
|
Given a json formatted string, it will try to decode it and, if it fails, it will try to fix it.
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
44
|
+
|
45
|
+
Args:
|
46
|
+
json_str (str, optional): The JSON string to repair. Defaults to an empty string.
|
47
|
+
return_objects (bool, optional): If True, return the decoded data structure. Defaults to False.
|
48
|
+
skip_json_loads (bool, optional): If True, skip calling the built-in json.loads() function to verify that the json is valid before attempting to repair. Defaults to False.
|
49
|
+
logging (bool, optional): If True, return a tuple with the repaired json and a log of all repair actions. Defaults to False.
|
50
|
+
json_fd (Optional[TextIO], optional): File descriptor for JSON input. Do not use! Use `from_file` or `load` instead. Defaults to None.
|
51
|
+
ensure_ascii (bool, optional): Set to False to avoid converting non-latin characters to ascii (for example when using chinese characters). Defaults to True. Ignored if `skip_json_loads` is True.
|
52
|
+
chunk_length (int, optional): Size in bytes of the file chunks to read at once. Ignored if `json_fd` is None. Do not use! Use `from_file` or `load` instead. Defaults to 1MB.
|
53
|
+
|
54
|
+
Returns:
|
55
|
+
Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON or a tuple with the repaired JSON and repair log.
|
48
56
|
"""
|
49
57
|
parser = JSONParser(json_str, json_fd, logging, chunk_length)
|
50
58
|
if skip_json_loads:
|
@@ -72,6 +80,14 @@ def loads(
|
|
72
80
|
"""
|
73
81
|
This function works like `json.loads()` except that it will fix your JSON in the process.
|
74
82
|
It is a wrapper around the `repair_json()` function with `return_objects=True`.
|
83
|
+
|
84
|
+
Args:
|
85
|
+
json_str (str): The JSON string to load and repair.
|
86
|
+
skip_json_loads (bool, optional): If True, skip calling the built-in json.loads() function to verify that the json is valid before attempting to repair. Defaults to False.
|
87
|
+
logging (bool, optional): If True, return a tuple with the repaired json and a log of all repair actions. Defaults to False.
|
88
|
+
|
89
|
+
Returns:
|
90
|
+
Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON object or a tuple with the repaired JSON object and repair log.
|
75
91
|
"""
|
76
92
|
return repair_json(
|
77
93
|
json_str=json_str,
|
@@ -90,6 +106,15 @@ def load(
|
|
90
106
|
"""
|
91
107
|
This function works like `json.load()` except that it will fix your JSON in the process.
|
92
108
|
It is a wrapper around the `repair_json()` function with `json_fd=fd` and `return_objects=True`.
|
109
|
+
|
110
|
+
Args:
|
111
|
+
fd (TextIO): File descriptor for JSON input.
|
112
|
+
skip_json_loads (bool, optional): If True, skip calling the built-in json.loads() function to verify that the json is valid before attempting to repair. Defaults to False.
|
113
|
+
logging (bool, optional): If True, return a tuple with the repaired json and a log of all repair actions. Defaults to False.
|
114
|
+
chunk_length (int, optional): Size in bytes of the file chunks to read at once. Defaults to 1MB.
|
115
|
+
|
116
|
+
Returns:
|
117
|
+
Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON object or a tuple with the repaired JSON object and repair log.
|
93
118
|
"""
|
94
119
|
return repair_json(
|
95
120
|
json_fd=fd,
|
@@ -108,20 +133,48 @@ def from_file(
|
|
108
133
|
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
|
109
134
|
"""
|
110
135
|
This function is a wrapper around `load()` so you can pass the filename as string
|
136
|
+
|
137
|
+
Args:
|
138
|
+
filename (str): The name of the file containing JSON data to load and repair.
|
139
|
+
skip_json_loads (bool, optional): If True, skip calling the built-in json.loads() function to verify that the json is valid before attempting to repair. Defaults to False.
|
140
|
+
logging (bool, optional): If True, return a tuple with the repaired json and a log of all repair actions. Defaults to False.
|
141
|
+
chunk_length (int, optional): Size in bytes of the file chunks to read at once. Defaults to 1MB.
|
142
|
+
|
143
|
+
Returns:
|
144
|
+
Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON object or a tuple with the repaired JSON object and repair log.
|
111
145
|
"""
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
fd.close()
|
146
|
+
with open(filename) as fd:
|
147
|
+
jsonobj = load(
|
148
|
+
fd=fd,
|
149
|
+
skip_json_loads=skip_json_loads,
|
150
|
+
logging=logging,
|
151
|
+
chunk_length=chunk_length,
|
152
|
+
)
|
120
153
|
|
121
154
|
return jsonobj
|
122
155
|
|
123
156
|
|
124
157
|
def cli(inline_args: Optional[List[str]] = None) -> int:
|
158
|
+
"""
|
159
|
+
Command-line interface for repairing and parsing JSON files.
|
160
|
+
|
161
|
+
Args:
|
162
|
+
inline_args (Optional[List[str]]): List of command-line arguments for testing purposes. Defaults to None.
|
163
|
+
- filename (str): The JSON file to repair
|
164
|
+
- -i, --inline (bool): Replace the file inline instead of returning the output to stdout.
|
165
|
+
- -o, --output TARGET (str): If specified, the output will be written to TARGET filename instead of stdout.
|
166
|
+
- --ensure_ascii (bool): Pass ensure_ascii=True to json.dumps(). Will pass False otherwise.
|
167
|
+
- --indent INDENT (int): Number of spaces for indentation (Default 2).
|
168
|
+
|
169
|
+
Returns:
|
170
|
+
int: Exit code of the CLI operation.
|
171
|
+
|
172
|
+
Raises:
|
173
|
+
Exception: Any exception that occurs during file processing.
|
174
|
+
|
175
|
+
Example:
|
176
|
+
>>> cli(['example.json', '--indent', '4'])
|
177
|
+
"""
|
125
178
|
parser = argparse.ArgumentParser(description="Repair and parse JSON files.")
|
126
179
|
parser.add_argument("filename", help="The JSON file to repair")
|
127
180
|
parser.add_argument(
|
@@ -167,14 +220,13 @@ def cli(inline_args: Optional[List[str]] = None) -> int:
|
|
167
220
|
result = from_file(args.filename)
|
168
221
|
|
169
222
|
if args.inline or args.output:
|
170
|
-
|
171
|
-
|
172
|
-
fd.close()
|
223
|
+
with open(args.output or args.filename, mode="w") as fd:
|
224
|
+
json.dump(result, fd, indent=args.indent, ensure_ascii=ensure_ascii)
|
173
225
|
else:
|
174
226
|
print(json.dumps(result, indent=args.indent, ensure_ascii=ensure_ascii))
|
175
227
|
except Exception as e: # pragma: no cover
|
176
228
|
print(f"Error: {str(e)}", file=sys.stderr)
|
177
|
-
|
229
|
+
return 1
|
178
230
|
|
179
231
|
return 0 # Success
|
180
232
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.30.
|
3
|
+
Version: 0.30.1
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -32,19 +32,23 @@ Keywords: JSON,REPAIR,LLM,PARSER
|
|
32
32
|
Classifier: Programming Language :: Python :: 3
|
33
33
|
Classifier: License :: OSI Approved :: MIT License
|
34
34
|
Classifier: Operating System :: OS Independent
|
35
|
-
Requires-Python: >=3.
|
35
|
+
Requires-Python: >=3.9
|
36
36
|
Description-Content-Type: text/markdown
|
37
37
|
License-File: LICENSE
|
38
38
|
|
39
39
|
[](https://pypi.org/project/json-repair/)
|
40
|
-

|
41
41
|
[](https://pypi.org/project/json-repair/)
|
42
42
|
[](https://github.com/sponsors/mangiucugna)
|
43
|
+
[](https://github.com/mangiucugna/json_repair/stargazers)
|
44
|
+
|
43
45
|
|
44
46
|
This simple package can be used to fix an invalid JSON string. To see all the cases this package handles, check out the unit tests.
|
45
47
|
|
46
48
|
Inspired by https://github.com/josdejong/jsonrepair
|
47
49
|
|
50
|
+

|
51
|
+
|
48
52
|
---
|
49
53
|
# Offer me a beer
|
50
54
|
If you find this library useful, you can help me by donating toward my monthly beer budget here: https://github.com/sponsors/mangiucugna
|
@@ -54,6 +58,8 @@ If you find this library useful, you can help me by donating toward my monthly b
|
|
54
58
|
# Demo
|
55
59
|
If you are unsure if this library will fix your specific problem, or simply want your json validated online, you can visit the demo site on GitHub pages: https://mangiucugna.github.io/json_repair/
|
56
60
|
|
61
|
+
Or listen to an [audio deep dive generated by Google's NotebookLM](https://notebooklm.google.com/notebook/05312bb3-f6f3-4e49-a99b-bd51db64520b/audio) for an introduction to the module.
|
62
|
+
|
57
63
|
---
|
58
64
|
|
59
65
|
# Motivation
|
@@ -64,6 +70,11 @@ I searched for a lightweight python package that was able to reliably fix this p
|
|
64
70
|
|
65
71
|
*So I wrote one*
|
66
72
|
|
73
|
+
### Wouldn't GPT-4o Structured Output make this library outdated?
|
74
|
+
|
75
|
+
As part of my job we use OpenAI APIs and we noticed that even with structured output sometimes the result isn't a fully valid json.
|
76
|
+
So we still use this library to cover those outliers.
|
77
|
+
|
67
78
|
# Supported use cases
|
68
79
|
|
69
80
|
### Fixing Syntax Errors in JSON
|
@@ -144,6 +155,26 @@ and another method to read from a file:
|
|
144
155
|
|
145
156
|
Keep in mind that the library will not catch any IO-related exceptions; you will need to handle those yourself.
|
146
157
|
|
158
|
+
### Non-Latin characters
|
159
|
+
|
160
|
+
When working with non-Latin characters (such as Chinese, Japanese, or Korean), you need to pass `ensure_ascii=False` to `repair_json()` in order to preserve the non-Latin characters in the output.
|
161
|
+
|
162
|
+
Here's an example using Chinese characters:
|
163
|
+
|
164
|
+
repair_json("{'test_chinese_ascii':'统一码'}")
|
165
|
+
|
166
|
+
will return
|
167
|
+
|
168
|
+
{"test_chinese_ascii": "\u7edf\u4e00\u7801"}
|
169
|
+
|
170
|
+
Passing `ensure_ascii=False` instead:
|
171
|
+
|
172
|
+
repair_json("{'test_chinese_ascii':'统一码'}", ensure_ascii=False)
|
173
|
+
|
174
|
+
will return
|
175
|
+
|
176
|
+
{"test_chinese_ascii": "统一码"}
|
177
|
+
|
147
178
|
### Performance considerations
|
148
179
|
If you find this library too slow because it calls `json.loads()` first, you can skip that step by passing `skip_json_loads=True` to `repair_json`. Like:
|
149
180
|
|
@@ -226,7 +257,7 @@ This module will parse the JSON file following the BNF definition:
|
|
226
257
|
<object> ::= '{' [ <member> *(', ' <member>) ] '}' ; A sequence of 'members'
|
227
258
|
<member> ::= <string> ': ' <json> ; A pair consisting of a name, and a JSON value
|
228
259
|
|
229
|
-
If something is wrong (a missing
|
260
|
+
If something is wrong (missing parentheses or quotes, for example), it will use a few simple heuristics to fix the JSON string:
|
230
261
|
- Add the missing parentheses if the parser believes that the array or object should be closed
|
231
262
|
- Quote strings or add missing single quotes
|
232
263
|
- Adjust whitespaces and remove line breaks
|
@@ -0,0 +1,13 @@
|
|
1
|
+
json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
|
2
|
+
json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
|
3
|
+
json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
|
4
|
+
json_repair/json_parser.py,sha256=yZ3UHy5xVTJolhtuthiKEkQGhJg0186GeynIWieoejU,28086
|
5
|
+
json_repair/json_repair.py,sha256=LINLSJBs3cJMfs1YRDaIpfWR5PJLs87Oe06G5yQjY18,9729
|
6
|
+
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
+
json_repair/string_file_wrapper.py,sha256=EHLhNBWoyUitzT08thytYJiNZh_klEFwfT8zutPSdb4,3905
|
8
|
+
json_repair-0.30.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
9
|
+
json_repair-0.30.1.dist-info/METADATA,sha256=Tijwlxi1zqpsWwhTmUYiPmeaI0YdOgQ4S2yqcmh9BDk,11847
|
10
|
+
json_repair-0.30.1.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
11
|
+
json_repair-0.30.1.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
|
12
|
+
json_repair-0.30.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
13
|
+
json_repair-0.30.1.dist-info/RECORD,,
|
@@ -1,13 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
|
2
|
-
json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
|
3
|
-
json_repair/json_context.py,sha256=DdJu3DJR-ANvr8KrWfJqdtOE3uI6_B0VQidKvE3PjJA,1080
|
4
|
-
json_repair/json_parser.py,sha256=UQgXtXTRo0oLb4N7GhPAELWtS0E9zGSPBXfnnGOCgfo,27527
|
5
|
-
json_repair/json_repair.py,sha256=Er6klw5GgmdnLmNM9GXD9gfTi8Mn9cvvTUiVITFA-1E,6101
|
6
|
-
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
-
json_repair/string_file_wrapper.py,sha256=EHLhNBWoyUitzT08thytYJiNZh_klEFwfT8zutPSdb4,3905
|
8
|
-
json_repair-0.30.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
9
|
-
json_repair-0.30.0.dist-info/METADATA,sha256=72pBek4v2f-1zqIwWXjaPcG8reqdX9zpy1dkjT9yspI,10686
|
10
|
-
json_repair-0.30.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
11
|
-
json_repair-0.30.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
|
12
|
-
json_repair-0.30.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
13
|
-
json_repair-0.30.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|