json-repair 0.30.0__py3-none-any.whl → 0.30.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_context.py +3 -5
- json_repair/json_parser.py +10 -1
- json_repair/json_repair.py +68 -16
- {json_repair-0.30.0.dist-info → json_repair-0.30.1.dist-info}/METADATA +35 -4
- json_repair-0.30.1.dist-info/RECORD +13 -0
- {json_repair-0.30.0.dist-info → json_repair-0.30.1.dist-info}/WHEEL +1 -1
- json_repair-0.30.0.dist-info/RECORD +0 -13
- {json_repair-0.30.0.dist-info → json_repair-0.30.1.dist-info}/LICENSE +0 -0
- {json_repair-0.30.0.dist-info → json_repair-0.30.1.dist-info}/entry_points.txt +0 -0
- {json_repair-0.30.0.dist-info → json_repair-0.30.1.dist-info}/top_level.txt +0 -0
json_repair/json_context.py
CHANGED
@@ -24,11 +24,9 @@ class JsonContext:
|
|
24
24
|
Returns:
|
25
25
|
None
|
26
26
|
"""
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
self.current = value
|
31
|
-
self.empty = False
|
27
|
+
self.context.append(value)
|
28
|
+
self.current = value
|
29
|
+
self.empty = False
|
32
30
|
|
33
31
|
def reset(self) -> None:
|
34
32
|
"""
|
json_repair/json_parser.py
CHANGED
@@ -314,10 +314,19 @@ class JSONParser:
|
|
314
314
|
if next_c:
|
315
315
|
i += 1
|
316
316
|
# found a delimiter, now we need to check that is followed strictly by a comma or brace
|
317
|
+
# or the string ended
|
317
318
|
i = self.skip_whitespaces_at(idx=i, move_main_index=False)
|
318
319
|
next_c = self.get_char_at(i)
|
319
|
-
if next_c
|
320
|
+
if not next_c or next_c in [",", "}"]:
|
320
321
|
rstring_delimiter_missing = False
|
322
|
+
else:
|
323
|
+
# OK but this could still be some garbage at the end of the string
|
324
|
+
# So we need to check if we find a new lstring_delimiter afterwards
|
325
|
+
# If we do, this is a missing delimiter
|
326
|
+
i = self.skip_to_character(character=lstring_delimiter, idx=i)
|
327
|
+
next_c = self.get_char_at(i)
|
328
|
+
if not next_c:
|
329
|
+
rstring_delimiter_missing = False
|
321
330
|
else:
|
322
331
|
# skip any whitespace first
|
323
332
|
i = self.skip_whitespaces_at(idx=1, move_main_index=False)
|
json_repair/json_repair.py
CHANGED
@@ -41,10 +41,18 @@ def repair_json(
|
|
41
41
|
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
|
42
42
|
"""
|
43
43
|
Given a json formatted string, it will try to decode it and, if it fails, it will try to fix it.
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
44
|
+
|
45
|
+
Args:
|
46
|
+
json_str (str, optional): The JSON string to repair. Defaults to an empty string.
|
47
|
+
return_objects (bool, optional): If True, return the decoded data structure. Defaults to False.
|
48
|
+
skip_json_loads (bool, optional): If True, skip calling the built-in json.loads() function to verify that the json is valid before attempting to repair. Defaults to False.
|
49
|
+
logging (bool, optional): If True, return a tuple with the repaired json and a log of all repair actions. Defaults to False.
|
50
|
+
json_fd (Optional[TextIO], optional): File descriptor for JSON input. Do not use! Use `from_file` or `load` instead. Defaults to None.
|
51
|
+
ensure_ascii (bool, optional): Set to False to avoid converting non-latin characters to ascii (for example when using chinese characters). Defaults to True. Ignored if `skip_json_loads` is True.
|
52
|
+
chunk_length (int, optional): Size in bytes of the file chunks to read at once. Ignored if `json_fd` is None. Do not use! Use `from_file` or `load` instead. Defaults to 1MB.
|
53
|
+
|
54
|
+
Returns:
|
55
|
+
Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON or a tuple with the repaired JSON and repair log.
|
48
56
|
"""
|
49
57
|
parser = JSONParser(json_str, json_fd, logging, chunk_length)
|
50
58
|
if skip_json_loads:
|
@@ -72,6 +80,14 @@ def loads(
|
|
72
80
|
"""
|
73
81
|
This function works like `json.loads()` except that it will fix your JSON in the process.
|
74
82
|
It is a wrapper around the `repair_json()` function with `return_objects=True`.
|
83
|
+
|
84
|
+
Args:
|
85
|
+
json_str (str): The JSON string to load and repair.
|
86
|
+
skip_json_loads (bool, optional): If True, skip calling the built-in json.loads() function to verify that the json is valid before attempting to repair. Defaults to False.
|
87
|
+
logging (bool, optional): If True, return a tuple with the repaired json and a log of all repair actions. Defaults to False.
|
88
|
+
|
89
|
+
Returns:
|
90
|
+
Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON object or a tuple with the repaired JSON object and repair log.
|
75
91
|
"""
|
76
92
|
return repair_json(
|
77
93
|
json_str=json_str,
|
@@ -90,6 +106,15 @@ def load(
|
|
90
106
|
"""
|
91
107
|
This function works like `json.load()` except that it will fix your JSON in the process.
|
92
108
|
It is a wrapper around the `repair_json()` function with `json_fd=fd` and `return_objects=True`.
|
109
|
+
|
110
|
+
Args:
|
111
|
+
fd (TextIO): File descriptor for JSON input.
|
112
|
+
skip_json_loads (bool, optional): If True, skip calling the built-in json.loads() function to verify that the json is valid before attempting to repair. Defaults to False.
|
113
|
+
logging (bool, optional): If True, return a tuple with the repaired json and a log of all repair actions. Defaults to False.
|
114
|
+
chunk_length (int, optional): Size in bytes of the file chunks to read at once. Defaults to 1MB.
|
115
|
+
|
116
|
+
Returns:
|
117
|
+
Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON object or a tuple with the repaired JSON object and repair log.
|
93
118
|
"""
|
94
119
|
return repair_json(
|
95
120
|
json_fd=fd,
|
@@ -108,20 +133,48 @@ def from_file(
|
|
108
133
|
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
|
109
134
|
"""
|
110
135
|
This function is a wrapper around `load()` so you can pass the filename as string
|
136
|
+
|
137
|
+
Args:
|
138
|
+
filename (str): The name of the file containing JSON data to load and repair.
|
139
|
+
skip_json_loads (bool, optional): If True, skip calling the built-in json.loads() function to verify that the json is valid before attempting to repair. Defaults to False.
|
140
|
+
logging (bool, optional): If True, return a tuple with the repaired json and a log of all repair actions. Defaults to False.
|
141
|
+
chunk_length (int, optional): Size in bytes of the file chunks to read at once. Defaults to 1MB.
|
142
|
+
|
143
|
+
Returns:
|
144
|
+
Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON object or a tuple with the repaired JSON object and repair log.
|
111
145
|
"""
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
fd.close()
|
146
|
+
with open(filename) as fd:
|
147
|
+
jsonobj = load(
|
148
|
+
fd=fd,
|
149
|
+
skip_json_loads=skip_json_loads,
|
150
|
+
logging=logging,
|
151
|
+
chunk_length=chunk_length,
|
152
|
+
)
|
120
153
|
|
121
154
|
return jsonobj
|
122
155
|
|
123
156
|
|
124
157
|
def cli(inline_args: Optional[List[str]] = None) -> int:
|
158
|
+
"""
|
159
|
+
Command-line interface for repairing and parsing JSON files.
|
160
|
+
|
161
|
+
Args:
|
162
|
+
inline_args (Optional[List[str]]): List of command-line arguments for testing purposes. Defaults to None.
|
163
|
+
- filename (str): The JSON file to repair
|
164
|
+
- -i, --inline (bool): Replace the file inline instead of returning the output to stdout.
|
165
|
+
- -o, --output TARGET (str): If specified, the output will be written to TARGET filename instead of stdout.
|
166
|
+
- --ensure_ascii (bool): Pass ensure_ascii=True to json.dumps(). Will pass False otherwise.
|
167
|
+
- --indent INDENT (int): Number of spaces for indentation (Default 2).
|
168
|
+
|
169
|
+
Returns:
|
170
|
+
int: Exit code of the CLI operation.
|
171
|
+
|
172
|
+
Raises:
|
173
|
+
Exception: Any exception that occurs during file processing.
|
174
|
+
|
175
|
+
Example:
|
176
|
+
>>> cli(['example.json', '--indent', '4'])
|
177
|
+
"""
|
125
178
|
parser = argparse.ArgumentParser(description="Repair and parse JSON files.")
|
126
179
|
parser.add_argument("filename", help="The JSON file to repair")
|
127
180
|
parser.add_argument(
|
@@ -167,14 +220,13 @@ def cli(inline_args: Optional[List[str]] = None) -> int:
|
|
167
220
|
result = from_file(args.filename)
|
168
221
|
|
169
222
|
if args.inline or args.output:
|
170
|
-
|
171
|
-
|
172
|
-
fd.close()
|
223
|
+
with open(args.output or args.filename, mode="w") as fd:
|
224
|
+
json.dump(result, fd, indent=args.indent, ensure_ascii=ensure_ascii)
|
173
225
|
else:
|
174
226
|
print(json.dumps(result, indent=args.indent, ensure_ascii=ensure_ascii))
|
175
227
|
except Exception as e: # pragma: no cover
|
176
228
|
print(f"Error: {str(e)}", file=sys.stderr)
|
177
|
-
|
229
|
+
return 1
|
178
230
|
|
179
231
|
return 0 # Success
|
180
232
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.30.
|
3
|
+
Version: 0.30.1
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -32,19 +32,23 @@ Keywords: JSON,REPAIR,LLM,PARSER
|
|
32
32
|
Classifier: Programming Language :: Python :: 3
|
33
33
|
Classifier: License :: OSI Approved :: MIT License
|
34
34
|
Classifier: Operating System :: OS Independent
|
35
|
-
Requires-Python: >=3.
|
35
|
+
Requires-Python: >=3.9
|
36
36
|
Description-Content-Type: text/markdown
|
37
37
|
License-File: LICENSE
|
38
38
|
|
39
39
|
[](https://pypi.org/project/json-repair/)
|
40
|
-

|
41
41
|
[](https://pypi.org/project/json-repair/)
|
42
42
|
[](https://github.com/sponsors/mangiucugna)
|
43
|
+
[](https://github.com/mangiucugna/json_repair/stargazers)
|
44
|
+
|
43
45
|
|
44
46
|
This simple package can be used to fix an invalid json string. To see all the cases in which this package will work, check out the unit tests.
|
45
47
|
|
46
48
|
Inspired by https://github.com/josdejong/jsonrepair
|
47
49
|
|
50
|
+

|
51
|
+
|
48
52
|
---
|
49
53
|
# Offer me a beer
|
50
54
|
If you find this library useful, you can help me by donating toward my monthly beer budget here: https://github.com/sponsors/mangiucugna
|
@@ -54,6 +58,8 @@ If you find this library useful, you can help me by donating toward my monthly b
|
|
54
58
|
# Demo
|
55
59
|
If you are unsure if this library will fix your specific problem, or simply want your json validated online, you can visit the demo site on GitHub pages: https://mangiucugna.github.io/json_repair/
|
56
60
|
|
61
|
+
Or listen to an [audio deep dive generated by Google's NotebookLM](https://notebooklm.google.com/notebook/05312bb3-f6f3-4e49-a99b-bd51db64520b/audio) for an introduction to the module
|
62
|
+
|
57
63
|
---
|
58
64
|
|
59
65
|
# Motivation
|
@@ -64,6 +70,11 @@ I searched for a lightweight python package that was able to reliably fix this p
|
|
64
70
|
|
65
71
|
*So I wrote one*
|
66
72
|
|
73
|
+
### Wouldn't GPT-4o Structured Output make this library outdated?
|
74
|
+
|
75
|
+
As part of my job we use OpenAI APIs and we noticed that even with structured output sometimes the result isn't a fully valid json.
|
76
|
+
So we still use this library to cover those outliers.
|
77
|
+
|
67
78
|
# Supported use cases
|
68
79
|
|
69
80
|
### Fixing Syntax Errors in JSON
|
@@ -144,6 +155,26 @@ and another method to read from a file:
|
|
144
155
|
|
145
156
|
Keep in mind that the library will not catch any IO-related exception and those will need to be managed by you
|
146
157
|
|
158
|
+
### Non-Latin characters
|
159
|
+
|
160
|
+
When working with non-Latin characters (such as Chinese, Japanese, or Korean), you need to pass `ensure_ascii=False` to `repair_json()` in order to preserve the non-Latin characters in the output.
|
161
|
+
|
162
|
+
Here's an example using Chinese characters:
|
163
|
+
|
164
|
+
repair_json("{'test_chinese_ascii':'统一码'}")
|
165
|
+
|
166
|
+
will return
|
167
|
+
|
168
|
+
{"test_chinese_ascii": "\u7edf\u4e00\u7801"}
|
169
|
+
|
170
|
+
Passing `ensure_ascii=False` instead:
|
171
|
+
|
172
|
+
repair_json("{'test_chinese_ascii':'统一码'}", ensure_ascii=False)
|
173
|
+
|
174
|
+
will return
|
175
|
+
|
176
|
+
{"test_chinese_ascii": "统一码"}
|
177
|
+
|
147
178
|
### Performance considerations
|
148
179
|
If you find this library too slow because it is using `json.loads()`, you can skip that step by passing `skip_json_loads=True` to `repair_json`, like:
|
149
180
|
|
@@ -226,7 +257,7 @@ This module will parse the JSON file following the BNF definition:
|
|
226
257
|
<object> ::= '{' [ <member> *(', ' <member>) ] '}' ; A sequence of 'members'
|
227
258
|
<member> ::= <string> ': ' <json> ; A pair consisting of a name, and a JSON value
|
228
259
|
|
229
|
-
If something is wrong (a missing
|
260
|
+
If something is wrong (missing parentheses or quotes, for example), it will use a few simple heuristics to fix the JSON string:
|
230
261
|
- Add the missing parentheses if the parser believes that the array or object should be closed
|
231
262
|
- Quote strings or add missing single quotes
|
232
263
|
- Adjust whitespaces and remove line breaks
|
@@ -0,0 +1,13 @@
|
|
1
|
+
json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
|
2
|
+
json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
|
3
|
+
json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
|
4
|
+
json_repair/json_parser.py,sha256=yZ3UHy5xVTJolhtuthiKEkQGhJg0186GeynIWieoejU,28086
|
5
|
+
json_repair/json_repair.py,sha256=LINLSJBs3cJMfs1YRDaIpfWR5PJLs87Oe06G5yQjY18,9729
|
6
|
+
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
+
json_repair/string_file_wrapper.py,sha256=EHLhNBWoyUitzT08thytYJiNZh_klEFwfT8zutPSdb4,3905
|
8
|
+
json_repair-0.30.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
9
|
+
json_repair-0.30.1.dist-info/METADATA,sha256=Tijwlxi1zqpsWwhTmUYiPmeaI0YdOgQ4S2yqcmh9BDk,11847
|
10
|
+
json_repair-0.30.1.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
11
|
+
json_repair-0.30.1.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
|
12
|
+
json_repair-0.30.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
13
|
+
json_repair-0.30.1.dist-info/RECORD,,
|
@@ -1,13 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
|
2
|
-
json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
|
3
|
-
json_repair/json_context.py,sha256=DdJu3DJR-ANvr8KrWfJqdtOE3uI6_B0VQidKvE3PjJA,1080
|
4
|
-
json_repair/json_parser.py,sha256=UQgXtXTRo0oLb4N7GhPAELWtS0E9zGSPBXfnnGOCgfo,27527
|
5
|
-
json_repair/json_repair.py,sha256=Er6klw5GgmdnLmNM9GXD9gfTi8Mn9cvvTUiVITFA-1E,6101
|
6
|
-
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
-
json_repair/string_file_wrapper.py,sha256=EHLhNBWoyUitzT08thytYJiNZh_klEFwfT8zutPSdb4,3905
|
8
|
-
json_repair-0.30.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
9
|
-
json_repair-0.30.0.dist-info/METADATA,sha256=72pBek4v2f-1zqIwWXjaPcG8reqdX9zpy1dkjT9yspI,10686
|
10
|
-
json_repair-0.30.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
11
|
-
json_repair-0.30.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
|
12
|
-
json_repair-0.30.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
13
|
-
json_repair-0.30.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|