json-repair 0.9.0__tar.gz → 0.10.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {json_repair-0.9.0/src/json_repair.egg-info → json_repair-0.10.1}/PKG-INFO +6 -6
- {json_repair-0.9.0 → json_repair-0.10.1}/README.md +5 -5
- {json_repair-0.9.0 → json_repair-0.10.1}/pyproject.toml +1 -1
- {json_repair-0.9.0 → json_repair-0.10.1}/src/json_repair/json_repair.py +22 -15
- {json_repair-0.9.0 → json_repair-0.10.1/src/json_repair.egg-info}/PKG-INFO +6 -6
- {json_repair-0.9.0 → json_repair-0.10.1}/tests/test_json_repair.py +3 -1
- {json_repair-0.9.0 → json_repair-0.10.1}/tests/test_performance.py +2 -2
- {json_repair-0.9.0 → json_repair-0.10.1}/LICENSE +0 -0
- {json_repair-0.9.0 → json_repair-0.10.1}/setup.cfg +0 -0
- {json_repair-0.9.0 → json_repair-0.10.1}/src/json_repair/__init__.py +0 -0
- {json_repair-0.9.0 → json_repair-0.10.1}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.9.0 → json_repair-0.10.1}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.9.0 → json_repair-0.10.1}/src/json_repair.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.9.0
|
3
|
+
Version: 0.10.1
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -56,20 +56,20 @@ I searched for a lightweight python package that was able to reliably fix this p
|
|
56
56
|
You can use this library to completely replace `json.loads()`:
|
57
57
|
|
58
58
|
import json_repair
|
59
|
-
|
59
|
+
|
60
60
|
decoded_object = json_repair.loads(json_string)
|
61
61
|
|
62
62
|
or just
|
63
63
|
|
64
64
|
import json_repair
|
65
|
-
|
65
|
+
|
66
66
|
decoded_object = json_repair.repair_json(json_string, return_objects=True)
|
67
|
-
|
67
|
+
|
68
68
|
### Performance considerations
|
69
69
|
If you find this library too slow because it is using `json.loads()`, you can skip that by passing `skip_json_loads=True` to `repair_json`. Like:
|
70
70
|
|
71
71
|
from json_repair import repair_json
|
72
|
-
|
72
|
+
|
73
73
|
good_json_string = repair_json(bad_json_string, skip_json_loads=True)
|
74
74
|
|
75
75
|
I made a choice of not using any fast json library to avoid having any external dependency, so that anybody can use it regardless of their stack.
|
@@ -77,7 +77,7 @@ I made a choice of not using any fast json library to avoid having any external
|
|
77
77
|
Some rules of thumb to use:
|
78
78
|
- Setting `return_objects=True` will always be faster because the parser returns an object already and it doesn't have to serialize that object to JSON
|
79
79
|
- `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
|
80
|
-
|
80
|
+
- If you are having issues with escaping, pass the string as a **raw** string like: `r"string with escaping\""`
|
81
81
|
## Adding to requirements
|
82
82
|
**Please pin this library only on the major version!**
|
83
83
|
|
@@ -19,20 +19,20 @@ I searched for a lightweight python package that was able to reliably fix this p
|
|
19
19
|
You can use this library to completely replace `json.loads()`:
|
20
20
|
|
21
21
|
import json_repair
|
22
|
-
|
22
|
+
|
23
23
|
decoded_object = json_repair.loads(json_string)
|
24
24
|
|
25
25
|
or just
|
26
26
|
|
27
27
|
import json_repair
|
28
|
-
|
28
|
+
|
29
29
|
decoded_object = json_repair.repair_json(json_string, return_objects=True)
|
30
|
-
|
30
|
+
|
31
31
|
### Performance considerations
|
32
32
|
If you find this library too slow because it is using `json.loads()`, you can skip that by passing `skip_json_loads=True` to `repair_json`. Like:
|
33
33
|
|
34
34
|
from json_repair import repair_json
|
35
|
-
|
35
|
+
|
36
36
|
good_json_string = repair_json(bad_json_string, skip_json_loads=True)
|
37
37
|
|
38
38
|
I made a choice of not using any fast json library to avoid having any external dependency, so that anybody can use it regardless of their stack.
|
@@ -40,7 +40,7 @@ I made a choice of not using any fast json library to avoid having any external
|
|
40
40
|
Some rules of thumb to use:
|
41
41
|
- Setting `return_objects=True` will always be faster because the parser returns an object already and it doesn't have to serialize that object to JSON
|
42
42
|
- `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
|
43
|
-
|
43
|
+
- If you are having issues with escaping, pass the string as a **raw** string like: `r"string with escaping\""`
|
44
44
|
## Adding to requirements
|
45
45
|
**Please pin this library only on the major version!**
|
46
46
|
|
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.9.0"
|
6
|
+
version = "0.10.1"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -61,7 +61,9 @@ class JSONParser:
|
|
61
61
|
elif char == '"':
|
62
62
|
return self.parse_string()
|
63
63
|
elif char == "'":
|
64
|
-
return self.parse_string(
|
64
|
+
return self.parse_string(string_quotes="'")
|
65
|
+
elif char == "“":
|
66
|
+
return self.parse_string(string_quotes=["“", "”"])
|
65
67
|
# <number> starts with [0-9] or minus
|
66
68
|
elif char.isdigit() or char == "-":
|
67
69
|
return self.parse_number()
|
@@ -102,9 +104,7 @@ class JSONParser:
|
|
102
104
|
# <member> starts with a <string>
|
103
105
|
key = ""
|
104
106
|
while key == "" and self.get_char_at():
|
105
|
-
key = self.
|
106
|
-
use_single_quotes=(self.json_str[self.index] == "'")
|
107
|
-
)
|
107
|
+
key = self.parse_json()
|
108
108
|
|
109
109
|
# This can happen sometimes like { "": "value" }
|
110
110
|
if key == "" and self.get_char_at() == ":":
|
@@ -112,7 +112,7 @@ class JSONParser:
|
|
112
112
|
break
|
113
113
|
|
114
114
|
# We reached the end here
|
115
|
-
if
|
115
|
+
if (self.get_char_at() or "}") == "}":
|
116
116
|
continue
|
117
117
|
|
118
118
|
# An extreme case of missing ":" after a key
|
@@ -170,19 +170,22 @@ class JSONParser:
|
|
170
170
|
self.index += 1
|
171
171
|
return arr
|
172
172
|
|
173
|
-
def parse_string(self,
|
173
|
+
def parse_string(self, string_quotes=False) -> str:
|
174
174
|
# <string> is a string of valid characters enclosed in quotes
|
175
175
|
# i.e. { name: "John" }
|
176
176
|
# Somehow all weird cases in an invalid JSON happen to be resolved in this function, so be careful here
|
177
177
|
|
178
178
|
# Flag to manage corner cases related to missing starting quote
|
179
179
|
fixed_quotes = False
|
180
|
-
|
181
|
-
if
|
182
|
-
|
180
|
+
lstring_delimiter = rstring_delimiter = '"'
|
181
|
+
if isinstance(string_quotes, list):
|
182
|
+
lstring_delimiter = string_quotes[0]
|
183
|
+
rstring_delimiter = string_quotes[1]
|
184
|
+
elif isinstance(string_quotes, str):
|
185
|
+
lstring_delimiter = rstring_delimiter = string_quotes
|
183
186
|
char = self.get_char_at()
|
184
|
-
if char !=
|
185
|
-
self.insert_char_at(
|
187
|
+
if char != lstring_delimiter:
|
188
|
+
self.insert_char_at(lstring_delimiter)
|
186
189
|
fixed_quotes = True
|
187
190
|
else:
|
188
191
|
self.index += 1
|
@@ -198,7 +201,7 @@ class JSONParser:
|
|
198
201
|
# * If we are fixing missing quotes in an object, when it finds the special terminators
|
199
202
|
char = self.get_char_at()
|
200
203
|
fix_broken_markdown_link = False
|
201
|
-
while char and char !=
|
204
|
+
while char and char != rstring_delimiter:
|
202
205
|
if fixed_quotes:
|
203
206
|
if self.context == "object_key" and (char == ":" or char.isspace()):
|
204
207
|
break
|
@@ -206,9 +209,13 @@ class JSONParser:
|
|
206
209
|
break
|
207
210
|
self.index += 1
|
208
211
|
char = self.get_char_at()
|
212
|
+
# If the string contains escaped delimiters we should respect that
|
213
|
+
if char == rstring_delimiter and self.get_char_at(-1) == "\\":
|
214
|
+
self.index += 1
|
215
|
+
char = self.get_char_at()
|
209
216
|
# ChatGPT sometimes forgets to quote links in markdown like: { "content": "[LINK]("https://google.com")" }
|
210
217
|
if (
|
211
|
-
char ==
|
218
|
+
char == rstring_delimiter
|
212
219
|
# Next character is not a comma
|
213
220
|
and self.get_char_at(1) != ","
|
214
221
|
and (
|
@@ -228,8 +235,8 @@ class JSONParser:
|
|
228
235
|
end = self.index
|
229
236
|
|
230
237
|
# A fallout of the previous special case in the while loop, we need to update the index only if we had a closing quote
|
231
|
-
if char !=
|
232
|
-
self.insert_char_at(
|
238
|
+
if char != rstring_delimiter:
|
239
|
+
self.insert_char_at(rstring_delimiter)
|
233
240
|
else:
|
234
241
|
self.index += 1
|
235
242
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.9.0
|
3
|
+
Version: 0.10.1
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -56,20 +56,20 @@ I searched for a lightweight python package that was able to reliably fix this p
|
|
56
56
|
You can use this library to completely replace `json.loads()`:
|
57
57
|
|
58
58
|
import json_repair
|
59
|
-
|
59
|
+
|
60
60
|
decoded_object = json_repair.loads(json_string)
|
61
61
|
|
62
62
|
or just
|
63
63
|
|
64
64
|
import json_repair
|
65
|
-
|
65
|
+
|
66
66
|
decoded_object = json_repair.repair_json(json_string, return_objects=True)
|
67
|
-
|
67
|
+
|
68
68
|
### Performance considerations
|
69
69
|
If you find this library too slow because it is using `json.loads()`, you can skip that by passing `skip_json_loads=True` to `repair_json`. Like:
|
70
70
|
|
71
71
|
from json_repair import repair_json
|
72
|
-
|
72
|
+
|
73
73
|
good_json_string = repair_json(bad_json_string, skip_json_loads=True)
|
74
74
|
|
75
75
|
I made a choice of not using any fast json library to avoid having any external dependency, so that anybody can use it regardless of their stack.
|
@@ -77,7 +77,7 @@ I made a choice of not using any fast json library to avoid having any external
|
|
77
77
|
Some rules of thumb to use:
|
78
78
|
- Setting `return_objects=True` will always be faster because the parser returns an object already and it doesn't have to serialize that object to JSON
|
79
79
|
- `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
|
80
|
-
|
80
|
+
- If you are having issues with escaping, pass the string as a **raw** string like: `r"string with escaping\""`
|
81
81
|
## Adding to requirements
|
82
82
|
**Please pin this library only on the major version!**
|
83
83
|
|
@@ -50,12 +50,13 @@ def test_repair_json():
|
|
50
50
|
assert repair_json("[[1\n\n]") == "[[1]]"
|
51
51
|
assert repair_json("{") == "{}"
|
52
52
|
assert repair_json("}") == '""'
|
53
|
-
assert repair_json('{"') == '{
|
53
|
+
assert repair_json('{"') == '{}'
|
54
54
|
assert repair_json('["') == '[]'
|
55
55
|
assert repair_json("'\"'") == '"\\\""'
|
56
56
|
assert repair_json("'string\"") == '"string\\\""'
|
57
57
|
assert repair_json('{foo: [}') == '{"foo": []}'
|
58
58
|
assert repair_json('{"key": "value:value"}') == '{"key": "value:value"}'
|
59
|
+
assert repair_json('{“slanted_delimiter”: "value"}') == '{"slanted_delimiter": "value"}'
|
59
60
|
assert (
|
60
61
|
repair_json('{"name": "John", "age": 30, "city": "New')
|
61
62
|
== '{"name": "John", "age": 30, "city": "New"}'
|
@@ -102,6 +103,7 @@ def test_repair_json():
|
|
102
103
|
{ "key": "value" }
|
103
104
|
```""") == '{"key": "value"}'
|
104
105
|
assert repair_json('````{ "key": "value" }```') == '{"key": "value"}'
|
106
|
+
assert repair_json(r'{"real_content": "Some string: Some other string Some string <a href=\"https://domain.com\">Some link</a>"') == r'{"real_content": "Some string: Some other string Some string <a href=\\\"https://domain.com\\\">Some link</a>"}'
|
105
107
|
|
106
108
|
|
107
109
|
|
@@ -580,7 +580,7 @@ def test_true_false_correct(benchmark):
|
|
580
580
|
mean_time = benchmark.stats.get("median")
|
581
581
|
|
582
582
|
# Define your time threshold in seconds
|
583
|
-
max_time =
|
583
|
+
max_time = 23 * (1 / 10 ** 6) # 23 microsecond
|
584
584
|
|
585
585
|
# Assert that the average time is below the threshold
|
586
586
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -624,7 +624,7 @@ def test_false_false_correct(benchmark):
|
|
624
624
|
mean_time = benchmark.stats.get("median")
|
625
625
|
|
626
626
|
# Define your time threshold in seconds
|
627
|
-
max_time =
|
627
|
+
max_time = 56 / 10 ** 6 # 56 microsecond
|
628
628
|
|
629
629
|
# Assert that the average time is below the threshold
|
630
630
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|