json-repair 0.10.0__tar.gz → 0.10.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {json_repair-0.10.0/src/json_repair.egg-info → json_repair-0.10.1}/PKG-INFO +6 -6
- {json_repair-0.10.0 → json_repair-0.10.1}/README.md +5 -5
- {json_repair-0.10.0 → json_repair-0.10.1}/pyproject.toml +1 -1
- {json_repair-0.10.0 → json_repair-0.10.1}/src/json_repair/json_repair.py +5 -1
- {json_repair-0.10.0 → json_repair-0.10.1/src/json_repair.egg-info}/PKG-INFO +6 -6
- {json_repair-0.10.0 → json_repair-0.10.1}/tests/test_json_repair.py +1 -0
- {json_repair-0.10.0 → json_repair-0.10.1}/tests/test_performance.py +2 -2
- {json_repair-0.10.0 → json_repair-0.10.1}/LICENSE +0 -0
- {json_repair-0.10.0 → json_repair-0.10.1}/setup.cfg +0 -0
- {json_repair-0.10.0 → json_repair-0.10.1}/src/json_repair/__init__.py +0 -0
- {json_repair-0.10.0 → json_repair-0.10.1}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.10.0 → json_repair-0.10.1}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.10.0 → json_repair-0.10.1}/src/json_repair.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.10.0
|
3
|
+
Version: 0.10.1
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -56,20 +56,20 @@ I searched for a lightweight python package that was able to reliably fix this p
|
|
56
56
|
You can use this library to completely replace `json.loads()`:
|
57
57
|
|
58
58
|
import json_repair
|
59
|
-
|
59
|
+
|
60
60
|
decoded_object = json_repair.loads(json_string)
|
61
61
|
|
62
62
|
or just
|
63
63
|
|
64
64
|
import json_repair
|
65
|
-
|
65
|
+
|
66
66
|
decoded_object = json_repair.repair_json(json_string, return_objects=True)
|
67
|
-
|
67
|
+
|
68
68
|
### Performance considerations
|
69
69
|
If you find this library too slow because it is using `json.loads()`, you can skip that step by passing `skip_json_loads=True` to `repair_json`, like:
|
70
70
|
|
71
71
|
from json_repair import repair_json
|
72
|
-
|
72
|
+
|
73
73
|
good_json_string = repair_json(bad_json_string, skip_json_loads=True)
|
74
74
|
|
75
75
|
I made a choice of not using any fast json library to avoid having any external dependency, so that anybody can use it regardless of their stack.
|
@@ -77,7 +77,7 @@ I made a choice of not using any fast json library to avoid having any external
|
|
77
77
|
Some rules of thumb to use:
|
78
78
|
- Setting `return_objects=True` will always be faster because the parser returns an object already and it doesn't have to serialize that object to JSON
|
79
79
|
- `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
|
80
|
-
|
80
|
+
- If you are having issues with escaping, pass the string as a **raw** string, like: `r"string with escaping\""`
|
81
81
|
## Adding to requirements
|
82
82
|
**Please pin this library only on the major version!**
|
83
83
|
|
@@ -19,20 +19,20 @@ I searched for a lightweight python package that was able to reliably fix this p
|
|
19
19
|
You can use this library to completely replace `json.loads()`:
|
20
20
|
|
21
21
|
import json_repair
|
22
|
-
|
22
|
+
|
23
23
|
decoded_object = json_repair.loads(json_string)
|
24
24
|
|
25
25
|
or just
|
26
26
|
|
27
27
|
import json_repair
|
28
|
-
|
28
|
+
|
29
29
|
decoded_object = json_repair.repair_json(json_string, return_objects=True)
|
30
|
-
|
30
|
+
|
31
31
|
### Performance considerations
|
32
32
|
If you find this library too slow because it is using `json.loads()`, you can skip that step by passing `skip_json_loads=True` to `repair_json`, like:
|
33
33
|
|
34
34
|
from json_repair import repair_json
|
35
|
-
|
35
|
+
|
36
36
|
good_json_string = repair_json(bad_json_string, skip_json_loads=True)
|
37
37
|
|
38
38
|
I made a choice of not using any fast json library to avoid having any external dependency, so that anybody can use it regardless of their stack.
|
@@ -40,7 +40,7 @@ I made a choice of not using any fast json library to avoid having any external
|
|
40
40
|
Some rules of thumb to use:
|
41
41
|
- Setting `return_objects=True` will always be faster because the parser returns an object already and it doesn't have to serialize that object to JSON
|
42
42
|
- `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
|
43
|
-
|
43
|
+
- If you are having issues with escaping, pass the string as a **raw** string, like: `r"string with escaping\""`
|
44
44
|
## Adding to requirements
|
45
45
|
**Please pin this library only on the major version!**
|
46
46
|
|
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.10.0"
|
6
|
+
version = "0.10.1"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -209,9 +209,13 @@ class JSONParser:
|
|
209
209
|
break
|
210
210
|
self.index += 1
|
211
211
|
char = self.get_char_at()
|
212
|
+
# If the string contains escaped delimiters we should respect that
|
213
|
+
if char == rstring_delimiter and self.get_char_at(-1) == "\\":
|
214
|
+
self.index += 1
|
215
|
+
char = self.get_char_at()
|
212
216
|
# ChatGPT sometimes forget to quote links in markdown like: { "content": "[LINK]("https://google.com")" }
|
213
217
|
if (
|
214
|
-
|
218
|
+
char == rstring_delimiter
|
215
219
|
# Next character is not a comma
|
216
220
|
and self.get_char_at(1) != ","
|
217
221
|
and (
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.10.0
|
3
|
+
Version: 0.10.1
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -56,20 +56,20 @@ I searched for a lightweight python package that was able to reliably fix this p
|
|
56
56
|
You can use this library to completely replace `json.loads()`:
|
57
57
|
|
58
58
|
import json_repair
|
59
|
-
|
59
|
+
|
60
60
|
decoded_object = json_repair.loads(json_string)
|
61
61
|
|
62
62
|
or just
|
63
63
|
|
64
64
|
import json_repair
|
65
|
-
|
65
|
+
|
66
66
|
decoded_object = json_repair.repair_json(json_string, return_objects=True)
|
67
|
-
|
67
|
+
|
68
68
|
### Performance considerations
|
69
69
|
If you find this library too slow because it is using `json.loads()`, you can skip that step by passing `skip_json_loads=True` to `repair_json`, like:
|
70
70
|
|
71
71
|
from json_repair import repair_json
|
72
|
-
|
72
|
+
|
73
73
|
good_json_string = repair_json(bad_json_string, skip_json_loads=True)
|
74
74
|
|
75
75
|
I made a choice of not using any fast json library to avoid having any external dependency, so that anybody can use it regardless of their stack.
|
@@ -77,7 +77,7 @@ I made a choice of not using any fast json library to avoid having any external
|
|
77
77
|
Some rules of thumb to use:
|
78
78
|
- Setting `return_objects=True` will always be faster because the parser returns an object already and it doesn't have to serialize that object to JSON
|
79
79
|
- `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
|
80
|
-
|
80
|
+
- If you are having issues with escaping, pass the string as a **raw** string, like: `r"string with escaping\""`
|
81
81
|
## Adding to requirements
|
82
82
|
**Please pin this library only on the major version!**
|
83
83
|
|
@@ -103,6 +103,7 @@ def test_repair_json():
|
|
103
103
|
{ "key": "value" }
|
104
104
|
```""") == '{"key": "value"}'
|
105
105
|
assert repair_json('````{ "key": "value" }```') == '{"key": "value"}'
|
106
|
+
assert repair_json(r'{"real_content": "Some string: Some other string Some string <a href=\"https://domain.com\">Some link</a>"') == r'{"real_content": "Some string: Some other string Some string <a href=\\\"https://domain.com\\\">Some link</a>"}'
|
106
107
|
|
107
108
|
|
108
109
|
|
@@ -580,7 +580,7 @@ def test_true_false_correct(benchmark):
|
|
580
580
|
mean_time = benchmark.stats.get("median")
|
581
581
|
|
582
582
|
# Define your time threshold in seconds
|
583
|
-
max_time =
|
583
|
+
max_time = 23 * (1 / 10 ** 6) # 23 microsecond
|
584
584
|
|
585
585
|
# Assert that the average time is below the threshold
|
586
586
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -624,7 +624,7 @@ def test_false_false_correct(benchmark):
|
|
624
624
|
mean_time = benchmark.stats.get("median")
|
625
625
|
|
626
626
|
# Define your time threshold in seconds
|
627
|
-
max_time =
|
627
|
+
max_time = 56 / 10 ** 6 # 56 microsecond
|
628
628
|
|
629
629
|
# Assert that the average time is below the threshold
|
630
630
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|