json-repair 0.12.3__tar.gz → 0.13.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {json_repair-0.12.3/src/json_repair.egg-info → json_repair-0.13.1}/PKG-INFO +28 -1
- {json_repair-0.12.3 → json_repair-0.13.1}/README.md +27 -0
- {json_repair-0.12.3 → json_repair-0.13.1}/pyproject.toml +1 -1
- {json_repair-0.12.3 → json_repair-0.13.1}/src/json_repair/__init__.py +2 -0
- {json_repair-0.12.3 → json_repair-0.13.1}/src/json_repair/json_repair.py +37 -4
- {json_repair-0.12.3 → json_repair-0.13.1/src/json_repair.egg-info}/PKG-INFO +28 -1
- {json_repair-0.12.3 → json_repair-0.13.1}/tests/test_json_repair.py +38 -2
- {json_repair-0.12.3 → json_repair-0.13.1}/tests/test_performance.py +1 -1
- {json_repair-0.12.3 → json_repair-0.13.1}/LICENSE +0 -0
- {json_repair-0.12.3 → json_repair-0.13.1}/setup.cfg +0 -0
- {json_repair-0.12.3 → json_repair-0.13.1}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.12.3 → json_repair-0.13.1}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.12.3 → json_repair-0.13.1}/src/json_repair.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.12.3
|
3
|
+
Version: 0.13.1
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -65,6 +65,33 @@ or just
|
|
65
65
|
|
66
66
|
decoded_object = json_repair.repair_json(json_string, return_objects=True)
|
67
67
|
|
68
|
+
### Read json from a file or file descriptor
|
69
|
+
|
70
|
+
JSON repair provides also a drop-in replacement for `json.load()`:
|
71
|
+
|
72
|
+
import json_repair
|
73
|
+
|
74
|
+
try:
|
75
|
+
file_descriptor = open(fname, 'rb')
|
76
|
+
except OSError:
|
77
|
+
...
|
78
|
+
|
79
|
+
with file_descriptor:
|
80
|
+
decoded_object = json_repair.load(file_descriptor)
|
81
|
+
|
82
|
+
and another method to read from a file:
|
83
|
+
|
84
|
+
import json_repair
|
85
|
+
|
86
|
+
try:
|
87
|
+
decoded_object = json_repair.from_file(json_file)
|
88
|
+
except OSError:
|
89
|
+
...
|
90
|
+
except IOError:
|
91
|
+
...
|
92
|
+
|
93
|
+
Keep in mind that the library will not catch any IO-related exception and those will need to be managed by you
|
94
|
+
|
68
95
|
### Performance considerations
|
69
96
|
If you find this library too slow because is using `json.loads()` you can skip that by passing `skip_json_loads=True` to `repair_json`. Like:
|
70
97
|
|
@@ -28,6 +28,33 @@ or just
|
|
28
28
|
|
29
29
|
decoded_object = json_repair.repair_json(json_string, return_objects=True)
|
30
30
|
|
31
|
+
### Read json from a file or file descriptor
|
32
|
+
|
33
|
+
JSON repair provides also a drop-in replacement for `json.load()`:
|
34
|
+
|
35
|
+
import json_repair
|
36
|
+
|
37
|
+
try:
|
38
|
+
file_descriptor = open(fname, 'rb')
|
39
|
+
except OSError:
|
40
|
+
...
|
41
|
+
|
42
|
+
with file_descriptor:
|
43
|
+
decoded_object = json_repair.load(file_descriptor)
|
44
|
+
|
45
|
+
and another method to read from a file:
|
46
|
+
|
47
|
+
import json_repair
|
48
|
+
|
49
|
+
try:
|
50
|
+
decoded_object = json_repair.from_file(json_file)
|
51
|
+
except OSError:
|
52
|
+
...
|
53
|
+
except IOError:
|
54
|
+
...
|
55
|
+
|
56
|
+
Keep in mind that the library will not catch any IO-related exception and those will need to be managed by you
|
57
|
+
|
31
58
|
### Performance considerations
|
32
59
|
If you find this library too slow because is using `json.loads()` you can skip that by passing `skip_json_loads=True` to `repair_json`. Like:
|
33
60
|
|
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.12.3"
|
6
|
+
version = "0.13.1"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -23,7 +23,7 @@ All supported use cases are in the unit tests
|
|
23
23
|
"""
|
24
24
|
|
25
25
|
import json
|
26
|
-
from typing import Any, Dict, List, Union
|
26
|
+
from typing import Any, Dict, List, Union, TextIO
|
27
27
|
|
28
28
|
|
29
29
|
class JSONParser:
|
@@ -34,6 +34,7 @@ class JSONParser:
|
|
34
34
|
self.index = 0
|
35
35
|
# This is used in the object member parsing to manage the special cases of missing quotes in key or value
|
36
36
|
self.context = ""
|
37
|
+
self.context_stack = []
|
37
38
|
|
38
39
|
def parse(self) -> Union[Dict[str, Any], List[Any], str, float, int, bool, None]:
|
39
40
|
return self.parse_json()
|
@@ -97,7 +98,7 @@ class JSONParser:
|
|
97
98
|
|
98
99
|
# We are now searching for they string key
|
99
100
|
# Context is used in the string parser to manage the lack of quotes
|
100
|
-
self.context = "object_key"
|
101
|
+
self.update_context("object_key")
|
101
102
|
|
102
103
|
self.skip_whitespaces_at()
|
103
104
|
|
@@ -119,12 +120,13 @@ class JSONParser:
|
|
119
120
|
if (self.get_char_at() or "") != ":":
|
120
121
|
self.insert_char_at(":")
|
121
122
|
self.index += 1
|
122
|
-
self.context = "object_value"
|
123
|
+
self.update_context("")
|
124
|
+
self.update_context("object_value")
|
123
125
|
# The value can be any valid json
|
124
126
|
value = self.parse_json()
|
125
127
|
|
126
128
|
# Reset context since our job is done
|
127
|
-
self.context = ""
|
129
|
+
self.update_context("")
|
128
130
|
obj[key] = value
|
129
131
|
|
130
132
|
if (self.get_char_at() or "") in [",", "'", '"']:
|
@@ -157,6 +159,9 @@ class JSONParser:
|
|
157
159
|
while char and (char.isspace() or char == ","):
|
158
160
|
self.index += 1
|
159
161
|
char = self.get_char_at()
|
162
|
+
# If this is the right value of an object and we are closing the object, it means the array is over
|
163
|
+
if self.context == "object_value" and char == "}":
|
164
|
+
break
|
160
165
|
|
161
166
|
# Especially at the end of an LLM generated json you might miss the last "]"
|
162
167
|
char = self.get_char_at()
|
@@ -166,6 +171,7 @@ class JSONParser:
|
|
166
171
|
# Remove trailing "," before adding the "]"
|
167
172
|
self.remove_char_at()
|
168
173
|
self.insert_char_at("]")
|
174
|
+
self.index -= 1
|
169
175
|
|
170
176
|
self.index += 1
|
171
177
|
return arr
|
@@ -319,6 +325,19 @@ class JSONParser:
|
|
319
325
|
except IndexError:
|
320
326
|
return
|
321
327
|
|
328
|
+
def update_context(self, value: str) -> None:
|
329
|
+
# If a value is provided update the context variable and save in stack
|
330
|
+
if value:
|
331
|
+
if self.context:
|
332
|
+
self.context_stack.append(self.context)
|
333
|
+
self.context = value
|
334
|
+
# Otherwise pop and update the context, or empty if the stack is empty
|
335
|
+
else:
|
336
|
+
try:
|
337
|
+
self.context = self.context_stack.pop()
|
338
|
+
except Exception:
|
339
|
+
self.context = ""
|
340
|
+
|
322
341
|
|
323
342
|
def repair_json(
|
324
343
|
json_str: str, return_objects: bool = False, skip_json_loads: bool = False
|
@@ -351,3 +370,17 @@ def loads(
|
|
351
370
|
It is a wrapper around the `repair_json()` function with `return_objects=True`.
|
352
371
|
"""
|
353
372
|
return repair_json(json_str, True)
|
373
|
+
|
374
|
+
|
375
|
+
def load(fp: TextIO) -> Union[Dict[str, Any], List[Any], str, float, int, bool, None]:
|
376
|
+
return loads(fp.read())
|
377
|
+
|
378
|
+
|
379
|
+
def from_file(
|
380
|
+
filename: str,
|
381
|
+
) -> Union[Dict[str, Any], List[Any], str, float, int, bool, None]:
|
382
|
+
fd = open(filename)
|
383
|
+
jsonobj = load(fd)
|
384
|
+
fd.close()
|
385
|
+
|
386
|
+
return jsonobj
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.12.3
|
3
|
+
Version: 0.13.1
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -65,6 +65,33 @@ or just
|
|
65
65
|
|
66
66
|
decoded_object = json_repair.repair_json(json_string, return_objects=True)
|
67
67
|
|
68
|
+
### Read json from a file or file descriptor
|
69
|
+
|
70
|
+
JSON repair provides also a drop-in replacement for `json.load()`:
|
71
|
+
|
72
|
+
import json_repair
|
73
|
+
|
74
|
+
try:
|
75
|
+
file_descriptor = open(fname, 'rb')
|
76
|
+
except OSError:
|
77
|
+
...
|
78
|
+
|
79
|
+
with file_descriptor:
|
80
|
+
decoded_object = json_repair.load(file_descriptor)
|
81
|
+
|
82
|
+
and another method to read from a file:
|
83
|
+
|
84
|
+
import json_repair
|
85
|
+
|
86
|
+
try:
|
87
|
+
decoded_object = json_repair.from_file(json_file)
|
88
|
+
except OSError:
|
89
|
+
...
|
90
|
+
except IOError:
|
91
|
+
...
|
92
|
+
|
93
|
+
Keep in mind that the library will not catch any IO-related exception and those will need to be managed by you
|
94
|
+
|
68
95
|
### Performance considerations
|
69
96
|
If you find this library too slow because is using `json.loads()` you can skip that by passing `skip_json_loads=True` to `repair_json`. Like:
|
70
97
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
from src.json_repair.json_repair import repair_json
|
1
|
+
from src.json_repair.json_repair import from_file, repair_json
|
2
2
|
|
3
3
|
|
4
4
|
def test_repair_json():
|
@@ -169,6 +169,25 @@ def test_repair_json_with_objects():
|
|
169
169
|
|
170
170
|
#Test markdown stupidities from ChatGPT
|
171
171
|
assert repair_json('{ "content": "[LINK]("https://google.com")" }', True) == { "content": "[LINK](\"https://google.com\")"}
|
172
|
+
assert repair_json('''
|
173
|
+
{
|
174
|
+
"resourceType": "Bundle",
|
175
|
+
"id": "1",
|
176
|
+
"type": "collection",
|
177
|
+
"entry": [
|
178
|
+
{
|
179
|
+
"resource": {
|
180
|
+
"resourceType": "Patient",
|
181
|
+
"id": "1",
|
182
|
+
"name": [
|
183
|
+
{"use": "official", "family": "Corwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."},
|
184
|
+
{"use": "maiden", "family": "Goodwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}
|
185
|
+
]
|
186
|
+
}
|
187
|
+
}
|
188
|
+
]
|
189
|
+
}
|
190
|
+
''', True) == {"resourceType": "Bundle", "id": "1", "type": "collection", "entry": [{"resource": {"resourceType": "Patient", "id": "1", "name": [{"use": "official", "family": "Corwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}, {"use": "maiden", "family": "Goodwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}]}}]}
|
172
191
|
|
173
192
|
|
174
193
|
def test_repair_json_corner_cases_generate_by_gpt():
|
@@ -240,4 +259,21 @@ def test_repair_json_skip_json_loads():
|
|
240
259
|
assert repair_json('{"key": true, "key2": false, "key3": null}', skip_json_loads=True) == '{"key": true, "key2": false, "key3": null}'
|
241
260
|
assert repair_json('{"key": true, "key2": false, "key3": null}', return_objects=True, skip_json_loads=True) == {"key": True, "key2": False, "key3": None}
|
242
261
|
assert repair_json('{"key": true, "key2": false, "key3": }', skip_json_loads=True) == '{"key": true, "key2": false, "key3": ""}'
|
243
|
-
assert repair_json('{"key": true, "key2": false, "key3": }', return_objects=True, skip_json_loads=True) == {"key": True, "key2": False, "key3": ""}
|
262
|
+
assert repair_json('{"key": true, "key2": false, "key3": }', return_objects=True, skip_json_loads=True) == {"key": True, "key2": False, "key3": ""}
|
263
|
+
|
264
|
+
def test_repair_json_from_file():
|
265
|
+
import os
|
266
|
+
import tempfile
|
267
|
+
|
268
|
+
# Create a temporary file
|
269
|
+
temp_fd, temp_path = tempfile.mkstemp(suffix=".json")
|
270
|
+
try:
|
271
|
+
# Write content to the temporary file
|
272
|
+
with os.fdopen(temp_fd, 'w') as tmp:
|
273
|
+
tmp.write("{")
|
274
|
+
|
275
|
+
assert(from_file(temp_path)) == {}
|
276
|
+
|
277
|
+
finally:
|
278
|
+
# Clean up - delete the temporary file
|
279
|
+
os.remove(temp_path)
|
@@ -580,7 +580,7 @@ def test_true_false_correct(benchmark):
|
|
580
580
|
mean_time = benchmark.stats.get("median")
|
581
581
|
|
582
582
|
# Define your time threshold in seconds
|
583
|
-
max_time =
|
583
|
+
max_time = 24 * (1 / 10 ** 6) # 24 microsecond
|
584
584
|
|
585
585
|
# Assert that the average time is below the threshold
|
586
586
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|