json-repair 0.23.1__py3-none-any.whl → 0.24.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_repair.py +29 -4
- {json_repair-0.23.1.dist-info → json_repair-0.24.0.dist-info}/METADATA +7 -1
- json_repair-0.24.0.dist-info/RECORD +7 -0
- json_repair-0.23.1.dist-info/RECORD +0 -7
- {json_repair-0.23.1.dist-info → json_repair-0.24.0.dist-info}/LICENSE +0 -0
- {json_repair-0.23.1.dist-info → json_repair-0.24.0.dist-info}/WHEEL +0 -0
- {json_repair-0.23.1.dist-info → json_repair-0.24.0.dist-info}/top_level.txt +0 -0
json_repair/json_repair.py
CHANGED
@@ -91,6 +91,10 @@ class JSONParser:
|
|
91
91
|
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
|
92
92
|
json = self.parse_json()
|
93
93
|
if self.index < len(self.json_str):
|
94
|
+
self.log(
|
95
|
+
"The parser returned early, checking if there's more json elements",
|
96
|
+
"info",
|
97
|
+
)
|
94
98
|
json = [json]
|
95
99
|
last_index = self.index
|
96
100
|
while self.index < len(self.json_str):
|
@@ -100,10 +104,13 @@ class JSONParser:
|
|
100
104
|
if self.index == last_index:
|
101
105
|
self.index += 1
|
102
106
|
last_index = self.index
|
107
|
+
# If nothing extra was found, don't return an array
|
103
108
|
if len(json) == 1:
|
109
|
+
self.log(
|
110
|
+
"There were no more elements, returning the element without the array",
|
111
|
+
"info",
|
112
|
+
)
|
104
113
|
json = json[0]
|
105
|
-
elif len(json) == 0:
|
106
|
-
json = ""
|
107
114
|
if self.logger.log_level == "none":
|
108
115
|
return json
|
109
116
|
else:
|
@@ -365,7 +372,24 @@ class JSONParser:
|
|
365
372
|
):
|
366
373
|
break
|
367
374
|
elif self.get_context() == "object_value" and char in [",", "}"]:
|
368
|
-
|
375
|
+
rstring_delimiter_missing = True
|
376
|
+
# check if this is a case in which the closing comma is NOT missing instead
|
377
|
+
i = 1
|
378
|
+
next_c = self.get_char_at(i)
|
379
|
+
while next_c and next_c != rstring_delimiter:
|
380
|
+
i += 1
|
381
|
+
next_c = self.get_char_at(i)
|
382
|
+
if next_c:
|
383
|
+
i += 1
|
384
|
+
next_c = self.get_char_at(i)
|
385
|
+
# found a delimiter, now we need to check that it is followed strictly by a comma or brace
|
386
|
+
while next_c and next_c.isspace():
|
387
|
+
i += 1
|
388
|
+
next_c = self.get_char_at(i)
|
389
|
+
if next_c and next_c in [",", "}"]:
|
390
|
+
rstring_delimiter_missing = False
|
391
|
+
if rstring_delimiter_missing:
|
392
|
+
break
|
369
393
|
string_acc += char
|
370
394
|
self.index += 1
|
371
395
|
char = self.get_char_at()
|
@@ -496,7 +520,8 @@ class JSONParser:
|
|
496
520
|
number_str = ""
|
497
521
|
number_chars = set("0123456789-.eE/,")
|
498
522
|
char = self.get_char_at()
|
499
|
-
|
523
|
+
is_array = self.get_context() == "array"
|
524
|
+
while char and char in number_chars and (char != "," or not is_array):
|
500
525
|
number_str += char
|
501
526
|
self.index += 1
|
502
527
|
char = self.get_char_at()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.23.1
|
3
|
+
Version: 0.24.0
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -45,6 +45,11 @@ If you find this library useful, you can help me by donating toward my monthly b
|
|
45
45
|
|
46
46
|
---
|
47
47
|
|
48
|
+
# Demo
|
49
|
+
If you are unsure if this library will fix your specific problem, or simply want your json validated online, you can visit the demo site on GitHub pages: https://mangiucugna.github.io/json_repair/
|
50
|
+
|
51
|
+
---
|
52
|
+
|
48
53
|
# Motivation
|
49
54
|
Some LLMs are a bit iffy when it comes to returning well formed JSON data, sometimes they skip a parenthesis and sometimes they add some words in it, because that's what an LLM does.
|
50
55
|
Luckily, the mistakes LLMs make are simple enough to be fixed without destroying the content.
|
@@ -160,6 +165,7 @@ You will need owner access to this repository
|
|
160
165
|
# Repair JSON in other programming languages
|
161
166
|
- Typescript: https://github.com/josdejong/jsonrepair
|
162
167
|
- Go: https://github.com/RealAlexandreAI/json-repair
|
168
|
+
- Ruby: https://github.com/sashazykov/json-repair-rb
|
163
169
|
---
|
164
170
|
## Star History
|
165
171
|
|
@@ -0,0 +1,7 @@
|
|
1
|
+
json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
|
2
|
+
json_repair/json_repair.py,sha256=jM509L0rg8AiWksnYdOqueopP8fmh0MJxpxx7LvaoiM,28954
|
3
|
+
json_repair-0.24.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
+
json_repair-0.24.0.dist-info/METADATA,sha256=kSxFAdkH_qxJMX85DKW605BOZrbh8VCWzsAvP_X80cM,7596
|
5
|
+
json_repair-0.24.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
+
json_repair-0.24.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
+
json_repair-0.24.0.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
|
2
|
-
json_repair/json_repair.py,sha256=3OUrtLLPc0fK0_U6H5-2wEZp1I6GqF5u9K5It6VOIzM,27629
|
3
|
-
json_repair-0.23.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
-
json_repair-0.23.1.dist-info/METADATA,sha256=ujJO8L3zXSI8SRrZSF8qkcAaM8aj7XHWgx88x_OAnmA,7333
|
5
|
-
json_repair-0.23.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
-
json_repair-0.23.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
-
json_repair-0.23.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|