json-repair 0.17.0__py3-none-any.whl → 0.17.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_repair.py +42 -30
- {json_repair-0.17.0.dist-info → json_repair-0.17.2.dist-info}/METADATA +7 -5
- json_repair-0.17.2.dist-info/RECORD +7 -0
- json_repair-0.17.0.dist-info/RECORD +0 -7
- {json_repair-0.17.0.dist-info → json_repair-0.17.2.dist-info}/LICENSE +0 -0
- {json_repair-0.17.0.dist-info → json_repair-0.17.2.dist-info}/WHEEL +0 -0
- {json_repair-0.17.0.dist-info → json_repair-0.17.2.dist-info}/top_level.txt +0 -0
json_repair/json_repair.py
CHANGED
@@ -227,6 +227,10 @@ class JSONParser:
|
|
227
227
|
self.index += 1
|
228
228
|
char = self.get_char_at()
|
229
229
|
|
230
|
+
if not char:
|
231
|
+
# This is an empty string
|
232
|
+
return ""
|
233
|
+
|
230
234
|
# Ensuring we use the right delimiter
|
231
235
|
if char == "'":
|
232
236
|
lstring_delimiter = rstring_delimiter = "'"
|
@@ -298,16 +302,6 @@ class JSONParser:
|
|
298
302
|
string_acc += char
|
299
303
|
self.index += 1
|
300
304
|
char = self.get_char_at()
|
301
|
-
# If the string contains an escaped character we should respect that or remove the escape
|
302
|
-
if self.get_char_at(-1) == "\\":
|
303
|
-
if char in [rstring_delimiter, "t", "n", "r", "b", "\\"]:
|
304
|
-
string_acc += char
|
305
|
-
self.index += 1
|
306
|
-
char = self.get_char_at()
|
307
|
-
else:
|
308
|
-
# Remove this character from the final output
|
309
|
-
string_acc = string_acc[:-2] + string_acc[-1:]
|
310
|
-
self.index -= 1
|
311
305
|
# ChatGPT sometimes forget to quote stuff in html tags or markdown, so we do this whole thing here
|
312
306
|
if char == rstring_delimiter:
|
313
307
|
# Special case here, in case of double quotes one after another
|
@@ -431,28 +425,40 @@ class JSONParser:
|
|
431
425
|
return ""
|
432
426
|
|
433
427
|
def get_char_at(self, count: int = 0) -> Union[str, bool]:
|
434
|
-
if
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
return
|
444
|
-
|
428
|
+
# Why not use something simpler? Because try/except in python is a faster alternative to an "if" statement that is often True
|
429
|
+
try:
|
430
|
+
return self.json_str[self.index + count]
|
431
|
+
except IndexError:
|
432
|
+
if self.json_fd:
|
433
|
+
self.json_fd.seek(self.index + count)
|
434
|
+
char = self.json_fd.read(1)
|
435
|
+
if char == "":
|
436
|
+
return False
|
437
|
+
return char
|
438
|
+
else:
|
445
439
|
return False
|
446
440
|
|
447
441
|
def skip_whitespaces_at(self) -> None:
|
448
442
|
"""
|
449
443
|
This function quickly iterates on whitespaces, syntactic sugar to make the code more concise
|
450
444
|
"""
|
451
|
-
|
452
|
-
char = self.get_char_at()
|
453
|
-
while char and char.isspace():
|
454
|
-
self.index += 1
|
445
|
+
if self.json_fd:
|
455
446
|
char = self.get_char_at()
|
447
|
+
while char and char.isspace():
|
448
|
+
self.index += 1
|
449
|
+
char = self.get_char_at()
|
450
|
+
else:
|
451
|
+
# If this is not a file stream, we do this monster here to make this function much much faster
|
452
|
+
try:
|
453
|
+
char = self.json_str[self.index]
|
454
|
+
except IndexError:
|
455
|
+
return
|
456
|
+
while char.isspace():
|
457
|
+
self.index += 1
|
458
|
+
try:
|
459
|
+
char = self.json_str[self.index]
|
460
|
+
except IndexError:
|
461
|
+
return
|
456
462
|
|
457
463
|
def set_context(self, value: str) -> None:
|
458
464
|
# If a value is provided update the context variable and save in stack
|
@@ -479,11 +485,17 @@ class JSONParser:
|
|
479
485
|
context = self.json_fd.read(self.logger["window"] * 2)
|
480
486
|
self.json_fd.seek(self.index)
|
481
487
|
else:
|
482
|
-
|
483
|
-
self.index
|
484
|
-
- self.logger["window"]
|
485
|
-
|
486
|
-
|
488
|
+
start = (
|
489
|
+
self.index - self.logger["window"]
|
490
|
+
if (self.index - self.logger["window"]) >= 0
|
491
|
+
else 0
|
492
|
+
)
|
493
|
+
end = (
|
494
|
+
self.index + self.logger["window"]
|
495
|
+
if (self.index + self.logger["window"]) <= len(self.json_str)
|
496
|
+
else len(self.json_str)
|
497
|
+
)
|
498
|
+
context = self.json_str[start:end]
|
487
499
|
self.logger["log"].append(
|
488
500
|
{
|
489
501
|
"text": text,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.17.0
|
3
|
+
Version: 0.17.2
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -39,6 +39,12 @@ This simple package can be used to fix an invalid json string. To know all cases
|
|
39
39
|
|
40
40
|
Inspired by https://github.com/josdejong/jsonrepair
|
41
41
|
|
42
|
+
---
|
43
|
+
# Offer me a beer
|
44
|
+
If you find this library useful, you can help me by donating toward my monthly beer budget here: https://github.com/sponsors/mangiucugna
|
45
|
+
|
46
|
+
---
|
47
|
+
|
42
48
|
# Motivation
|
43
49
|
Some LLMs are a bit iffy when it comes to returning well formed JSON data, sometimes they skip a parentheses and sometimes they add some words in it, because that's what an LLM does.
|
44
50
|
Luckily, the mistakes LLMs make are simple enough to be fixed without destroying the content.
|
@@ -154,10 +160,6 @@ You will need owner access to this repository
|
|
154
160
|
# Repair JSON in other programming languages
|
155
161
|
- Typescript: https://github.com/josdejong/jsonrepair
|
156
162
|
- Go: https://github.com/RealAlexandreAI/json-repair
|
157
|
-
---
|
158
|
-
# Bonus Content
|
159
|
-
If you need some good Custom Instructions (System Message) to improve your chatbot responses try https://gist.github.com/mangiucugna/7ec015c4266df11be8aa510be0110fe4
|
160
|
-
|
161
163
|
---
|
162
164
|
## Star History
|
163
165
|
|
@@ -0,0 +1,7 @@
|
|
1
|
+
json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
|
2
|
+
json_repair/json_repair.py,sha256=NeAKskIWOewTpIDY3Z6wxYkDHjrNYHNuF4AFO-xBggI,23052
|
3
|
+
json_repair-0.17.2.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
+
json_repair-0.17.2.dist-info/METADATA,sha256=5YoWE0n-iKY2nbZ0D9OQ8QwfA6zZIYVWUkLsYQmxrqo,7333
|
5
|
+
json_repair-0.17.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
+
json_repair-0.17.2.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
+
json_repair-0.17.2.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
|
2
|
-
json_repair/json_repair.py,sha256=BeThFqLxt2-ln7SA3UPBI47VyxnV2MVDFEaolIOCiNU,22721
|
3
|
-
json_repair-0.17.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
-
json_repair-0.17.0.dist-info/METADATA,sha256=F2KOc46utpaJg3eqlw8oaWhisj3rDnHk-k-5miiWL3U,7355
|
5
|
-
json_repair-0.17.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
-
json_repair-0.17.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
-
json_repair-0.17.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|