json-repair 0.17.1__py3-none-any.whl → 0.17.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_repair.py +44 -16
- {json_repair-0.17.1.dist-info → json_repair-0.17.3.dist-info}/METADATA +7 -5
- json_repair-0.17.3.dist-info/RECORD +7 -0
- json_repair-0.17.1.dist-info/RECORD +0 -7
- {json_repair-0.17.1.dist-info → json_repair-0.17.3.dist-info}/LICENSE +0 -0
- {json_repair-0.17.1.dist-info → json_repair-0.17.3.dist-info}/WHEEL +0 -0
- {json_repair-0.17.1.dist-info → json_repair-0.17.3.dist-info}/top_level.txt +0 -0
json_repair/json_repair.py
CHANGED
@@ -113,6 +113,7 @@ class JSONParser:
|
|
113
113
|
# <member> starts with a <string>
|
114
114
|
key = ""
|
115
115
|
while key == "" and self.get_char_at():
|
116
|
+
current_index = self.index
|
116
117
|
key = self.parse_string()
|
117
118
|
|
118
119
|
# This can happen sometimes like { "": "value" }
|
@@ -123,7 +124,8 @@ class JSONParser:
|
|
123
124
|
"info",
|
124
125
|
)
|
125
126
|
break
|
126
|
-
elif key == "":
|
127
|
+
elif key == "" and self.index == current_index:
|
128
|
+
# Sometimes the string search might not move the index at all, which could lead to an infinite loop
|
127
129
|
self.index += 1
|
128
130
|
|
129
131
|
# We reached the end here
|
@@ -302,16 +304,6 @@ class JSONParser:
|
|
302
304
|
string_acc += char
|
303
305
|
self.index += 1
|
304
306
|
char = self.get_char_at()
|
305
|
-
# If the string contains an escaped character we should respect that or remove the escape
|
306
|
-
if self.get_char_at(-1) == "\\":
|
307
|
-
if char in [rstring_delimiter, "t", "n", "r", "b", "\\"]:
|
308
|
-
string_acc += char
|
309
|
-
self.index += 1
|
310
|
-
char = self.get_char_at()
|
311
|
-
else:
|
312
|
-
# Remove this character from the final output
|
313
|
-
string_acc = string_acc[:-2] + string_acc[-1:]
|
314
|
-
self.index -= 1
|
315
307
|
# ChatGPT sometimes forgets to quote stuff in html tags or markdown, so we do this whole thing here
|
316
308
|
if char == rstring_delimiter:
|
317
309
|
# Special case here, in case of double quotes one after another
|
@@ -495,11 +487,17 @@ class JSONParser:
|
|
495
487
|
context = self.json_fd.read(self.logger["window"] * 2)
|
496
488
|
self.json_fd.seek(self.index)
|
497
489
|
else:
|
498
|
-
|
499
|
-
self.index
|
500
|
-
- self.logger["window"]
|
501
|
-
|
502
|
-
|
490
|
+
start = (
|
491
|
+
self.index - self.logger["window"]
|
492
|
+
if (self.index - self.logger["window"]) >= 0
|
493
|
+
else 0
|
494
|
+
)
|
495
|
+
end = (
|
496
|
+
self.index + self.logger["window"]
|
497
|
+
if (self.index + self.logger["window"]) <= len(self.json_str)
|
498
|
+
else len(self.json_str)
|
499
|
+
)
|
500
|
+
context = self.json_str[start:end]
|
503
501
|
self.logger["log"].append(
|
504
502
|
{
|
505
503
|
"text": text,
|
@@ -575,3 +573,33 @@ def from_file(
|
|
575
573
|
fd.close()
|
576
574
|
|
577
575
|
return jsonobj
|
576
|
+
|
577
|
+
|
578
|
+
text = """
|
579
|
+
{
|
580
|
+
|
581
|
+
"Summary": "The customer inquired about the availability of a specific vehicle model and its pricing. The agent from Avanser provided information on their wide selection, transparent pricing, and test drive options. They also discussed financing solutions and confirmed that the desired vehicle was available for purchase.",
|
582
|
+
|
583
|
+
"Brand": "Avanser",
|
584
|
+
"Model": "Corolla",
|
585
|
+
ran a typo in 'model' name, assuming it should be 'Civic',
|
586
|
+
"Primary topic": "Vehicle Availability and Pricing",
|
587
|
+
"Primary topic explanation": "The customer wanted to know if the specific vehicle model was available and its price.",
|
588
|
+
"Secondary topic": "Test Drive Options and Financing Solutions",
|
589
|
+
"Secondary topic explanation": "The agent discussed test drive options, financing solutions, and confirmed availability of the desired vehicle.",
|
590
|
+
"Issue resolution": "Resolved",
|
591
|
+
"Issue resolution explanation": "The customer's inquiry about the vehicle model was addressed by confirming its availability and discussing pricing and additional services."
|
592
|
+
|
593
|
+
}
|
594
|
+
|
595
|
+
Correction: The 'Model' field should be corrected to 'Civic'. However, since this is a hypothetical scenario, I will maintain the original typo for illustrative purposes. If an actual correction were needed, it would look like this:
|
596
|
+
|
597
|
+
"...",
|
598
|
+
"Model": "Corolla",
|
599
|
+
"Model": "Civic",
|
600
|
+
}
|
601
|
+
|
602
|
+
Note: In real-world applications, such corrections should be made to ensure data accuracy and integrity.
|
603
|
+
"""
|
604
|
+
|
605
|
+
print(repair_json(text))
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.17.1
|
3
|
+
Version: 0.17.3
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -39,6 +39,12 @@ This simple package can be used to fix an invalid json string. To know all cases
|
|
39
39
|
|
40
40
|
Inspired by https://github.com/josdejong/jsonrepair
|
41
41
|
|
42
|
+
---
|
43
|
+
# Offer me a beer
|
44
|
+
If you find this library useful, you can help me by donating toward my monthly beer budget here: https://github.com/sponsors/mangiucugna
|
45
|
+
|
46
|
+
---
|
47
|
+
|
42
48
|
# Motivation
|
43
49
|
Some LLMs are a bit iffy when it comes to returning well-formed JSON data: sometimes they skip a parenthesis and sometimes they add some words to it, because that's what an LLM does.
|
44
50
|
Luckily, the mistakes LLMs make are simple enough to be fixed without destroying the content.
|
@@ -154,10 +160,6 @@ You will need owner access to this repository
|
|
154
160
|
# Repair JSON in other programming languages
|
155
161
|
- Typescript: https://github.com/josdejong/jsonrepair
|
156
162
|
- Go: https://github.com/RealAlexandreAI/json-repair
|
157
|
-
---
|
158
|
-
# Bonus Content
|
159
|
-
If you need some good Custom Instructions (System Message) to improve your chatbot responses try https://gist.github.com/mangiucugna/7ec015c4266df11be8aa510be0110fe4
|
160
|
-
|
161
163
|
---
|
162
164
|
## Star History
|
163
165
|
|
@@ -0,0 +1,7 @@
|
|
1
|
+
json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
|
2
|
+
json_repair/json_repair.py,sha256=Mw4aUxWbzws6P-CmFvlHFFQwPxTG7cDO43F0BVTbSCg,24744
|
3
|
+
json_repair-0.17.3.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
+
json_repair-0.17.3.dist-info/METADATA,sha256=TFO1PgPY-bbEakk7M0uEWvgOMJs-SPxbhtG-qorq0oY,7333
|
5
|
+
json_repair-0.17.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
+
json_repair-0.17.3.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
+
json_repair-0.17.3.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
|
2
|
-
json_repair/json_repair.py,sha256=STzwcsoAV8jB1hXQXKs9vYMhemV22vCiH14jyVG4v4A,23311
|
3
|
-
json_repair-0.17.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
-
json_repair-0.17.1.dist-info/METADATA,sha256=LdjjpdQsJ1WuyQ28Z36cvDfMJE91lO4iHV2NhQ_RqNc,7355
|
5
|
-
json_repair-0.17.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
-
json_repair-0.17.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
-
json_repair-0.17.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|