json-repair 0.17.1__py3-none-any.whl → 0.17.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -113,6 +113,7 @@ class JSONParser:
113
113
  # <member> starts with a <string>
114
114
  key = ""
115
115
  while key == "" and self.get_char_at():
116
+ current_index = self.index
116
117
  key = self.parse_string()
117
118
 
118
119
  # This can happen sometimes like { "": "value" }
@@ -123,7 +124,8 @@ class JSONParser:
123
124
  "info",
124
125
  )
125
126
  break
126
- elif key == "":
127
+ elif key == "" and self.index == current_index:
128
+ # Sometimes the string search might not move the index at all, which could lead to an infinite loop
127
129
  self.index += 1
128
130
 
129
131
  # We reached the end here
@@ -302,16 +304,6 @@ class JSONParser:
302
304
  string_acc += char
303
305
  self.index += 1
304
306
  char = self.get_char_at()
305
- # If the string contains an escaped character we should respect that or remove the escape
306
- if self.get_char_at(-1) == "\\":
307
- if char in [rstring_delimiter, "t", "n", "r", "b", "\\"]:
308
- string_acc += char
309
- self.index += 1
310
- char = self.get_char_at()
311
- else:
312
- # Remove this character from the final output
313
- string_acc = string_acc[:-2] + string_acc[-1:]
314
- self.index -= 1
315
307
  # ChatGPT sometimes forgets to quote stuff in html tags or markdown, so we do this whole thing here
316
308
  if char == rstring_delimiter:
317
309
  # Special case here, in case of double quotes one after another
@@ -495,11 +487,17 @@ class JSONParser:
495
487
  context = self.json_fd.read(self.logger["window"] * 2)
496
488
  self.json_fd.seek(self.index)
497
489
  else:
498
- context = self.json_str[
499
- self.index
500
- - self.logger["window"] : self.index
501
- + self.logger["window"]
502
- ]
490
+ start = (
491
+ self.index - self.logger["window"]
492
+ if (self.index - self.logger["window"]) >= 0
493
+ else 0
494
+ )
495
+ end = (
496
+ self.index + self.logger["window"]
497
+ if (self.index + self.logger["window"]) <= len(self.json_str)
498
+ else len(self.json_str)
499
+ )
500
+ context = self.json_str[start:end]
503
501
  self.logger["log"].append(
504
502
  {
505
503
  "text": text,
@@ -575,3 +573,33 @@ def from_file(
575
573
  fd.close()
576
574
 
577
575
  return jsonobj
576
+
577
+
578
+ text = """
579
+ {
580
+
581
+ "Summary": "The customer inquired about the availability of a specific vehicle model and its pricing. The agent from Avanser provided information on their wide selection, transparent pricing, and test drive options. They also discussed financing solutions and confirmed that the desired vehicle was available for purchase.",
582
+
583
+ "Brand": "Avanser",
584
+ "Model": "Corolla",
585
+ ran a typo in 'model' name, assuming it should be 'Civic',
586
+ "Primary topic": "Vehicle Availability and Pricing",
587
+ "Primary topic explanation": "The customer wanted to know if the specific vehicle model was available and its price.",
588
+ "Secondary topic": "Test Drive Options and Financing Solutions",
589
+ "Secondary topic explanation": "The agent discussed test drive options, financing solutions, and confirmed availability of the desired vehicle.",
590
+ "Issue resolution": "Resolved",
591
+ "Issue resolution explanation": "The customer's inquiry about the vehicle model was addressed by confirming its availability and discussing pricing and additional services."
592
+
593
+ }
594
+
595
+ Correction: The 'Model' field should be corrected to 'Civic'. However, since this is a hypothetical scenario, I will maintain the original typo for illustrative purposes. If an actual correction were needed, it would look like this:
596
+
597
+ "...",
598
+ "Model": "Corolla",
599
+ "Model": "Civic",
600
+ }
601
+
602
+ Note: In real-world applications, such corrections should be made to ensure data accuracy and integrity.
603
+ """
604
+
605
+ print(repair_json(text))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.17.1
3
+ Version: 0.17.3
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -39,6 +39,12 @@ This simple package can be used to fix an invalid json string. To know all cases
39
39
 
40
40
  Inspired by https://github.com/josdejong/jsonrepair
41
41
 
42
+ ---
43
+ # Offer me a beer
44
+ If you find this library useful, you can help me by donating toward my monthly beer budget here: https://github.com/sponsors/mangiucugna
45
+
46
+ ---
47
+
42
48
  # Motivation
43
49
  Some LLMs are a bit iffy when it comes to returning well formed JSON data, sometimes they skip a parenthesis and sometimes they add some words in it, because that's what an LLM does.
44
50
  Luckily, the mistakes LLMs make are simple enough to be fixed without destroying the content.
@@ -154,10 +160,6 @@ You will need owner access to this repository
154
160
  # Repair JSON in other programming languages
155
161
  - Typescript: https://github.com/josdejong/jsonrepair
156
162
  - Go: https://github.com/RealAlexandreAI/json-repair
157
- ---
158
- # Bonus Content
159
- If you need some good Custom Instructions (System Message) to improve your chatbot responses try https://gist.github.com/mangiucugna/7ec015c4266df11be8aa510be0110fe4
160
-
161
163
  ---
162
164
  ## Star History
163
165
 
@@ -0,0 +1,7 @@
1
+ json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
+ json_repair/json_repair.py,sha256=Mw4aUxWbzws6P-CmFvlHFFQwPxTG7cDO43F0BVTbSCg,24744
3
+ json_repair-0.17.3.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
+ json_repair-0.17.3.dist-info/METADATA,sha256=TFO1PgPY-bbEakk7M0uEWvgOMJs-SPxbhtG-qorq0oY,7333
5
+ json_repair-0.17.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
+ json_repair-0.17.3.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
+ json_repair-0.17.3.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
- json_repair/json_repair.py,sha256=STzwcsoAV8jB1hXQXKs9vYMhemV22vCiH14jyVG4v4A,23311
3
- json_repair-0.17.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
- json_repair-0.17.1.dist-info/METADATA,sha256=LdjjpdQsJ1WuyQ28Z36cvDfMJE91lO4iHV2NhQ_RqNc,7355
5
- json_repair-0.17.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
- json_repair-0.17.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
- json_repair-0.17.1.dist-info/RECORD,,