json-repair 0.17.0__py3-none-any.whl → 0.17.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -227,6 +227,10 @@ class JSONParser:
227
227
  self.index += 1
228
228
  char = self.get_char_at()
229
229
 
230
+ if not char:
231
+ # This is an empty string
232
+ return ""
233
+
230
234
  # Ensuring we use the right delimiter
231
235
  if char == "'":
232
236
  lstring_delimiter = rstring_delimiter = "'"
@@ -298,16 +302,6 @@ class JSONParser:
298
302
  string_acc += char
299
303
  self.index += 1
300
304
  char = self.get_char_at()
301
- # If the string contains an escaped character we should respect that or remove the escape
302
- if self.get_char_at(-1) == "\\":
303
- if char in [rstring_delimiter, "t", "n", "r", "b", "\\"]:
304
- string_acc += char
305
- self.index += 1
306
- char = self.get_char_at()
307
- else:
308
- # Remove this character from the final output
309
- string_acc = string_acc[:-2] + string_acc[-1:]
310
- self.index -= 1
311
305
  # ChatGPT sometimes forgets to quote stuff in HTML tags or Markdown, so we do this whole thing here
312
306
  if char == rstring_delimiter:
313
307
  # Special case here, in case of double quotes one after another
@@ -431,28 +425,40 @@ class JSONParser:
431
425
  return ""
432
426
 
433
427
  def get_char_at(self, count: int = 0) -> Union[str, bool]:
434
- if self.json_fd:
435
- self.json_fd.seek(self.index + count)
436
- char = self.json_fd.read(1)
437
- if char == "":
438
- return False
439
- return char
440
- else:
441
- # Why not use something simpler? Because we might be out of bounds and doing this check all the time is annoying
442
- try:
443
- return self.json_str[self.index + count]
444
- except IndexError:
428
+ # Why not use something simpler? Because try/except in python is a faster alternative to an "if" statement that is often True
429
+ try:
430
+ return self.json_str[self.index + count]
431
+ except IndexError:
432
+ if self.json_fd:
433
+ self.json_fd.seek(self.index + count)
434
+ char = self.json_fd.read(1)
435
+ if char == "":
436
+ return False
437
+ return char
438
+ else:
445
439
  return False
446
440
 
447
441
  def skip_whitespaces_at(self) -> None:
448
442
  """
449
443
  This function quickly iterates on whitespaces, syntactic sugar to make the code more concise
450
444
  """
451
-
452
- char = self.get_char_at()
453
- while char and char.isspace():
454
- self.index += 1
445
+ if self.json_fd:
455
446
  char = self.get_char_at()
447
+ while char and char.isspace():
448
+ self.index += 1
449
+ char = self.get_char_at()
450
+ else:
451
+ # If this is not a file stream, we do this monster here to make this function much much faster
452
+ try:
453
+ char = self.json_str[self.index]
454
+ except IndexError:
455
+ return
456
+ while char.isspace():
457
+ self.index += 1
458
+ try:
459
+ char = self.json_str[self.index]
460
+ except IndexError:
461
+ return
456
462
 
457
463
  def set_context(self, value: str) -> None:
458
464
  # If a value is provided update the context variable and save in stack
@@ -479,11 +485,17 @@ class JSONParser:
479
485
  context = self.json_fd.read(self.logger["window"] * 2)
480
486
  self.json_fd.seek(self.index)
481
487
  else:
482
- context = self.json_str[
483
- self.index
484
- - self.logger["window"] : self.index
485
- + self.logger["window"]
486
- ]
488
+ start = (
489
+ self.index - self.logger["window"]
490
+ if (self.index - self.logger["window"]) >= 0
491
+ else 0
492
+ )
493
+ end = (
494
+ self.index + self.logger["window"]
495
+ if (self.index + self.logger["window"]) <= len(self.json_str)
496
+ else len(self.json_str)
497
+ )
498
+ context = self.json_str[start:end]
487
499
  self.logger["log"].append(
488
500
  {
489
501
  "text": text,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.17.0
3
+ Version: 0.17.2
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -39,6 +39,12 @@ This simple package can be used to fix an invalid json string. To know all cases
39
39
 
40
40
  Inspired by https://github.com/josdejong/jsonrepair
41
41
 
42
+ ---
43
+ # Offer me a beer
44
+ If you find this library useful, you can help me by donating toward my monthly beer budget here: https://github.com/sponsors/mangiucugna
45
+
46
+ ---
47
+
42
48
  # Motivation
43
49
  Some LLMs are a bit iffy when it comes to returning well-formed JSON data, sometimes they skip a parenthesis and sometimes they add some words in it, because that's what an LLM does.
44
50
  Luckily, the mistakes LLMs make are simple enough to be fixed without destroying the content.
@@ -154,10 +160,6 @@ You will need owner access to this repository
154
160
  # Repair JSON in other programming languages
155
161
  - Typescript: https://github.com/josdejong/jsonrepair
156
162
  - Go: https://github.com/RealAlexandreAI/json-repair
157
- ---
158
- # Bonus Content
159
- If you need some good Custom Instructions (System Message) to improve your chatbot responses try https://gist.github.com/mangiucugna/7ec015c4266df11be8aa510be0110fe4
160
-
161
163
  ---
162
164
  ## Star History
163
165
 
@@ -0,0 +1,7 @@
1
+ json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
+ json_repair/json_repair.py,sha256=NeAKskIWOewTpIDY3Z6wxYkDHjrNYHNuF4AFO-xBggI,23052
3
+ json_repair-0.17.2.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
+ json_repair-0.17.2.dist-info/METADATA,sha256=5YoWE0n-iKY2nbZ0D9OQ8QwfA6zZIYVWUkLsYQmxrqo,7333
5
+ json_repair-0.17.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
+ json_repair-0.17.2.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
+ json_repair-0.17.2.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
- json_repair/json_repair.py,sha256=BeThFqLxt2-ln7SA3UPBI47VyxnV2MVDFEaolIOCiNU,22721
3
- json_repair-0.17.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
- json_repair-0.17.0.dist-info/METADATA,sha256=F2KOc46utpaJg3eqlw8oaWhisj3rDnHk-k-5miiWL3U,7355
5
- json_repair-0.17.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
- json_repair-0.17.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
- json_repair-0.17.0.dist-info/RECORD,,