json-repair 0.23.1__py3-none-any.whl → 0.25.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -91,6 +91,10 @@ class JSONParser:
91
91
  ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
92
92
  json = self.parse_json()
93
93
  if self.index < len(self.json_str):
94
+ self.log(
95
+ "The parser returned early, checking if there's more json elements",
96
+ "info",
97
+ )
94
98
  json = [json]
95
99
  last_index = self.index
96
100
  while self.index < len(self.json_str):
@@ -100,10 +104,13 @@ class JSONParser:
100
104
  if self.index == last_index:
101
105
  self.index += 1
102
106
  last_index = self.index
107
+ # If nothing extra was found, don't return an array
103
108
  if len(json) == 1:
109
+ self.log(
110
+ "There were no more elements, returning the element without the array",
111
+ "info",
112
+ )
104
113
  json = json[0]
105
- elif len(json) == 0:
106
- json = ""
107
114
  if self.logger.log_level == "none":
108
115
  return json
109
116
  else:
@@ -363,9 +370,34 @@ class JSONParser:
363
370
  if self.get_context() == "object_key" and (
364
371
  char == ":" or char.isspace()
365
372
  ):
373
+ self.log(
374
+ "While parsing a string missing the left delimiter in object key context, we found a :, stopping here",
375
+ "info",
376
+ )
366
377
  break
367
378
  elif self.get_context() == "object_value" and char in [",", "}"]:
368
- break
379
+ rstring_delimiter_missing = True
380
+ # check if this is a case in which the closing quote is NOT missing instead
381
+ i = 1
382
+ next_c = self.get_char_at(i)
383
+ while next_c and next_c != rstring_delimiter:
384
+ i += 1
385
+ next_c = self.get_char_at(i)
386
+ if next_c:
387
+ i += 1
388
+ next_c = self.get_char_at(i)
389
+ # found a delimiter, now we need to check that it is followed strictly by a comma or brace
390
+ while next_c and next_c.isspace():
391
+ i += 1
392
+ next_c = self.get_char_at(i)
393
+ if next_c and next_c in [",", "}"]:
394
+ rstring_delimiter_missing = False
395
+ if rstring_delimiter_missing:
396
+ self.log(
397
+ "While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn't determine that a right delimiter was present. Stopping here",
398
+ "info",
399
+ )
400
+ break
369
401
  string_acc += char
370
402
  self.index += 1
371
403
  char = self.get_char_at()
@@ -386,6 +418,33 @@ class JSONParser:
386
418
  "While parsing a string, we found a doubled quote, ignoring it",
387
419
  "info",
388
420
  )
421
+ elif missing_quotes and self.get_context() == "object_value":
422
+ # In case of a missing starting quote, I need to check whether the delimiter is the end of this value or the beginning of the next key
423
+ i = 1
424
+ next_c = self.get_char_at(i)
425
+ while next_c and next_c not in [
426
+ rstring_delimiter,
427
+ lstring_delimiter,
428
+ ]:
429
+ i += 1
430
+ next_c = self.get_char_at(i)
431
+ if next_c:
432
+ # We found a quote, now let's make sure there's a ":" following
433
+ i += 1
434
+ next_c = self.get_char_at(i)
435
+ # skip any whitespace after the quote, then check that it is followed strictly by a colon
436
+ while next_c and next_c.isspace():
437
+ i += 1
438
+ next_c = self.get_char_at(i)
439
+ if next_c and next_c == ":":
440
+ # Reset the cursor
441
+ self.index -= 1
442
+ char = self.get_char_at()
443
+ self.log(
444
+ "In a string with missing quotes and object value context, I found a delimeter but it turns out it was the beginning on the next key. Stopping here.",
445
+ "info",
446
+ )
447
+ break
389
448
  else:
390
449
  # Check if eventually there is a rstring delimiter, otherwise we bail
391
450
  i = 1
@@ -496,7 +555,8 @@ class JSONParser:
496
555
  number_str = ""
497
556
  number_chars = set("0123456789-.eE/,")
498
557
  char = self.get_char_at()
499
- while char and char in number_chars:
558
+ is_array = self.get_context() == "array"
559
+ while char and char in number_chars and (char != "," or not is_array):
500
560
  number_str += char
501
561
  self.index += 1
502
562
  char = self.get_char_at()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.23.1
3
+ Version: 0.25.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -45,6 +45,11 @@ If you find this library useful, you can help me by donating toward my monthly b
45
45
 
46
46
  ---
47
47
 
48
+ # Demo
49
+ If you are unsure if this library will fix your specific problem, or simply want your json validated online, you can visit the demo site on GitHub pages: https://mangiucugna.github.io/json_repair/
50
+
51
+ ---
52
+
48
53
  # Motivation
49
54
  Some LLMs are a bit iffy when it comes to returning well-formed JSON data, sometimes they skip a parenthesis and sometimes they add some words in it, because that's what an LLM does.
50
55
  Luckily, the mistakes LLMs make are simple enough to be fixed without destroying the content.
@@ -160,6 +165,7 @@ You will need owner access to this repository
160
165
  # Repair JSON in other programming languages
161
166
  - Typescript: https://github.com/josdejong/jsonrepair
162
167
  - Go: https://github.com/RealAlexandreAI/json-repair
168
+ - Ruby: https://github.com/sashazykov/json-repair-rb
163
169
  ---
164
170
  ## Star History
165
171
 
@@ -0,0 +1,7 @@
1
+ json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
+ json_repair/json_repair.py,sha256=RyxtqoMopJvEwVestHDP_D6FnVkSt3bmmr1liYOQwHE,30976
3
+ json_repair-0.25.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
+ json_repair-0.25.0.dist-info/METADATA,sha256=K6rBtBRbVuRQICWknDvDm3OnNq_bGrwT7K6U99PPYmg,7596
5
+ json_repair-0.25.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
+ json_repair-0.25.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
+ json_repair-0.25.0.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
- json_repair/json_repair.py,sha256=3OUrtLLPc0fK0_U6H5-2wEZp1I6GqF5u9K5It6VOIzM,27629
3
- json_repair-0.23.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
- json_repair-0.23.1.dist-info/METADATA,sha256=ujJO8L3zXSI8SRrZSF8qkcAaM8aj7XHWgx88x_OAnmA,7333
5
- json_repair-0.23.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
- json_repair-0.23.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
- json_repair-0.23.1.dist-info/RECORD,,