json-repair 0.39.0__py3-none-any.whl → 0.39.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,7 @@ JSONReturnType = Union[Dict[str, Any], List[Any], str, float, int, bool, None]
9
9
  class JSONParser:
10
10
  # Constants
11
11
  STRING_DELIMITERS = ['"', "'", "“", "”"]
12
+ NUMBER_CHARS = set("0123456789-.eE/,")
12
13
 
13
14
  def __init__(
14
15
  self,
@@ -129,8 +130,6 @@ class JSONParser:
129
130
  # Context is used in the string parser to manage the lack of quotes
130
131
  self.context.set(ContextValues.OBJECT_KEY)
131
132
 
132
- self.skip_whitespaces_at()
133
-
134
133
  # Save this index in case we need to find a duplicate key
135
134
  rollback_index = self.index
136
135
 
@@ -219,18 +218,13 @@ class JSONParser:
219
218
  char = self.get_char_at()
220
219
 
221
220
  # Especially at the end of an LLM generated json you might miss the last "]"
222
- char = self.get_char_at()
223
221
  if char and char != "]":
224
222
  self.log(
225
- "While parsing an array we missed the closing ], adding it back",
226
- )
227
- self.index -= 1
228
- # Add the missing closing bracket
229
- self.json_str = (
230
- self.json_str[: self.index + 1] + "]" + self.json_str[self.index + 1 :]
223
+ "While parsing an array we missed the closing ], ignoring it",
231
224
  )
232
225
 
233
226
  self.index += 1
227
+
234
228
  self.context.reset()
235
229
  return arr
236
230
 
@@ -275,15 +269,11 @@ class JSONParser:
275
269
  self.log(
276
270
  "While parsing a string, we found a literal instead of a quote",
277
271
  )
278
- self.log(
279
- "While parsing a string, we found no starting quote. Will add the quote back",
280
- )
281
272
  missing_quotes = True
282
273
 
283
274
  if not missing_quotes:
284
275
  self.index += 1
285
276
 
286
- self.skip_whitespaces_at()
287
277
  # There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
288
278
  if self.get_char_at() in self.STRING_DELIMITERS:
289
279
  # If the next character is the same type of quote, then we manage it as double quotes
@@ -583,6 +573,13 @@ class JSONParser:
583
573
  elif (
584
574
  next_c == rstring_delimiter and self.get_char_at(i - 1) != "\\"
585
575
  ):
576
+ # Check if self.index:self.index+i is only whitespaces, break if that's the case
577
+ if all(
578
+ str(self.get_char_at(j)).isspace()
579
+ for j in range(1, i)
580
+ if self.get_char_at(j)
581
+ ):
582
+ break
586
583
  if self.context.current == ContextValues.OBJECT_VALUE:
587
584
  # But this might not be it! This could be just a missing comma
588
585
  # We found a delimiter and we need to check if this is a key
@@ -610,26 +607,16 @@ class JSONParser:
610
607
  self.index += 1
611
608
  char = self.get_char_at()
612
609
  elif self.context.current == ContextValues.ARRAY:
613
- # In array context this could be something like "lorem "ipsum" sic"
614
- # So let's check if we find a rstring_delimiter forward otherwise end early
615
- i = self.skip_to_character(rstring_delimiter, idx=i + 1)
616
- next_c = self.get_char_at(i)
617
- if next_c and next_c == rstring_delimiter:
618
- # Ok now if I find a comma or a closing ], that can be have also an optional rstring_delimiter before them
619
- # We can consider this a misplaced quote
620
- i += 1
621
- i = self.skip_whitespaces_at(
622
- idx=i, move_main_index=False
623
- )
624
- next_c = self.get_char_at(i)
625
- if next_c and next_c in [",", "]"]:
626
- self.log(
627
- "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
628
- )
629
- unmatched_delimiter = not unmatched_delimiter
630
- string_acc += str(char)
631
- self.index += 1
632
- char = self.get_char_at()
610
+ # If we got up to here it means that this is a situation like this:
611
+ # ["bla bla bla "puppy" bla bla bla "kitty" bla bla"]
612
+ # So we need to ignore this quote
613
+ self.log(
614
+ "While parsing a string in Array context, we detected a quoted section that would have closed the string but has a different meaning here, ignoring it",
615
+ )
616
+ unmatched_delimiter = not unmatched_delimiter
617
+ string_acc += str(char)
618
+ self.index += 1
619
+ char = self.get_char_at()
633
620
 
634
621
  if (
635
622
  char
@@ -663,10 +650,9 @@ class JSONParser:
663
650
  def parse_number(self) -> Union[float, int, str, JSONReturnType]:
664
651
  # <number> is a valid real number expressed in one of a number of given formats
665
652
  number_str = ""
666
- number_chars = set("0123456789-.eE/,")
667
653
  char = self.get_char_at()
668
654
  is_array = self.context.current == ContextValues.ARRAY
669
- while char and char in number_chars and (char != "," or not is_array):
655
+ while char and char in self.NUMBER_CHARS and (not is_array or char != ","):
670
656
  number_str += char
671
657
  self.index += 1
672
658
  char = self.get_char_at()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: json_repair
3
- Version: 0.39.0
3
+ Version: 0.39.1
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -196,12 +196,12 @@ pipx install json-repair
196
196
  to know all options available:
197
197
  ```
198
198
  $ json_repair -h
199
- usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
199
+ usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] [filename]
200
200
 
201
201
  Repair and parse JSON files.
202
202
 
203
203
  positional arguments:
204
- filename The JSON file to repair
204
+ filename The JSON file to repair (if omitted, reads from stdin)
205
205
 
206
206
  options:
207
207
  -h, --help show this help message and exit
@@ -226,13 +226,13 @@ In this example, any version that starts with `0.` will be acceptable, allowing
226
226
  # How to cite
227
227
  If you are using this library in your academic work (as I know many folks are) please find the BibTeX here:
228
228
 
229
- @software{Baccianella_JSON_Repair_-_2024,
229
+ @software{Baccianella_JSON_Repair_-_2025,
230
230
  author = {Baccianella, Stefano},
231
- month = aug,
231
+ month = feb,
232
232
  title = {{JSON Repair - A python module to repair invalid JSON, commonly used to parse the output of LLMs}},
233
233
  url = {https://github.com/mangiucugna/json_repair},
234
- version = {0.28.3},
235
- year = {2024}
234
+ version = {0.39.0},
235
+ year = {2025}
236
236
  }
237
237
 
238
238
  Thank you for citing my work and please send me a link to the paper if you can!
@@ -1,13 +1,13 @@
1
1
  json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
2
2
  json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
3
  json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
4
- json_repair/json_parser.py,sha256=BQsH8CRy59C2176bMwVerfqbHDXfLoEC1v5frmCiv7M,39020
4
+ json_repair/json_parser.py,sha256=kt58S7pHxCOfqktzn48iMrvd3vi7HTfK6OD02PWwWcc,38189
5
5
  json_repair/json_repair.py,sha256=k-5HRRlCqrxNmJi0u1KE3IUeL4HXqi1XZ7oAL-NFDLo,10314
6
6
  json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  json_repair/string_file_wrapper.py,sha256=koZmdq2-Z5K7XF1bDqX6dEbNaVMJYcMTjq-aGe6NQvA,4526
8
- json_repair-0.39.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
9
- json_repair-0.39.0.dist-info/METADATA,sha256=cArvqcMBL9FVCwnJGtsaeF7lXWjOFWG3_1OueGjOiRs,11794
10
- json_repair-0.39.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
11
- json_repair-0.39.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
12
- json_repair-0.39.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
13
- json_repair-0.39.0.dist-info/RECORD,,
8
+ json_repair-0.39.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
9
+ json_repair-0.39.1.dist-info/METADATA,sha256=T1k1afyqqWG-NXYHpgntQsPYpmIdkLg72eSo_iNwZZk,11827
10
+ json_repair-0.39.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
11
+ json_repair-0.39.1.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
12
+ json_repair-0.39.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
13
+ json_repair-0.39.1.dist-info/RECORD,,