json-repair 0.27.1__py3-none-any.whl → 0.28.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -51,9 +51,6 @@ class StringFileWrapper:
51
51
  self.fd.seek(current_position)
52
52
  return self.length
53
53
 
54
- def __setitem__(self) -> None:
55
- raise Exception("This is read-only!")
56
-
57
54
 
58
55
  class LoggerConfig:
59
56
  # This is a type class to simplify the declaration
@@ -179,21 +176,12 @@ class JSONParser:
179
176
 
180
177
  # <member> starts with a <string>
181
178
  key = ""
182
- while key == "" and self.get_char_at():
183
- current_index = self.index
179
+ while self.get_char_at():
184
180
  key = self.parse_string()
185
181
 
186
- # This can happen sometimes like { "": "value" }
187
- if key == "" and self.get_char_at() == ":":
188
- key = "empty_placeholder"
189
- self.log(
190
- "While parsing an object we found an empty key, replacing with empty_placeholder",
191
- "info",
192
- )
182
+ if key != "" or (key == "" and self.get_char_at() == ":"):
183
+ # If the string is empty but there is an object divider, we are done here
193
184
  break
194
- elif key == "" and self.index == current_index:
195
- # Sometimes the string search might not move the index at all, that might lead us to an infinite loop
196
- self.index += 1
197
185
 
198
186
  self.skip_whitespaces_at()
199
187
 
@@ -226,13 +214,6 @@ class JSONParser:
226
214
  # Remove trailing spaces
227
215
  self.skip_whitespaces_at()
228
216
 
229
- # Especially at the end of an LLM generated json you might miss the last "}"
230
- if (self.get_char_at() or "}") != "}":
231
- self.log(
232
- "While parsing an object, we couldn't find the closing }, ignoring",
233
- "info",
234
- )
235
-
236
217
  self.index += 1
237
218
  return obj
238
219
 
@@ -261,13 +242,6 @@ class JSONParser:
261
242
  while char and (char.isspace() or char == ","):
262
243
  self.index += 1
263
244
  char = self.get_char_at()
264
- # If this is the right value of an object and we are closing the object, it means the array is over
265
- if self.get_context() == "object_value" and char == "}":
266
- self.log(
267
- "While parsing an array inside an object, we got to the end without finding a ]. Stopped parsing",
268
- "info",
269
- )
270
- break
271
245
 
272
246
  # Especially at the end of an LLM generated json you might miss the last "]"
273
247
  char = self.get_char_at()
@@ -275,14 +249,6 @@ class JSONParser:
275
249
  self.log(
276
250
  "While parsing an array we missed the closing ], adding it back", "info"
277
251
  )
278
- # Sometimes when you fix a missing "]" you'll have a trailing "," there that makes the JSON invalid
279
- if char == ",":
280
- # Remove trailing "," before adding the "]"
281
- self.log(
282
- "While parsing an array, found a trailing , before adding ]",
283
- "info",
284
- )
285
-
286
252
  self.index -= 1
287
253
 
288
254
  self.index += 1
@@ -337,7 +303,11 @@ class JSONParser:
337
303
 
338
304
  # There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
339
305
  if self.get_char_at() == lstring_delimiter:
340
- # This is a valid exception only if it's closed by a double delimiter again
306
+ # If it's an empty key, this was easy
307
+ if self.get_context() == "object_key" and self.get_char_at(1) == ":":
308
+ self.index += 1
309
+ return ""
310
+ # Find the next delimiter
341
311
  i = 1
342
312
  next_c = self.get_char_at(i)
343
313
  while next_c and next_c != rstring_delimiter:
@@ -429,6 +399,7 @@ class JSONParser:
429
399
  "While parsing a string, we found a doubled quote, ignoring it",
430
400
  "info",
431
401
  )
402
+ self.index += 1
432
403
  elif missing_quotes and self.get_context() == "object_value":
433
404
  # In case of missing starting quote I need to check if the delimiter is the end or the beginning of a key
434
405
  i = 1
@@ -467,7 +438,7 @@ class JSONParser:
467
438
  ]:
468
439
  # This is a bit of a weird workaround, essentially in object_value context we don't always break on commas
469
440
  # This is because the routine after will make sure to correct any bad guess and this solves a corner case
470
- if next_c.isalpha():
441
+ if check_comma_in_object_value and next_c.isalpha():
471
442
  check_comma_in_object_value = False
472
443
  # If we are in an object context, let's check for the right delimiters
473
444
  if (
@@ -575,22 +546,18 @@ class JSONParser:
575
546
  # The number ends with a non valid character for a number/currency, rolling back one
576
547
  number_str = number_str[:-1]
577
548
  self.index -= 1
578
- if number_str:
579
- try:
580
- if "," in number_str:
581
- return str(number_str)
582
- if "." in number_str or "e" in number_str or "E" in number_str:
583
- return float(number_str)
584
- elif number_str == "-":
585
- # If there is a stray "-" this will throw an exception, throw away this character
586
- return self.parse_json()
587
- else:
588
- return int(number_str)
589
- except ValueError:
590
- return number_str
591
- else:
592
- # If nothing works, let's skip and keep parsing
593
- return self.parse_json()
549
+ try:
550
+ if "," in number_str:
551
+ return str(number_str)
552
+ if "." in number_str or "e" in number_str or "E" in number_str:
553
+ return float(number_str)
554
+ elif number_str == "-":
555
+ # If there is a stray "-" this will throw an exception, throw away this character
556
+ return self.parse_json()
557
+ else:
558
+ return int(number_str)
559
+ except ValueError:
560
+ return number_str
594
561
 
595
562
  def parse_boolean_or_null(self) -> Union[bool, str, None]:
596
563
  # <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
@@ -644,16 +611,10 @@ class JSONParser:
644
611
  self.context.append(value)
645
612
 
646
613
  def reset_context(self) -> None:
647
- try:
648
- self.context.pop()
649
- except Exception:
650
- return
614
+ self.context.pop()
651
615
 
652
616
  def get_context(self) -> str:
653
- try:
654
- return self.context[-1]
655
- except Exception:
656
- return ""
617
+ return self.context[-1]
657
618
 
658
619
  def log(self, text: str, level: str) -> None:
659
620
  if level == self.logger.log_level:
json_repair/py.typed ADDED
File without changes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.27.1
3
+ Version: 0.28.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -27,6 +27,7 @@ License: MIT License
27
27
 
28
28
  Project-URL: Homepage, https://github.com/mangiucugna/json_repair/
29
29
  Project-URL: Bug Tracker, https://github.com/mangiucugna/json_repair/issues
30
+ Project-URL: Live demo, https://mangiucugna.github.io/json_repair/
30
31
  Keywords: JSON,REPAIR,LLM,PARSER
31
32
  Classifier: Programming Language :: Python :: 3
32
33
  Classifier: License :: OSI Approved :: MIT License
@@ -38,7 +39,7 @@ License-File: LICENSE
38
39
  [![PyPI](https://img.shields.io/pypi/v/json-repair)](https://pypi.org/project/json-repair/)
39
40
  ![Python version](https://img.shields.io/badge/python-3.8+-important)
40
41
  [![PyPI downloads](https://img.shields.io/pypi/dm/json-repair)](https://pypi.org/project/json-repair/)
41
-
42
+ [![Github Sponsors](https://img.shields.io/github/sponsors/mangiucugna)](https://github.com/sponsors/mangiucugna)
42
43
 
43
44
  This simple package can be used to fix an invalid json string. To know all cases in which this package will work, check out the unit test.
44
45
 
@@ -0,0 +1,8 @@
1
+ json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
+ json_repair/json_repair.py,sha256=5WjVoNO7Grdq9wfnbSjggiVlDv2XVcvWbCIQVmREP38,30109
3
+ json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ json_repair-0.28.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
5
+ json_repair-0.28.0.dist-info/METADATA,sha256=rPrWno-My7ZQt7EJIR0BodmpLQ84fr0vjJwBM4XYmYU,8043
6
+ json_repair-0.28.0.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
7
+ json_repair-0.28.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
8
+ json_repair-0.28.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (72.1.0)
2
+ Generator: setuptools (72.2.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,7 +0,0 @@
1
- json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
- json_repair/json_repair.py,sha256=54sOesew2z21WAd4UVaEYOLI8oUkSqF8Z4HbkYBdCj8,31792
3
- json_repair-0.27.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
- json_repair-0.27.1.dist-info/METADATA,sha256=z1CIA9xV6Cnd7Ko49bZ1snzdwJHK-ev8vKCfhV62vzo,7863
5
- json_repair-0.27.1.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
6
- json_repair-0.27.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
- json_repair-0.27.1.dist-info/RECORD,,