json-repair 0.27.1__py3-none-any.whl → 0.28.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -51,9 +51,6 @@ class StringFileWrapper:
51
51
  self.fd.seek(current_position)
52
52
  return self.length
53
53
 
54
- def __setitem__(self) -> None:
55
- raise Exception("This is read-only!")
56
-
57
54
 
58
55
  class LoggerConfig:
59
56
  # This is a type class to simplify the declaration
@@ -179,21 +176,12 @@ class JSONParser:
179
176
 
180
177
  # <member> starts with a <string>
181
178
  key = ""
182
- while key == "" and self.get_char_at():
183
- current_index = self.index
179
+ while self.get_char_at():
184
180
  key = self.parse_string()
185
181
 
186
- # This can happen sometimes like { "": "value" }
187
- if key == "" and self.get_char_at() == ":":
188
- key = "empty_placeholder"
189
- self.log(
190
- "While parsing an object we found an empty key, replacing with empty_placeholder",
191
- "info",
192
- )
182
+ if key != "" or (key == "" and self.get_char_at() == ":"):
183
+ # If the string is empty but there is a object divider, we are done here
193
184
  break
194
- elif key == "" and self.index == current_index:
195
- # Sometimes the string search might not move the index at all, that might lead us to an infinite loop
196
- self.index += 1
197
185
 
198
186
  self.skip_whitespaces_at()
199
187
 
@@ -226,13 +214,6 @@ class JSONParser:
226
214
  # Remove trailing spaces
227
215
  self.skip_whitespaces_at()
228
216
 
229
- # Especially at the end of an LLM generated json you might miss the last "}"
230
- if (self.get_char_at() or "}") != "}":
231
- self.log(
232
- "While parsing an object, we couldn't find the closing }, ignoring",
233
- "info",
234
- )
235
-
236
217
  self.index += 1
237
218
  return obj
238
219
 
@@ -261,13 +242,6 @@ class JSONParser:
261
242
  while char and (char.isspace() or char == ","):
262
243
  self.index += 1
263
244
  char = self.get_char_at()
264
- # If this is the right value of an object and we are closing the object, it means the array is over
265
- if self.get_context() == "object_value" and char == "}":
266
- self.log(
267
- "While parsing an array inside an object, we got to the end without finding a ]. Stopped parsing",
268
- "info",
269
- )
270
- break
271
245
 
272
246
  # Especially at the end of an LLM generated json you might miss the last "]"
273
247
  char = self.get_char_at()
@@ -275,14 +249,6 @@ class JSONParser:
275
249
  self.log(
276
250
  "While parsing an array we missed the closing ], adding it back", "info"
277
251
  )
278
- # Sometimes when you fix a missing "]" you'll have a trailing "," there that makes the JSON invalid
279
- if char == ",":
280
- # Remove trailing "," before adding the "]"
281
- self.log(
282
- "While parsing an array, found a trailing , before adding ]",
283
- "info",
284
- )
285
-
286
252
  self.index -= 1
287
253
 
288
254
  self.index += 1
@@ -337,7 +303,11 @@ class JSONParser:
337
303
 
338
304
  # There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
339
305
  if self.get_char_at() == lstring_delimiter:
340
- # This is a valid exception only if it's closed by a double delimiter again
306
+ # If it's an empty key, this was easy
307
+ if self.get_context() == "object_key" and self.get_char_at(1) == ":":
308
+ self.index += 1
309
+ return ""
310
+ # Find the next delimiter
341
311
  i = 1
342
312
  next_c = self.get_char_at(i)
343
313
  while next_c and next_c != rstring_delimiter:
@@ -429,6 +399,7 @@ class JSONParser:
429
399
  "While parsing a string, we found a doubled quote, ignoring it",
430
400
  "info",
431
401
  )
402
+ self.index += 1
432
403
  elif missing_quotes and self.get_context() == "object_value":
433
404
  # In case of missing starting quote I need to check if the delimeter is the end or the beginning of a key
434
405
  i = 1
@@ -467,7 +438,7 @@ class JSONParser:
467
438
  ]:
468
439
  # This is a bit of a weird workaround, essentially in object_value context we don't always break on commas
469
440
  # This is because the routine after will make sure to correct any bad guess and this solves a corner case
470
- if next_c.isalpha():
441
+ if check_comma_in_object_value and next_c.isalpha():
471
442
  check_comma_in_object_value = False
472
443
  # If we are in an object context, let's check for the right delimiters
473
444
  if (
@@ -575,22 +546,18 @@ class JSONParser:
575
546
  # The number ends with a non valid character for a number/currency, rolling back one
576
547
  number_str = number_str[:-1]
577
548
  self.index -= 1
578
- if number_str:
579
- try:
580
- if "," in number_str:
581
- return str(number_str)
582
- if "." in number_str or "e" in number_str or "E" in number_str:
583
- return float(number_str)
584
- elif number_str == "-":
585
- # If there is a stray "-" this will throw an exception, throw away this character
586
- return self.parse_json()
587
- else:
588
- return int(number_str)
589
- except ValueError:
590
- return number_str
591
- else:
592
- # If nothing works, let's skip and keep parsing
593
- return self.parse_json()
549
+ try:
550
+ if "," in number_str:
551
+ return str(number_str)
552
+ if "." in number_str or "e" in number_str or "E" in number_str:
553
+ return float(number_str)
554
+ elif number_str == "-":
555
+ # If there is a stray "-" this will throw an exception, throw away this character
556
+ return self.parse_json()
557
+ else:
558
+ return int(number_str)
559
+ except ValueError:
560
+ return number_str
594
561
 
595
562
  def parse_boolean_or_null(self) -> Union[bool, str, None]:
596
563
  # <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
@@ -644,16 +611,10 @@ class JSONParser:
644
611
  self.context.append(value)
645
612
 
646
613
  def reset_context(self) -> None:
647
- try:
648
- self.context.pop()
649
- except Exception:
650
- return
614
+ self.context.pop()
651
615
 
652
616
  def get_context(self) -> str:
653
- try:
654
- return self.context[-1]
655
- except Exception:
656
- return ""
617
+ return self.context[-1]
657
618
 
658
619
  def log(self, text: str, level: str) -> None:
659
620
  if level == self.logger.log_level:
json_repair/py.typed ADDED
File without changes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.27.1
3
+ Version: 0.28.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -27,6 +27,7 @@ License: MIT License
27
27
 
28
28
  Project-URL: Homepage, https://github.com/mangiucugna/json_repair/
29
29
  Project-URL: Bug Tracker, https://github.com/mangiucugna/json_repair/issues
30
+ Project-URL: Live demo, https://mangiucugna.github.io/json_repair/
30
31
  Keywords: JSON,REPAIR,LLM,PARSER
31
32
  Classifier: Programming Language :: Python :: 3
32
33
  Classifier: License :: OSI Approved :: MIT License
@@ -38,7 +39,7 @@ License-File: LICENSE
38
39
  [![PyPI](https://img.shields.io/pypi/v/json-repair)](https://pypi.org/project/json-repair/)
39
40
  ![Python version](https://img.shields.io/badge/python-3.8+-important)
40
41
  [![PyPI downloads](https://img.shields.io/pypi/dm/json-repair)](https://pypi.org/project/json-repair/)
41
-
42
+ [![Github Sponsors](https://img.shields.io/github/sponsors/mangiucugna)](https://github.com/sponsors/mangiucugna)
42
43
 
43
44
  This simple package can be used to fix an invalid json string. To know all cases in which this package will work, check out the unit test.
44
45
 
@@ -0,0 +1,8 @@
1
+ json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
+ json_repair/json_repair.py,sha256=5WjVoNO7Grdq9wfnbSjggiVlDv2XVcvWbCIQVmREP38,30109
3
+ json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ json_repair-0.28.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
5
+ json_repair-0.28.0.dist-info/METADATA,sha256=rPrWno-My7ZQt7EJIR0BodmpLQ84fr0vjJwBM4XYmYU,8043
6
+ json_repair-0.28.0.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
7
+ json_repair-0.28.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
8
+ json_repair-0.28.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (72.1.0)
2
+ Generator: setuptools (72.2.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,7 +0,0 @@
1
- json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
- json_repair/json_repair.py,sha256=54sOesew2z21WAd4UVaEYOLI8oUkSqF8Z4HbkYBdCj8,31792
3
- json_repair-0.27.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
- json_repair-0.27.1.dist-info/METADATA,sha256=z1CIA9xV6Cnd7Ko49bZ1snzdwJHK-ev8vKCfhV62vzo,7863
5
- json_repair-0.27.1.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
6
- json_repair-0.27.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
- json_repair-0.27.1.dist-info/RECORD,,