json-repair 0.20.0__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,11 +24,55 @@ All supported use cases are in the unit tests
24
24
 
25
25
  import os
26
26
  import json
27
- from typing import Any, Dict, List, Union, TextIO
27
+ from typing import Any, Dict, List, Optional, Union, TextIO, Tuple
28
+
29
+
30
+ class StringFileWrapper:
31
+ # This is a trick to simplify the code, transform the filedescriptor handling into a string handling
32
+ def __init__(self, fd: TextIO) -> None:
33
+ self.fd = fd
34
+ self.length: int = 0
35
+
36
+ def __getitem__(self, index: int) -> str:
37
+ if isinstance(index, slice):
38
+ self.fd.seek(index.start)
39
+ value = self.fd.read(index.stop - index.start)
40
+ self.fd.seek(index.start)
41
+ return value
42
+ else:
43
+ self.fd.seek(index)
44
+ return self.fd.read(1)
45
+
46
+ def __len__(self) -> int:
47
+ if self.length < 1:
48
+ current_position = self.fd.tell()
49
+ self.fd.seek(0, os.SEEK_END)
50
+ self.length = self.fd.tell()
51
+ self.fd.seek(current_position)
52
+ return self.length
53
+
54
+ def __setitem__(self) -> None:
55
+ raise Exception("This is read-only!")
56
+
57
+
58
+ class LoggerConfig:
59
+ # This is a type class to simplify the declaration
60
+ def __init__(self, log_level: Optional[str]):
61
+ self.log: List[Dict[str, str]] = []
62
+ self.window: int = 10
63
+ self.log_level: str = log_level if log_level else "none"
64
+
65
+
66
+ JSONReturnType = Union[Dict[str, Any], List[Any], str, float, int, bool, None]
28
67
 
29
68
 
30
69
  class JSONParser:
31
- def __init__(self, json_str: str, json_fd: TextIO, logging: bool = False) -> None:
70
+ def __init__(
71
+ self,
72
+ json_str: Union[str, StringFileWrapper],
73
+ json_fd: Optional[TextIO],
74
+ logging: Optional[bool],
75
+ ) -> None:
32
76
  # The string to parse
33
77
  self.json_str = json_str
34
78
  # Alternatively, the file description with a json file in it
@@ -36,25 +80,23 @@ class JSONParser:
36
80
  # This is a trick we do to treat the file wrapper as an array
37
81
  self.json_str = StringFileWrapper(json_fd)
38
82
  # Index is our iterator that will keep track of which character we are looking at right now
39
- self.index = 0
83
+ self.index: int = 0
40
84
  # This is used in the object member parsing to manage the special cases of missing quotes in key or value
41
- self.context = []
85
+ self.context: list[str] = []
42
86
  # Use this to log the activity, but only if logging is active
43
- self.logger = {
44
- "log": [],
45
- "window": 10,
46
- "log_level": "info" if logging else "none",
47
- }
48
-
49
- def parse(self) -> Union[Dict[str, Any], List[Any], str, float, int, bool, None]:
50
- if self.logger["log_level"] == "none":
87
+ self.logger = LoggerConfig(log_level="info" if logging else None)
88
+
89
+ def parse(
90
+ self,
91
+ ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
92
+ if self.logger.log_level == "none":
51
93
  return self.parse_json()
52
94
  else:
53
- return self.parse_json(), self.logger["log"]
95
+ return self.parse_json(), self.logger.log
54
96
 
55
97
  def parse_json(
56
98
  self,
57
- ) -> Union[Dict[str, Any], List[Any], str, float, int, bool, None]:
99
+ ) -> JSONReturnType:
58
100
  char = self.get_char_at()
59
101
  # False means that we are at the end of the string provided, is the base case for recursion
60
102
  if char is False:
@@ -131,10 +173,14 @@ class JSONParser:
131
173
  # Sometimes the string search might not move the index at all, that might lead us to an infinite loop
132
174
  self.index += 1
133
175
 
176
+ self.skip_whitespaces_at()
177
+
134
178
  # We reached the end here
135
179
  if (self.get_char_at() or "}") == "}":
136
180
  continue
137
181
 
182
+ self.skip_whitespaces_at()
183
+
138
184
  # An extreme case of missing ":" after a key
139
185
  if (self.get_char_at() or "") != ":":
140
186
  self.log(
@@ -178,7 +224,7 @@ class JSONParser:
178
224
  value = self.parse_json()
179
225
 
180
226
  # It is possible that parse_json() returns nothing valid, so we stop
181
- if not value:
227
+ if value == "":
182
228
  break
183
229
 
184
230
  if value == "..." and self.get_char_at(-1) == ".":
@@ -221,7 +267,7 @@ class JSONParser:
221
267
  self.reset_context()
222
268
  return arr
223
269
 
224
- def parse_string(self) -> str:
270
+ def parse_string(self) -> Union[str, JSONReturnType]:
225
271
  # <string> is a string of valid characters enclosed in quotes
226
272
  # i.e. { name: "John" }
227
273
  # Somehow all weird cases in an invalid JSON happen to be resolved in this function, so be careful here
@@ -320,7 +366,7 @@ class JSONParser:
320
366
  string_acc = string_acc[:-1]
321
367
  if char in [rstring_delimiter, "t", "n", "r", "b", "\\"]:
322
368
  escape_seqs = {"t": "\t", "n": "\n", "r": "\r", "b": "\b"}
323
- string_acc += escape_seqs.get(char, char)
369
+ string_acc += escape_seqs.get(char, char) or char
324
370
  self.index += 1
325
371
  char = self.get_char_at()
326
372
  # ChatGPT sometimes forget to quote stuff in html tags or markdown, so we do this whole thing here
@@ -358,7 +404,29 @@ class JSONParser:
358
404
  break
359
405
  i += 1
360
406
  next_c = self.get_char_at(i)
361
- if next_c == rstring_delimiter:
407
+ # If we stopped for a comma in object_value context, let's check if find a "} at the end of the string
408
+ if next_c == "," and self.get_context() == "object_value":
409
+ i += 1
410
+ next_c = self.get_char_at(i)
411
+ while next_c and next_c != rstring_delimiter:
412
+ i += 1
413
+ next_c = self.get_char_at(i)
414
+ # Ok now I found a delimiter, let's skip whitespaces and see if next we find a }
415
+ i += 1
416
+ next_c = self.get_char_at(i)
417
+ while next_c and next_c.isspace():
418
+ i += 1
419
+ next_c = self.get_char_at(i)
420
+ if next_c == "}":
421
+ # OK this is valid then
422
+ self.log(
423
+ "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here since this is the last element of the object, ignoring it",
424
+ "info",
425
+ )
426
+ string_acc += char
427
+ self.index += 1
428
+ char = self.get_char_at()
429
+ elif next_c == rstring_delimiter:
362
430
  if self.get_context() == "object_value":
363
431
  # But this might not be it! This could be just a missing comma
364
432
  # We found a delimiter and we need to check if this is a key
@@ -414,7 +482,7 @@ class JSONParser:
414
482
 
415
483
  return string_acc.rstrip()
416
484
 
417
- def parse_number(self) -> Union[float, int, str]:
485
+ def parse_number(self) -> Union[float, int, str, JSONReturnType]:
418
486
  # <number> is a valid real number expressed in one of a number of given formats
419
487
  number_str = ""
420
488
  number_chars = set("0123456789-.eE/,")
@@ -447,8 +515,7 @@ class JSONParser:
447
515
  def parse_boolean_or_null(self) -> Union[bool, str, None]:
448
516
  # <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
449
517
  starting_index = self.index
450
- value = ""
451
- char = self.get_char_at().lower()
518
+ char = (self.get_char_at() or "").lower()
452
519
  if char == "t":
453
520
  value = ("true", True)
454
521
  elif char == "f":
@@ -456,12 +523,12 @@ class JSONParser:
456
523
  elif char == "n":
457
524
  value = ("null", None)
458
525
 
459
- if len(value):
526
+ if value:
460
527
  i = 0
461
528
  while char and i < len(value[0]) and char == value[0][i]:
462
529
  i += 1
463
530
  self.index += 1
464
- char = self.get_char_at().lower()
531
+ char = (self.get_char_at() or "").lower()
465
532
  if i == len(value[0]):
466
533
  return value[1]
467
534
 
@@ -509,12 +576,12 @@ class JSONParser:
509
576
  return ""
510
577
 
511
578
  def log(self, text: str, level: str) -> None:
512
- if level == self.logger["log_level"]:
579
+ if level == self.logger.log_level:
513
580
  context = ""
514
- start = max(self.index - self.logger["window"], 0)
515
- end = min(self.index + self.logger["window"], len(self.json_str))
581
+ start = max(self.index - self.logger.window, 0)
582
+ end = min(self.index + self.logger.window, len(self.json_str))
516
583
  context = self.json_str[start:end]
517
- self.logger["log"].append(
584
+ self.logger.log.append(
518
585
  {
519
586
  "text": text,
520
587
  "context": context,
@@ -524,11 +591,11 @@ class JSONParser:
524
591
 
525
592
  def repair_json(
526
593
  json_str: str = "",
527
- return_objects: bool = False,
528
- skip_json_loads: bool = False,
529
- logging: bool = False,
530
- json_fd: TextIO = None,
531
- ) -> Union[Dict[str, Any], List[Any], str, float, int, bool, None]:
594
+ return_objects: Optional[bool] = False,
595
+ skip_json_loads: Optional[bool] = False,
596
+ logging: Optional[bool] = False,
597
+ json_fd: Optional[TextIO] = None,
598
+ ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
532
599
  """
533
600
  Given a json formatted string, it will try to decode it and, if it fails, it will try to fix it.
534
601
  It will return the fixed string by default.
@@ -555,7 +622,7 @@ def repair_json(
555
622
 
556
623
  def loads(
557
624
  json_str: str, skip_json_loads: bool = False, logging: bool = False
558
- ) -> Union[Dict[str, Any], List[Any], str, float, int, bool, None]:
625
+ ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
559
626
  """
560
627
  This function works like `json.loads()` except that it will fix your JSON in the process.
561
628
  It is a wrapper around the `repair_json()` function with `return_objects=True`.
@@ -570,7 +637,7 @@ def loads(
570
637
 
571
638
  def load(
572
639
  fd: TextIO, skip_json_loads: bool = False, logging: bool = False
573
- ) -> Union[Dict[str, Any], List[Any], str, float, int, bool, None]:
640
+ ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
574
641
  """
575
642
  This function works like `json.load()` except that it will fix your JSON in the process.
576
643
  It is a wrapper around the `repair_json()` function with `json_fd=fd` and `return_objects=True`.
@@ -580,7 +647,7 @@ def load(
580
647
 
581
648
  def from_file(
582
649
  filename: str, skip_json_loads: bool = False, logging: bool = False
583
- ) -> Union[Dict[str, Any], List[Any], str, float, int, bool, None]:
650
+ ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
584
651
  """
585
652
  This function is a wrapper around `load()` so you can pass the filename as string
586
653
  """
@@ -589,31 +656,3 @@ def from_file(
589
656
  fd.close()
590
657
 
591
658
  return jsonobj
592
-
593
-
594
- class StringFileWrapper:
595
- # This is a trick to simplify the code above, transform the filedescriptor handling into an array handling
596
- def __init__(self, fd: TextIO) -> None:
597
- self.fd = fd
598
- self.length = None
599
-
600
- def __getitem__(self, index: int) -> Any:
601
- if isinstance(index, slice):
602
- self.fd.seek(index.start)
603
- value = self.fd.read(index.stop - index.start)
604
- self.fd.seek(index.start)
605
- return value
606
- else:
607
- self.fd.seek(index)
608
- return self.fd.read(1)
609
-
610
- def __len__(self) -> int:
611
- if not self.length:
612
- current_position = self.fd.tell()
613
- self.fd.seek(0, os.SEEK_END)
614
- self.length = self.fd.tell()
615
- self.fd.seek(current_position)
616
- return self.length
617
-
618
- def __setitem__(self):
619
- raise Exception("This is read-only!")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.20.0
3
+ Version: 0.21.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -0,0 +1,7 @@
1
+ json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
+ json_repair/json_repair.py,sha256=ry94U3QoJwVgyG1qeQNEb8Qt8NtCLpCGR41GBA7tozY,27320
3
+ json_repair-0.21.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
+ json_repair-0.21.0.dist-info/METADATA,sha256=obBsHuNN7Ph5zX77VHmER2O9A61F3MXGBreEowdr-so,7333
5
+ json_repair-0.21.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
+ json_repair-0.21.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
+ json_repair-0.21.0.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
- json_repair/json_repair.py,sha256=zYg4tIwZ4rdVkCQ5XVceNQaOz2MT50O7jHJbJ1EpKhk,25446
3
- json_repair-0.20.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
- json_repair-0.20.0.dist-info/METADATA,sha256=2WrsjORPx37e4CqdkdiARwB-VoP5EyjsZaS0hcVnVBo,7333
5
- json_repair-0.20.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
- json_repair-0.20.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
- json_repair-0.20.0.dist-info/RECORD,,