json-repair 0.20.1__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,11 +24,55 @@ All supported use cases are in the unit tests
24
24
 
25
25
  import os
26
26
  import json
27
- from typing import Any, Dict, List, Union, TextIO
27
+ from typing import Any, Dict, List, Optional, Union, TextIO, Tuple
28
+
29
+
30
+ class StringFileWrapper:
31
+ # This is a trick to simplify the code, transform the filedescriptor handling into a string handling
32
+ def __init__(self, fd: TextIO) -> None:
33
+ self.fd = fd
34
+ self.length: int = 0
35
+
36
+ def __getitem__(self, index: int) -> str:
37
+ if isinstance(index, slice):
38
+ self.fd.seek(index.start)
39
+ value = self.fd.read(index.stop - index.start)
40
+ self.fd.seek(index.start)
41
+ return value
42
+ else:
43
+ self.fd.seek(index)
44
+ return self.fd.read(1)
45
+
46
+ def __len__(self) -> int:
47
+ if self.length < 1:
48
+ current_position = self.fd.tell()
49
+ self.fd.seek(0, os.SEEK_END)
50
+ self.length = self.fd.tell()
51
+ self.fd.seek(current_position)
52
+ return self.length
53
+
54
+ def __setitem__(self) -> None:
55
+ raise Exception("This is read-only!")
56
+
57
+
58
+ class LoggerConfig:
59
+ # This is a type class to simplify the declaration
60
+ def __init__(self, log_level: Optional[str]):
61
+ self.log: List[Dict[str, str]] = []
62
+ self.window: int = 10
63
+ self.log_level: str = log_level if log_level else "none"
64
+
65
+
66
+ JSONReturnType = Union[Dict[str, Any], List[Any], str, float, int, bool, None]
28
67
 
29
68
 
30
69
  class JSONParser:
31
- def __init__(self, json_str: str, json_fd: TextIO, logging: bool = False) -> None:
70
+ def __init__(
71
+ self,
72
+ json_str: Union[str, StringFileWrapper],
73
+ json_fd: Optional[TextIO],
74
+ logging: Optional[bool],
75
+ ) -> None:
32
76
  # The string to parse
33
77
  self.json_str = json_str
34
78
  # Alternatively, the file description with a json file in it
@@ -36,25 +80,23 @@ class JSONParser:
36
80
  # This is a trick we do to treat the file wrapper as an array
37
81
  self.json_str = StringFileWrapper(json_fd)
38
82
  # Index is our iterator that will keep track of which character we are looking at right now
39
- self.index = 0
83
+ self.index: int = 0
40
84
  # This is used in the object member parsing to manage the special cases of missing quotes in key or value
41
- self.context = []
85
+ self.context: list[str] = []
42
86
  # Use this to log the activity, but only if logging is active
43
- self.logger = {
44
- "log": [],
45
- "window": 10,
46
- "log_level": "info" if logging else "none",
47
- }
48
-
49
- def parse(self) -> Union[Dict[str, Any], List[Any], str, float, int, bool, None]:
50
- if self.logger["log_level"] == "none":
87
+ self.logger = LoggerConfig(log_level="info" if logging else None)
88
+
89
+ def parse(
90
+ self,
91
+ ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
92
+ if self.logger.log_level == "none":
51
93
  return self.parse_json()
52
94
  else:
53
- return self.parse_json(), self.logger["log"]
95
+ return self.parse_json(), self.logger.log
54
96
 
55
97
  def parse_json(
56
98
  self,
57
- ) -> Union[Dict[str, Any], List[Any], str, float, int, bool, None]:
99
+ ) -> JSONReturnType:
58
100
  char = self.get_char_at()
59
101
  # False means that we are at the end of the string provided, is the base case for recursion
60
102
  if char is False:
@@ -225,7 +267,7 @@ class JSONParser:
225
267
  self.reset_context()
226
268
  return arr
227
269
 
228
- def parse_string(self) -> str:
270
+ def parse_string(self) -> Union[str, JSONReturnType]:
229
271
  # <string> is a string of valid characters enclosed in quotes
230
272
  # i.e. { name: "John" }
231
273
  # Somehow all weird cases in an invalid JSON happen to be resolved in this function, so be careful here
@@ -324,7 +366,7 @@ class JSONParser:
324
366
  string_acc = string_acc[:-1]
325
367
  if char in [rstring_delimiter, "t", "n", "r", "b", "\\"]:
326
368
  escape_seqs = {"t": "\t", "n": "\n", "r": "\r", "b": "\b"}
327
- string_acc += escape_seqs.get(char, char)
369
+ string_acc += escape_seqs.get(char, char) or char
328
370
  self.index += 1
329
371
  char = self.get_char_at()
330
372
  # ChatGPT sometimes forget to quote stuff in html tags or markdown, so we do this whole thing here
@@ -362,7 +404,29 @@ class JSONParser:
362
404
  break
363
405
  i += 1
364
406
  next_c = self.get_char_at(i)
365
- if next_c == rstring_delimiter:
407
+ # If we stopped for a comma in object_value context, let's check if find a "} at the end of the string
408
+ if next_c == "," and self.get_context() == "object_value":
409
+ i += 1
410
+ next_c = self.get_char_at(i)
411
+ while next_c and next_c != rstring_delimiter:
412
+ i += 1
413
+ next_c = self.get_char_at(i)
414
+ # Ok now I found a delimiter, let's skip whitespaces and see if next we find a }
415
+ i += 1
416
+ next_c = self.get_char_at(i)
417
+ while next_c and next_c.isspace():
418
+ i += 1
419
+ next_c = self.get_char_at(i)
420
+ if next_c == "}":
421
+ # OK this is valid then
422
+ self.log(
423
+ "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here since this is the last element of the object, ignoring it",
424
+ "info",
425
+ )
426
+ string_acc += char
427
+ self.index += 1
428
+ char = self.get_char_at()
429
+ elif next_c == rstring_delimiter:
366
430
  if self.get_context() == "object_value":
367
431
  # But this might not be it! This could be just a missing comma
368
432
  # We found a delimiter and we need to check if this is a key
@@ -418,7 +482,7 @@ class JSONParser:
418
482
 
419
483
  return string_acc.rstrip()
420
484
 
421
- def parse_number(self) -> Union[float, int, str]:
485
+ def parse_number(self) -> Union[float, int, str, JSONReturnType]:
422
486
  # <number> is a valid real number expressed in one of a number of given formats
423
487
  number_str = ""
424
488
  number_chars = set("0123456789-.eE/,")
@@ -451,8 +515,7 @@ class JSONParser:
451
515
  def parse_boolean_or_null(self) -> Union[bool, str, None]:
452
516
  # <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
453
517
  starting_index = self.index
454
- value = ""
455
- char = self.get_char_at().lower()
518
+ char = (self.get_char_at() or "").lower()
456
519
  if char == "t":
457
520
  value = ("true", True)
458
521
  elif char == "f":
@@ -460,12 +523,12 @@ class JSONParser:
460
523
  elif char == "n":
461
524
  value = ("null", None)
462
525
 
463
- if len(value):
526
+ if value:
464
527
  i = 0
465
528
  while char and i < len(value[0]) and char == value[0][i]:
466
529
  i += 1
467
530
  self.index += 1
468
- char = self.get_char_at().lower()
531
+ char = (self.get_char_at() or "").lower()
469
532
  if i == len(value[0]):
470
533
  return value[1]
471
534
 
@@ -513,12 +576,12 @@ class JSONParser:
513
576
  return ""
514
577
 
515
578
  def log(self, text: str, level: str) -> None:
516
- if level == self.logger["log_level"]:
579
+ if level == self.logger.log_level:
517
580
  context = ""
518
- start = max(self.index - self.logger["window"], 0)
519
- end = min(self.index + self.logger["window"], len(self.json_str))
581
+ start = max(self.index - self.logger.window, 0)
582
+ end = min(self.index + self.logger.window, len(self.json_str))
520
583
  context = self.json_str[start:end]
521
- self.logger["log"].append(
584
+ self.logger.log.append(
522
585
  {
523
586
  "text": text,
524
587
  "context": context,
@@ -528,11 +591,11 @@ class JSONParser:
528
591
 
529
592
  def repair_json(
530
593
  json_str: str = "",
531
- return_objects: bool = False,
532
- skip_json_loads: bool = False,
533
- logging: bool = False,
534
- json_fd: TextIO = None,
535
- ) -> Union[Dict[str, Any], List[Any], str, float, int, bool, None]:
594
+ return_objects: Optional[bool] = False,
595
+ skip_json_loads: Optional[bool] = False,
596
+ logging: Optional[bool] = False,
597
+ json_fd: Optional[TextIO] = None,
598
+ ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
536
599
  """
537
600
  Given a json formatted string, it will try to decode it and, if it fails, it will try to fix it.
538
601
  It will return the fixed string by default.
@@ -559,7 +622,7 @@ def repair_json(
559
622
 
560
623
  def loads(
561
624
  json_str: str, skip_json_loads: bool = False, logging: bool = False
562
- ) -> Union[Dict[str, Any], List[Any], str, float, int, bool, None]:
625
+ ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
563
626
  """
564
627
  This function works like `json.loads()` except that it will fix your JSON in the process.
565
628
  It is a wrapper around the `repair_json()` function with `return_objects=True`.
@@ -574,7 +637,7 @@ def loads(
574
637
 
575
638
  def load(
576
639
  fd: TextIO, skip_json_loads: bool = False, logging: bool = False
577
- ) -> Union[Dict[str, Any], List[Any], str, float, int, bool, None]:
640
+ ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
578
641
  """
579
642
  This function works like `json.load()` except that it will fix your JSON in the process.
580
643
  It is a wrapper around the `repair_json()` function with `json_fd=fd` and `return_objects=True`.
@@ -584,7 +647,7 @@ def load(
584
647
 
585
648
  def from_file(
586
649
  filename: str, skip_json_loads: bool = False, logging: bool = False
587
- ) -> Union[Dict[str, Any], List[Any], str, float, int, bool, None]:
650
+ ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
588
651
  """
589
652
  This function is a wrapper around `load()` so you can pass the filename as string
590
653
  """
@@ -593,31 +656,3 @@ def from_file(
593
656
  fd.close()
594
657
 
595
658
  return jsonobj
596
-
597
-
598
- class StringFileWrapper:
599
- # This is a trick to simplify the code above, transform the filedescriptor handling into an array handling
600
- def __init__(self, fd: TextIO) -> None:
601
- self.fd = fd
602
- self.length = None
603
-
604
- def __getitem__(self, index: int) -> Any:
605
- if isinstance(index, slice):
606
- self.fd.seek(index.start)
607
- value = self.fd.read(index.stop - index.start)
608
- self.fd.seek(index.start)
609
- return value
610
- else:
611
- self.fd.seek(index)
612
- return self.fd.read(1)
613
-
614
- def __len__(self) -> int:
615
- if not self.length:
616
- current_position = self.fd.tell()
617
- self.fd.seek(0, os.SEEK_END)
618
- self.length = self.fd.tell()
619
- self.fd.seek(current_position)
620
- return self.length
621
-
622
- def __setitem__(self):
623
- raise Exception("This is read-only!")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.20.1
3
+ Version: 0.21.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -0,0 +1,7 @@
1
+ json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
+ json_repair/json_repair.py,sha256=ry94U3QoJwVgyG1qeQNEb8Qt8NtCLpCGR41GBA7tozY,27320
3
+ json_repair-0.21.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
+ json_repair-0.21.0.dist-info/METADATA,sha256=obBsHuNN7Ph5zX77VHmER2O9A61F3MXGBreEowdr-so,7333
5
+ json_repair-0.21.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
+ json_repair-0.21.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
+ json_repair-0.21.0.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
- json_repair/json_repair.py,sha256=evtrrG5xGfWBa1tSTW07u03PXP3bGoKsN7A_8WcsN1s,25528
3
- json_repair-0.20.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
- json_repair-0.20.1.dist-info/METADATA,sha256=dFTIO7S7G_bZDgNgWHD7Ey7B5qB2Q_9CXHXGycmldsU,7333
5
- json_repair-0.20.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
- json_repair-0.20.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
- json_repair-0.20.1.dist-info/RECORD,,