PyPI - json-repair - Versions diffs - 0.19.0__py3-none-any.whl → 0.19.2__py3-none-any.whl - Mend

json-repair 0.19.0__py3-none-any.whl → 0.19.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of json-repair might be problematic. Click here for more details.

Files changed (7) hide show

json_repair/json_repair.py CHANGED Viewed

@@ -22,6 +22,7 @@ If something is wrong (a missing parantheses or quotes for example) it will use
 All supported use cases are in the unit tests
 """
+import os
 import json
 from typing import Any, Dict, List, Union, TextIO
@@ -31,7 +32,9 @@ class JSONParser:
         # The string to parse
         self.json_str = json_str
         # Alternatively, the file description with a json file in it
-        self.json_fd = json_fd
+        if json_fd:
+            # This is a trick we do to treat the file wrapper as an array
+            self.json_str = StringFileWrapper(json_fd)
         # Index is our iterator that will keep track of which character we are looking at right now
         self.index = 0
         # This is used in the object member parsing to manage the special cases of missing quotes in key or value
@@ -246,7 +249,8 @@ class JSONParser:
             rstring_delimiter = "”"
         elif char.isalpha():
             # This could be a <boolean> and not a string. Because (T)rue or (F)alse or (N)ull are valid
-            if char.lower() in ["t", "f", "n"]:
+            # But remember, object keys are only of type string
+            if char.lower() in ["t", "f", "n"] and self.get_context() != "object_key":
                 value = self.parse_boolean_or_null()
                 if value != "":
                     return value
@@ -263,7 +267,8 @@ class JSONParser:
                 self.index += 1
                 return self.parse_json()
             self.log(
-                "While parsing a string, we found no starting quote, ignoring", "info"
+                "While parsing a string, we found no starting quote. Will add the quote back",
+                "info",
             )
             missing_quotes = True
@@ -309,6 +314,15 @@ class JSONParser:
             string_acc += char
             self.index += 1
             char = self.get_char_at()
+            if len(string_acc) > 1 and string_acc[-1] == "\\":
+                # This is a special case, if people use real strings this might happen
+                self.log("Found a stray escape sequence, normalizing it", "info")
+                string_acc = string_acc[:-1]
+                if char in [rstring_delimiter, "t", "n", "r", "b", "\\"]:
+                    escape_seqs = {"t": "\t", "n": "\n", "r": "\r", "b": "\b"}
+                    string_acc += escape_seqs.get(char, char)
+                    self.index += 1
+                    char = self.get_char_at()
             # ChatGPT sometimes forget to quote stuff in html tags or markdown, so we do this whole thing here
             if char == rstring_delimiter:
                 # Special case here, in case of double quotes one after another
@@ -442,36 +456,22 @@ class JSONParser:
         try:
             return self.json_str[self.index + count]
         except IndexError:
-            if self.json_fd:
-                self.json_fd.seek(self.index + count)
-                char = self.json_fd.read(1)
-                if char == "":
-                    return False
-                return char
-            else:
-                return False
+            return False
     def skip_whitespaces_at(self) -> None:
         """
         This function quickly iterates on whitespaces, syntactic sugar to make the code more concise
         """
-        if self.json_fd:
-            char = self.get_char_at()
-            while char and char.isspace():
-                self.index += 1
-                char = self.get_char_at()
-        else:
-            # If this is not a file stream, we do this monster here to make this function much much faster
+        try:
+            char = self.json_str[self.index]
+        except IndexError:
+            return
+        while char.isspace():
+            self.index += 1
             try:
                 char = self.json_str[self.index]
             except IndexError:
                 return
-            while char.isspace():
-                self.index += 1
-                try:
-                    char = self.json_str[self.index]
-                except IndexError:
-                    return
     def set_context(self, value: str) -> None:
         # If a value is provided update the context variable and save in stack
@@ -493,22 +493,9 @@ class JSONParser:
     def log(self, text: str, level: str) -> None:
         if level == self.logger["log_level"]:
             context = ""
-            if self.json_fd:
-                self.json_fd.seek(self.index - self.logger["window"])
-                context = self.json_fd.read(self.logger["window"] * 2)
-                self.json_fd.seek(self.index)
-            else:
-                start = (
-                    self.index - self.logger["window"]
-                    if (self.index - self.logger["window"]) >= 0
-                    else 0
-                )
-                end = (
-                    self.index + self.logger["window"]
-                    if (self.index + self.logger["window"]) <= len(self.json_str)
-                    else len(self.json_str)
-                )
-                context = self.json_str[start:end]
+            start = max(self.index - self.logger["window"], 0)
+            end = min(self.index + self.logger["window"], len(self.json_str))
+            context = self.json_str[start:end]
             self.logger["log"].append(
                 {
                     "text": text,
@@ -584,3 +571,31 @@ def from_file(
     fd.close()
     return jsonobj
+class StringFileWrapper:
+    # This is a trick to simplify the code above, transform the filedescriptor handling into an array handling
+    def __init__(self, fd: TextIO) -> None:
+        self.fd = fd
+        self.length = None
+    def __getitem__(self, index: int) -> Any:
+        if isinstance(index, slice):
+            self.fd.seek(index.start)
+            value = self.fd.read(index.stop - index.start)
+            self.fd.seek(index.start)
+            return value
+        else:
+            self.fd.seek(index)
+            return self.fd.read(1)
+    def __len__(self) -> int:
+        if not self.length:
+            current_position = self.fd.tell()
+            self.fd.seek(0, os.SEEK_END)
+            self.length = self.fd.tell()
+            self.fd.seek(current_position)
+        return self.length
+    def __setitem__(self):
+        raise Exception("This is read-only!")

{json_repair-0.19.0.dist-info → json_repair-0.19.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: json_repair
-Version: 0.19.0
+Version: 0.19.2
 Summary: A package to repair broken json strings
 Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
 License: MIT License

json_repair-0.19.2.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,7 @@
+json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
+json_repair/json_repair.py,sha256=qNkCy5by3jFhCAC3pDNKzDECOJr1Crh-P71j6N1CBBg,24405
+json_repair-0.19.2.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
+json_repair-0.19.2.dist-info/METADATA,sha256=UdTpknO6GMk16WlPV-YxSFd_QbY25WiNkqj2IE3B_NA,7333
+json_repair-0.19.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+json_repair-0.19.2.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
+json_repair-0.19.2.dist-info/RECORD,,

json_repair-0.19.0.dist-info/RECORD DELETED Viewed

@@ -1,7 +0,0 @@
-json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
-json_repair/json_repair.py,sha256=ORf1wm6wTXriTJBtCJtoFU4rEw4daAoqV0ktdyhcOT0,23775
-json_repair-0.19.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
-json_repair-0.19.0.dist-info/METADATA,sha256=Mj5le5SqwFzYmWGZWu5JbjZNqX4cYPP_h1XpQDYNeOI,7333
-json_repair-0.19.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-json_repair-0.19.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
-json_repair-0.19.0.dist-info/RECORD,,

{json_repair-0.19.0.dist-info → json_repair-0.19.2.dist-info}/LICENSE RENAMED Viewed

File without changes

{json_repair-0.19.0.dist-info → json_repair-0.19.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{json_repair-0.19.0.dist-info → json_repair-0.19.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

json-repair 0.19.0__py3-none-any.whl → 0.19.2__py3-none-any.whl

Potentially problematic release.

json-repair 0.19.0__py3-none-any.whl → 0.19.2__py3-none-any.whl