json-repair 0.15.5__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_repair.py +23 -14
 - {json_repair-0.15.5.dist-info → json_repair-0.16.0.dist-info}/METADATA +1 -1
 - json_repair-0.16.0.dist-info/RECORD +7 -0
 - json_repair-0.15.5.dist-info/RECORD +0 -7
 - {json_repair-0.15.5.dist-info → json_repair-0.16.0.dist-info}/LICENSE +0 -0
 - {json_repair-0.15.5.dist-info → json_repair-0.16.0.dist-info}/WHEEL +0 -0
 - {json_repair-0.15.5.dist-info → json_repair-0.16.0.dist-info}/top_level.txt +0 -0
 
    
        json_repair/json_repair.py
    CHANGED
    
    | 
         @@ -64,7 +64,7 @@ class JSONParser: 
     | 
|
| 
       64 
64 
     | 
    
         
             
                        return self.parse_array()
         
     | 
| 
       65 
65 
     | 
    
         
             
                    # there can be an edge case in which a key is empty and at the end of an object
         
     | 
| 
       66 
66 
     | 
    
         
             
                    # like "key": }. We return an empty string here to close the object properly
         
     | 
| 
       67 
     | 
    
         
            -
                    elif char == "}" 
     | 
| 
      
 67 
     | 
    
         
            +
                    elif char == "}":
         
     | 
| 
       68 
68 
     | 
    
         
             
                        self.log(
         
     | 
| 
       69 
69 
     | 
    
         
             
                            "At the end of an object we found a key with missing value, skipping",
         
     | 
| 
       70 
70 
     | 
    
         
             
                            "info",
         
     | 
| 
         @@ -78,13 +78,13 @@ class JSONParser: 
     | 
|
| 
       78 
78 
     | 
    
         
             
                    elif char == "“":
         
     | 
| 
       79 
79 
     | 
    
         
             
                        return self.parse_string(string_quotes=["“", "”"])
         
     | 
| 
       80 
80 
     | 
    
         
             
                    # <number> starts with [0-9] or minus
         
     | 
| 
       81 
     | 
    
         
            -
                    elif char.isdigit() or char == "-" or char == ".":
         
     | 
| 
      
 81 
     | 
    
         
            +
                    elif self.get_context() != "" and char.isdigit() or char == "-" or char == ".":
         
     | 
| 
       82 
82 
     | 
    
         
             
                        return self.parse_number()
         
     | 
| 
       83 
83 
     | 
    
         
             
                    # <boolean> could be (T)rue or (F)alse or (N)ull
         
     | 
| 
       84 
     | 
    
         
            -
                    elif char.lower() in ["t", "f", "n"]:
         
     | 
| 
      
 84 
     | 
    
         
            +
                    elif self.get_context() != "" and char.lower() in ["t", "f", "n"]:
         
     | 
| 
       85 
85 
     | 
    
         
             
                        return self.parse_boolean_or_null()
         
     | 
| 
       86 
86 
     | 
    
         
             
                    # This might be a <string> that is missing the starting '"'
         
     | 
| 
       87 
     | 
    
         
            -
                    elif char.isalpha():
         
     | 
| 
      
 87 
     | 
    
         
            +
                    elif self.get_context() != "" and char.isalpha():
         
     | 
| 
       88 
88 
     | 
    
         
             
                        return self.parse_string()
         
     | 
| 
       89 
89 
     | 
    
         
             
                    # If everything else fails, we just ignore and move on
         
     | 
| 
       90 
90 
     | 
    
         
             
                    else:
         
     | 
| 
         @@ -131,6 +131,8 @@ class JSONParser: 
     | 
|
| 
       131 
131 
     | 
    
         
             
                                    "info",
         
     | 
| 
       132 
132 
     | 
    
         
             
                                )
         
     | 
| 
       133 
133 
     | 
    
         
             
                                break
         
     | 
| 
      
 134 
     | 
    
         
            +
                            elif key == "":
         
     | 
| 
      
 135 
     | 
    
         
            +
                                self.index += 1
         
     | 
| 
       134 
136 
     | 
    
         | 
| 
       135 
137 
     | 
    
         
             
                        # We reached the end here
         
     | 
| 
       136 
138 
     | 
    
         
             
                        if (self.get_char_at() or "}") == "}":
         
     | 
| 
         @@ -172,8 +174,10 @@ class JSONParser: 
     | 
|
| 
       172 
174 
     | 
    
         
             
                def parse_array(self) -> List[Any]:
         
     | 
| 
       173 
175 
     | 
    
         
             
                    # <array> ::= '[' [ <json> *(', ' <json>) ] ']' ; A sequence of JSON values separated by commas
         
     | 
| 
       174 
176 
     | 
    
         
             
                    arr = []
         
     | 
| 
      
 177 
     | 
    
         
            +
                    self.set_context("array")
         
     | 
| 
       175 
178 
     | 
    
         
             
                    # Stop when you either find the closing parentheses or you have iterated over the entire string
         
     | 
| 
       176 
179 
     | 
    
         
             
                    while (self.get_char_at() or "]") != "]":
         
     | 
| 
      
 180 
     | 
    
         
            +
                        self.skip_whitespaces_at()
         
     | 
| 
       177 
181 
     | 
    
         
             
                        value = self.parse_json()
         
     | 
| 
       178 
182 
     | 
    
         | 
| 
       179 
183 
     | 
    
         
             
                        # It is possible that parse_json() returns nothing valid, so we stop
         
     | 
| 
         @@ -209,6 +213,7 @@ class JSONParser: 
     | 
|
| 
       209 
213 
     | 
    
         
             
                        self.index -= 1
         
     | 
| 
       210 
214 
     | 
    
         | 
| 
       211 
215 
     | 
    
         
             
                    self.index += 1
         
     | 
| 
      
 216 
     | 
    
         
            +
                    self.reset_context()
         
     | 
| 
       212 
217 
     | 
    
         
             
                    return arr
         
     | 
| 
       213 
218 
     | 
    
         | 
| 
       214 
219 
     | 
    
         
             
                def parse_string(self, string_quotes=False) -> str:
         
     | 
| 
         @@ -218,6 +223,7 @@ class JSONParser: 
     | 
|
| 
       218 
223 
     | 
    
         | 
| 
       219 
224 
     | 
    
         
             
                    # Flag to manage corner cases related to missing starting quote
         
     | 
| 
       220 
225 
     | 
    
         
             
                    fixed_quotes = False
         
     | 
| 
      
 226 
     | 
    
         
            +
                    doubled_quotes = False
         
     | 
| 
       221 
227 
     | 
    
         
             
                    lstring_delimiter = rstring_delimiter = '"'
         
     | 
| 
       222 
228 
     | 
    
         
             
                    if isinstance(string_quotes, list):
         
     | 
| 
       223 
229 
     | 
    
         
             
                        lstring_delimiter = string_quotes[0]
         
     | 
| 
         @@ -239,6 +245,7 @@ class JSONParser: 
     | 
|
| 
       239 
245 
     | 
    
         
             
                                "While parsing a string, we found a valid starting doubled quote, ignoring it",
         
     | 
| 
       240 
246 
     | 
    
         
             
                                "info",
         
     | 
| 
       241 
247 
     | 
    
         
             
                            )
         
     | 
| 
      
 248 
     | 
    
         
            +
                            doubled_quotes = True
         
     | 
| 
       242 
249 
     | 
    
         
             
                            self.index += 1
         
     | 
| 
       243 
250 
     | 
    
         
             
                    char = self.get_char_at()
         
     | 
| 
       244 
251 
     | 
    
         
             
                    if char != lstring_delimiter:
         
     | 
| 
         @@ -279,13 +286,9 @@ class JSONParser: 
     | 
|
| 
       279 
286 
     | 
    
         
             
                                self.remove_char_at(-1)
         
     | 
| 
       280 
287 
     | 
    
         
             
                                self.index -= 1
         
     | 
| 
       281 
288 
     | 
    
         
             
                        # ChatGPT sometimes forget to quote stuff in html tags or markdown, so we do this whole thing here
         
     | 
| 
       282 
     | 
    
         
            -
                        if  
     | 
| 
       283 
     | 
    
         
            -
                            char == rstring_delimiter
         
     | 
| 
       284 
     | 
    
         
            -
                            # Next character is not a delimiter
         
     | 
| 
       285 
     | 
    
         
            -
                            and self.get_char_at(1) not in [",", ":", "]", "}"]
         
     | 
| 
       286 
     | 
    
         
            -
                        ):
         
     | 
| 
      
 289 
     | 
    
         
            +
                        if char == rstring_delimiter:
         
     | 
| 
       287 
290 
     | 
    
         
             
                            # Special case here, in case of double quotes one after another
         
     | 
| 
       288 
     | 
    
         
            -
                            if self.get_char_at(1) == rstring_delimiter:
         
     | 
| 
      
 291 
     | 
    
         
            +
                            if doubled_quotes and self.get_char_at(1) == rstring_delimiter:
         
     | 
| 
       289 
292 
     | 
    
         
             
                                self.log(
         
     | 
| 
       290 
293 
     | 
    
         
             
                                    "While parsing a string, we found a doubled quote, ignoring it",
         
     | 
| 
       291 
294 
     | 
    
         
             
                                    "info",
         
     | 
| 
         @@ -294,13 +297,19 @@ class JSONParser: 
     | 
|
| 
       294 
297 
     | 
    
         
             
                                self.remove_char_at()
         
     | 
| 
       295 
298 
     | 
    
         
             
                            else:
         
     | 
| 
       296 
299 
     | 
    
         
             
                                # Check if eventually there is a rstring delimiter, otherwise we bail
         
     | 
| 
       297 
     | 
    
         
            -
                                i =  
     | 
| 
      
 300 
     | 
    
         
            +
                                i = 1
         
     | 
| 
       298 
301 
     | 
    
         
             
                                next_c = self.get_char_at(i)
         
     | 
| 
       299 
302 
     | 
    
         
             
                                while next_c and next_c != rstring_delimiter:
         
     | 
| 
      
 303 
     | 
    
         
            +
                                    # If we are in an object context, let's check for the right delimiters
         
     | 
| 
      
 304 
     | 
    
         
            +
                                    if (
         
     | 
| 
      
 305 
     | 
    
         
            +
                                        ("object_key" in self.context and next_c == ":")
         
     | 
| 
      
 306 
     | 
    
         
            +
                                        or ("object_value" in self.context and next_c in ["}", ","])
         
     | 
| 
      
 307 
     | 
    
         
            +
                                        or ("array" in self.context and next_c in ["]", ","])
         
     | 
| 
      
 308 
     | 
    
         
            +
                                    ):
         
     | 
| 
      
 309 
     | 
    
         
            +
                                        break
         
     | 
| 
       300 
310 
     | 
    
         
             
                                    i += 1
         
     | 
| 
       301 
311 
     | 
    
         
             
                                    next_c = self.get_char_at(i)
         
     | 
| 
       302 
     | 
    
         
            -
                                 
     | 
| 
       303 
     | 
    
         
            -
                                if next_c:
         
     | 
| 
      
 312 
     | 
    
         
            +
                                if next_c == rstring_delimiter:
         
     | 
| 
       304 
313 
     | 
    
         
             
                                    self.log(
         
     | 
| 
       305 
314 
     | 
    
         
             
                                        "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
         
     | 
| 
       306 
315 
     | 
    
         
             
                                        "info",
         
     | 
| 
         @@ -416,7 +425,7 @@ class JSONParser: 
     | 
|
| 
       416 
425 
     | 
    
         | 
| 
       417 
426 
     | 
    
         
             
                def get_context(self) -> str:
         
     | 
| 
       418 
427 
     | 
    
         
             
                    try:
         
     | 
| 
       419 
     | 
    
         
            -
                        return self.context[ 
     | 
| 
      
 428 
     | 
    
         
            +
                        return self.context[-1]
         
     | 
| 
       420 
429 
     | 
    
         
             
                    except Exception:
         
     | 
| 
       421 
430 
     | 
    
         
             
                        return ""
         
     | 
| 
       422 
431 
     | 
    
         | 
| 
         @@ -0,0 +1,7 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            json_repair/__init__.py,sha256=AlNie5y6BZBioGi5fzTAUvum_y0U5aL5aNsuQ_68LQc,175
         
     | 
| 
      
 2 
     | 
    
         
            +
            json_repair/json_repair.py,sha256=XahLp82VVwg8KgyywNxMFBHIPIbvPDp6uWMP1VD_40w,20418
         
     | 
| 
      
 3 
     | 
    
         
            +
            json_repair-0.16.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
         
     | 
| 
      
 4 
     | 
    
         
            +
            json_repair-0.16.0.dist-info/METADATA,sha256=-OLuKPGwu4enrP7kP2947Gyz9l7JRVpGTRH1b1MB2ZY,7355
         
     | 
| 
      
 5 
     | 
    
         
            +
            json_repair-0.16.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
         
     | 
| 
      
 6 
     | 
    
         
            +
            json_repair-0.16.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
         
     | 
| 
      
 7 
     | 
    
         
            +
            json_repair-0.16.0.dist-info/RECORD,,
         
     | 
| 
         @@ -1,7 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            json_repair/__init__.py,sha256=AlNie5y6BZBioGi5fzTAUvum_y0U5aL5aNsuQ_68LQc,175
         
     | 
| 
       2 
     | 
    
         
            -
            json_repair/json_repair.py,sha256=nt7wiolfT-b65nRqdHP6CHl0BXeGduBPUFrgN4xPyFU,19879
         
     | 
| 
       3 
     | 
    
         
            -
            json_repair-0.15.5.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
         
     | 
| 
       4 
     | 
    
         
            -
            json_repair-0.15.5.dist-info/METADATA,sha256=8rWonhcvr29wSKLVwEbqBydSPQ6k7H9tN6illiA4gPI,7355
         
     | 
| 
       5 
     | 
    
         
            -
            json_repair-0.15.5.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
         
     | 
| 
       6 
     | 
    
         
            -
            json_repair-0.15.5.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
         
     | 
| 
       7 
     | 
    
         
            -
            json_repair-0.15.5.dist-info/RECORD,,
         
     | 
| 
         
            File without changes
         
     | 
| 
         
            File without changes
         
     | 
| 
         
            File without changes
         
     |