PyPI - json-repair - Versions diffs - 0.52.5__tar.gz → 0.53.1__tar.gz - Mend

json-repair 0.52.5.tar.gz → 0.53.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

{json_repair-0.52.5/src/json_repair.egg-info → json_repair-0.53.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: json_repair
-Version: 0.52.5
+Version: 0.53.1
 Summary: A package to repair broken json strings
 Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
 License-Expression: MIT

{json_repair-0.52.5 → json_repair-0.53.1}/pyproject.toml RENAMED Viewed

@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
 build-backend = "setuptools.build_meta"
 [project]
 name = "json_repair"
-version = "0.52.5"
+version = "0.53.1"
 license = "MIT"
 license-files = ["LICENSE"]
 authors = [
@@ -117,3 +117,5 @@ line-ending = "auto"
 [tool.ruff.lint.per-file-ignores]
 # Explicit re-exports is fine in __init__.py, still a code smell elsewhere.
 "__init__.py" = ["PLC0414"]
+[tool.mypy]
+strict = true

{json_repair-0.52.5 → json_repair-0.53.1}/src/json_repair/__init__.py RENAMED Viewed

@@ -1,4 +1,4 @@
-from .constants import JSONReturnType
 from .json_repair import from_file, load, loads, repair_json
+from .utils.constants import JSONReturnType
 __all__ = ["from_file", "load", "loads", "repair_json", "JSONReturnType"]

{json_repair-0.52.5 → json_repair-0.53.1}/src/json_repair/json_parser.py RENAMED Viewed

@@ -1,36 +1,32 @@
-from typing import Literal, TextIO
+from typing import TextIO
-from .constants import STRING_DELIMITERS, JSONReturnType
-from .json_context import JsonContext
-from .object_comparer import ObjectComparer
 from .parse_array import parse_array as _parse_array
-from .parse_boolean_or_null import parse_boolean_or_null as _parse_boolean_or_null
 from .parse_comment import parse_comment as _parse_comment
 from .parse_number import parse_number as _parse_number
 from .parse_object import parse_object as _parse_object
 from .parse_string import parse_string as _parse_string
-from .string_file_wrapper import StringFileWrapper
+from .utils.constants import STRING_DELIMITERS, JSONReturnType
+from .utils.json_context import JsonContext
+from .utils.object_comparer import ObjectComparer
+from .utils.string_file_wrapper import StringFileWrapper
 class JSONParser:
     # Split the parse methods into separate files because this one was like 3000 lines
-    def parse_array(self, *args, **kwargs):
-        return _parse_array(self, *args, **kwargs)
+    def parse_array(self) -> list[JSONReturnType]:
+        return _parse_array(self)
-    def parse_boolean_or_null(self, *args, **kwargs):
-        return _parse_boolean_or_null(self, *args, **kwargs)
+    def parse_comment(self) -> JSONReturnType:
+        return _parse_comment(self)
-    def parse_comment(self, *args, **kwargs):
-        return _parse_comment(self, *args, **kwargs)
+    def parse_number(self) -> JSONReturnType:
+        return _parse_number(self)
-    def parse_number(self, *args, **kwargs):
-        return _parse_number(self, *args, **kwargs)
+    def parse_object(self) -> JSONReturnType:
+        return _parse_object(self)
-    def parse_object(self, *args, **kwargs):
-        return _parse_object(self, *args, **kwargs)
-    def parse_string(self, *args, **kwargs):
-        return _parse_string(self, *args, **kwargs)
+    def parse_string(self) -> JSONReturnType:
+        return _parse_string(self)
     def __init__(
         self,
@@ -107,8 +103,8 @@ class JSONParser:
     ) -> JSONReturnType:
         while True:
             char = self.get_char_at()
-            # False means that we are at the end of the string provided
-            if char is False:
+            # None means that we are at the end of the string provided
+            if char is None:
                 return ""
             # <object> starts with '{'
             elif char == "{":
@@ -130,30 +126,36 @@ class JSONParser:
             else:
                 self.index += 1
-    def get_char_at(self, count: int = 0) -> str | Literal[False]:
+    def get_char_at(self, count: int = 0) -> str | None:
         # Why not use something simpler? Because try/except in python is a faster alternative to an "if" statement that is often True
         try:
             return self.json_str[self.index + count]
         except IndexError:
-            return False
+            return None
-    def skip_whitespaces_at(self, idx: int = 0, move_main_index=True) -> int:
+    def skip_whitespaces(self) -> None:
         """
-        This function quickly iterates on whitespaces, syntactic sugar to make the code more concise
+        This function quickly iterates on whitespaces, moving the self.index forward
         """
         try:
-            char = self.json_str[self.index + idx]
-        except IndexError:
-            return idx
-        while char.isspace():
-            if move_main_index:
+            char = self.json_str[self.index]
+            while char.isspace():
                 self.index += 1
-            else:
+                char = self.json_str[self.index]
+        except IndexError:
+            pass
+    def scroll_whitespaces(self, idx: int = 0) -> int:
+        """
+        This function quickly iterates on whitespaces. Doesn't move the self.index and returns the offset from self.index
+        """
+        try:
+            char = self.json_str[self.index + idx]
+            while char.isspace():
                 idx += 1
-            try:
                 char = self.json_str[self.index + idx]
-            except IndexError:
-                return idx
+        except IndexError:
+            pass
         return idx
     def skip_to_character(self, character: str | list[str], idx: int = 0) -> int:

{json_repair-0.52.5 → json_repair-0.53.1}/src/json_repair/json_repair.py RENAMED Viewed

@@ -25,10 +25,10 @@ All supported use cases are in the unit tests
 import argparse
 import json
 import sys
-from typing import Literal, TextIO, overload
+from typing import Any, Literal, TextIO, overload
-from .constants import JSONReturnType
 from .json_parser import JSONParser
+from .utils.constants import JSONReturnType
 @overload
@@ -40,7 +40,7 @@ def repair_json(
     json_fd: TextIO | None = None,
     chunk_length: int = 0,
     stream_stable: bool = False,
-    **json_dumps_args,
+    **json_dumps_args: Any,
 ) -> str: ...
@@ -53,7 +53,7 @@ def repair_json(
     json_fd: TextIO | None = None,
     chunk_length: int = 0,
     stream_stable: bool = False,
-    **json_dumps_args,
+    **json_dumps_args: Any,
 ) -> JSONReturnType | tuple[JSONReturnType, list[dict[str, str]]]: ...
@@ -65,8 +65,8 @@ def repair_json(
     json_fd: TextIO | None = None,
     chunk_length: int = 0,
     stream_stable: bool = False,
-    **json_dumps_args,
-) -> JSONReturnType | tuple[JSONReturnType, list[dict[str, str]]] | tuple[JSONReturnType, list]:
+    **json_dumps_args: Any,
+) -> JSONReturnType | tuple[JSONReturnType, list[dict[str, str]]]:
     """
     Given a json formatted string, it will try to decode it and, if it fails, it will try to fix it.

{json_repair-0.52.5 → json_repair-0.53.1}/src/json_repair/parse_array.py RENAMED Viewed

@@ -1,8 +1,8 @@
 from typing import TYPE_CHECKING
-from .constants import STRING_DELIMITERS, JSONReturnType
-from .json_context import ContextValues
-from .object_comparer import ObjectComparer
+from .utils.constants import STRING_DELIMITERS, JSONReturnType
+from .utils.json_context import ContextValues
+from .utils.object_comparer import ObjectComparer
 if TYPE_CHECKING:
     from .json_parser import JSONParser
@@ -15,7 +15,7 @@ def parse_array(self: "JSONParser") -> list[JSONReturnType]:
     # Stop when you either find the closing parentheses or you have iterated over the entire string
     char = self.get_char_at()
     while char and char not in ["]", "}"]:
-        self.skip_whitespaces_at()
+        self.skip_whitespaces()
         value: JSONReturnType = ""
         if char in STRING_DELIMITERS:
             # Sometimes it can happen that LLMs forget to start an object and then you think it's a string in an array
@@ -23,13 +23,13 @@ def parse_array(self: "JSONParser") -> list[JSONReturnType]:
             # And either parse the string or parse the object
             i = 1
             i = self.skip_to_character(char, i)
-            i = self.skip_whitespaces_at(idx=i + 1, move_main_index=False)
+            i = self.scroll_whitespaces(idx=i + 1)
             value = self.parse_object() if self.get_char_at(i) == ":" else self.parse_string()
         else:
             value = self.parse_json()
-        # It is possible that parse_json() returns nothing valid, so we increase by 1
-        if ObjectComparer.is_strictly_empty(value):
+        # It is possible that parse_json() returns nothing valid, so we increase by 1, unless we find an array separator
+        if ObjectComparer.is_strictly_empty(value) and self.get_char_at() not in ["]", ","]:
             self.index += 1
         elif value == "..." and self.get_char_at(-1) == ".":
             self.log(
@@ -45,7 +45,7 @@ def parse_array(self: "JSONParser") -> list[JSONReturnType]:
             char = self.get_char_at()
     # Especially at the end of an LLM generated json you might miss the last "]"
-    if char and char != "]":
+    if char != "]":
         self.log(
             "While parsing an array we missed the closing ], ignoring it",
         )

{json_repair-0.52.5 → json_repair-0.53.1}/src/json_repair/parse_comment.py RENAMED Viewed

@@ -1,7 +1,7 @@
 from typing import TYPE_CHECKING
-from .constants import JSONReturnType
-from .json_context import ContextValues
+from .utils.constants import JSONReturnType
+from .utils.json_context import ContextValues
 if TYPE_CHECKING:
     from .json_parser import JSONParser

{json_repair-0.52.5 → json_repair-0.53.1}/src/json_repair/parse_number.py RENAMED Viewed

@@ -1,6 +1,7 @@
 from typing import TYPE_CHECKING
-from .json_context import ContextValues
+from .utils.constants import JSONReturnType
+from .utils.json_context import ContextValues
 NUMBER_CHARS: set[str] = set("0123456789-.eE/,")
@@ -9,7 +10,7 @@ if TYPE_CHECKING:
     from .json_parser import JSONParser
-def parse_number(self: "JSONParser") -> float | int | str | bool | None:
+def parse_number(self: "JSONParser") -> JSONReturnType:
     # <number> is a valid real number expressed in one of a number of given formats
     number_str = ""
     char = self.get_char_at()

{json_repair-0.52.5 → json_repair-0.53.1}/src/json_repair/parse_object.py RENAMED Viewed

@@ -1,13 +1,13 @@
 from typing import TYPE_CHECKING
-from .constants import STRING_DELIMITERS, JSONReturnType
-from .json_context import ContextValues
+from .utils.constants import STRING_DELIMITERS, JSONReturnType
+from .utils.json_context import ContextValues
 if TYPE_CHECKING:
     from .json_parser import JSONParser
-def parse_object(self: "JSONParser") -> dict[str, JSONReturnType]:
+def parse_object(self: "JSONParser") -> JSONReturnType:
     # <object> ::= '{' [ <member> *(', ' <member>) ] '}' ; A sequence of 'members'
     obj: dict[str, JSONReturnType] = {}
     start_index = self.index
@@ -17,10 +17,10 @@ def parse_object(self: "JSONParser") -> dict[str, JSONReturnType]:
         # <member> ::= <string> ': ' <json>
         # Skip filler whitespaces
-        self.skip_whitespaces_at()
+        self.skip_whitespaces()
         # Sometimes LLMs do weird things, if we find a ":" so early, we'll change it to "," and move on
-        if (self.get_char_at() or "") == ":":
+        if self.get_char_at() == ":":
             self.log(
                 "While parsing an object we found a : before a key, ignoring",
             )
@@ -53,14 +53,14 @@ def parse_object(self: "JSONParser") -> dict[str, JSONReturnType]:
                             prev_value.extend(
                                 new_array[0] if len(new_array) == 1 and isinstance(new_array[0], list) else new_array
                             )
-                        self.skip_whitespaces_at()
+                        self.skip_whitespaces()
                         if self.get_char_at() == ",":
                             self.index += 1
-                        self.skip_whitespaces_at()
+                        self.skip_whitespaces()
                         continue
             key = str(self.parse_string())
             if key == "":
-                self.skip_whitespaces_at()
+                self.skip_whitespaces()
             if key != "" or (key == "" and self.get_char_at() in [":", "}"]):
                 # If the string is empty but there is a object divider, we are done here
                 break
@@ -74,16 +74,16 @@ def parse_object(self: "JSONParser") -> dict[str, JSONReturnType]:
             break
         # Skip filler whitespaces
-        self.skip_whitespaces_at()
+        self.skip_whitespaces()
         # We reached the end here
         if (self.get_char_at() or "}") == "}":
             continue
-        self.skip_whitespaces_at()
+        self.skip_whitespaces()
         # An extreme case of missing ":" after a key
-        if (self.get_char_at() or "") != ":":
+        if self.get_char_at() != ":":
             self.log(
                 "While parsing an object we missed a : after a key",
             )
@@ -92,10 +92,10 @@ def parse_object(self: "JSONParser") -> dict[str, JSONReturnType]:
         self.context.reset()
         self.context.set(ContextValues.OBJECT_VALUE)
         # The value can be any valid json
-        self.skip_whitespaces_at()
+        self.skip_whitespaces()
         # Corner case, a lone comma
         value: JSONReturnType = ""
-        if (self.get_char_at() or "") in [",", "}"]:
+        if self.get_char_at() in [",", "}"]:
             self.log(
                 "While parsing an object value we found a stray , ignoring it",
             )
@@ -106,11 +106,11 @@ def parse_object(self: "JSONParser") -> dict[str, JSONReturnType]:
         self.context.reset()
         obj[key] = value
-        if (self.get_char_at() or "") in [",", "'", '"']:
+        if self.get_char_at() in [",", "'", '"']:
             self.index += 1
         # Remove trailing spaces
-        self.skip_whitespaces_at()
+        self.skip_whitespaces()
     self.index += 1
@@ -126,12 +126,12 @@ def parse_object(self: "JSONParser") -> dict[str, JSONReturnType]:
     if not self.context.empty:
         return obj
-    self.skip_whitespaces_at()
-    if (self.get_char_at() or "") != ",":
+    self.skip_whitespaces()
+    if self.get_char_at() != ",":
         return obj
     self.index += 1
-    self.skip_whitespaces_at()
-    if (self.get_char_at() or "") not in STRING_DELIMITERS:
+    self.skip_whitespaces()
+    if self.get_char_at() not in STRING_DELIMITERS:
         return obj
     self.log(
         "Found a comma and string delimiter after object closing brace, checking for additional key-value pairs",

{json_repair-0.52.5 → json_repair-0.53.1}/src/json_repair/parse_string.py RENAMED Viewed

@@ -1,13 +1,22 @@
 from typing import TYPE_CHECKING
-from .constants import STRING_DELIMITERS, JSONReturnType
-from .json_context import ContextValues
+from .parse_string_helpers.parse_boolean_or_null import parse_boolean_or_null
+from .parse_string_helpers.parse_json_llm_block import parse_json_llm_block
+from .utils.constants import STRING_DELIMITERS, JSONReturnType
+from .utils.json_context import ContextValues
 if TYPE_CHECKING:
     from .json_parser import JSONParser
 def parse_string(self: "JSONParser") -> JSONReturnType:
+    # Utility function to append a character to the accumulator and update the index
+    def _append_literal_char(acc: str, current_char: str | None) -> tuple[str, str | None]:
+        acc += str(current_char)
+        self.index += 1
+        char = self.get_char_at()
+        return acc, char
     # <string> is a string of valid characters enclosed in quotes
     # i.e. { name: "John" }
     # Somehow all weird cases in an invalid JSON happen to be resolved in this function, so be careful here
@@ -39,7 +48,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
         # This could be a <boolean> and not a string. Because (T)rue or (F)alse or (N)ull are valid
         # But remember, object keys are only of type string
         if char.lower() in ["t", "f", "n"] and self.context.current != ContextValues.OBJECT_KEY:
-            value = self.parse_boolean_or_null()
+            value = parse_boolean_or_null(self)
             if value != "":
                 return value
         self.log(
@@ -49,12 +58,21 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
     if not missing_quotes:
         self.index += 1
+    if self.get_char_at() == "`":
+        ret_val = parse_json_llm_block(self)
+        # If we found a valid JSON block, return it, otherwise continue parsing the string
+        if ret_val is not False:
+            return ret_val
+        self.log(
+            "While parsing a string, we found code fences but they did not enclose valid JSON, continuing parsing the string",
+        )
     # There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
-    if self.get_char_at() in STRING_DELIMITERS and self.get_char_at() == lstring_delimiter:
+    if self.get_char_at() == lstring_delimiter:
         # If it's an empty key, this was easy
-        if (self.context.current == ContextValues.OBJECT_KEY and self.get_char_at(1) == ":") or (
-            self.context.current == ContextValues.OBJECT_VALUE and self.get_char_at(1) in [",", "}"]
+        if (
+            (self.context.current == ContextValues.OBJECT_KEY and self.get_char_at(1) == ":")
+            or (self.context.current == ContextValues.OBJECT_VALUE and self.get_char_at(1) in [",", "}"])
+            or (self.context.current == ContextValues.ARRAY and self.get_char_at(1) in [",", "]"])
         ):
             self.index += 1
             return ""
@@ -69,7 +87,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
         next_c = self.get_char_at(i)
         # Now check that the next character is also a delimiter to ensure that we have "".....""
         # In that case we ignore this rstring delimiter
-        if next_c and (self.get_char_at(i + 1) or "") == rstring_delimiter:
+        if self.get_char_at(i + 1) == rstring_delimiter:
             self.log(
                 "While parsing a string, we found a valid starting doubled quote",
             )
@@ -77,7 +95,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
             self.index += 1
         else:
             # Ok this is not a doubled quote, check if this is an empty string or not
-            i = self.skip_whitespaces_at(idx=1, move_main_index=False)
+            i = self.scroll_whitespaces(idx=1)
             next_c = self.get_char_at(i)
             if next_c in STRING_DELIMITERS + ["{", "["]:
                 # something fishy is going on here
@@ -127,7 +145,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
         ):
             rstring_delimiter_missing = True
             # check if this is a case in which the closing comma is NOT missing instead
-            self.skip_whitespaces_at()
+            self.skip_whitespaces()
             if self.get_char_at(1) == "\\":
                 # Ok this is a quoted string, skip
                 rstring_delimiter_missing = False
@@ -137,7 +155,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
                 i += 1
                 # found a delimiter, now we need to check that is followed strictly by a comma or brace
                 # or the string ended
-                i = self.skip_whitespaces_at(idx=i, move_main_index=False)
+                i = self.scroll_whitespaces(idx=i)
                 next_c = self.get_char_at(i)
                 if not next_c or next_c in [",", "}"]:
                     rstring_delimiter_missing = False
@@ -152,7 +170,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
                     else:
                         # But again, this could just be something a bit stupid like "lorem, "ipsum" sic"
                         # Check if we find a : afterwards (skipping space)
-                        i = self.skip_whitespaces_at(idx=i + 1, move_main_index=False)
+                        i = self.scroll_whitespaces(idx=i + 1)
                         next_c = self.get_char_at(i)
                         if next_c and next_c != ":":
                             rstring_delimiter_missing = False
@@ -167,7 +185,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
                     break
                 else:
                     # skip any whitespace first
-                    i = self.skip_whitespaces_at(idx=1, move_main_index=False)
+                    i = self.scroll_whitespaces(idx=1)
                     # We couldn't find any rstring_delimeter before the end of the string
                     # check if this is the last string of an object and therefore we can keep going
                     # make an exception if this is the last char before the closing brace
@@ -204,19 +222,15 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
         if self.context.current == ContextValues.OBJECT_VALUE and char == "}":
             # We found the end of an object while parsing a value
             # Check if the object is really over, to avoid doubling the closing brace
-            i = self.skip_whitespaces_at(idx=1, move_main_index=False)
+            i = self.scroll_whitespaces(idx=1)
             next_c = self.get_char_at(i)
-            if next_c and next_c == "`":
+            if next_c == "`" and self.get_char_at(i + 1) == "`" and self.get_char_at(i + 2) == "`":
                 # This could be a special case in which the LLM added code fences after the object
                 # So we need to check if there are another two ` after this one`
-                next_c = self.get_char_at(i + 1)
-                if next_c and next_c == "`":
-                    next_c = self.get_char_at(i + 2)
-                    if next_c and next_c == "`":
-                        self.log(
-                            "While parsing a string in object value context, we found a } that closes the object before code fences, stopping here",
-                        )
-                        break
+                self.log(
+                    "While parsing a string in object value context, we found a } that closes the object before code fences, stopping here",
+                )
+                break
             if not next_c:
                 self.log(
                     "While parsing a string in object value context, we found a } that closes the object, stopping here",
@@ -274,12 +288,11 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
                     # found a second delimiter
                     i += 1
                     # Skip spaces
-                    i = self.skip_whitespaces_at(idx=i, move_main_index=False)
-                    next_c = self.get_char_at(i)
-                    if next_c and next_c in [",", "}"]:
+                    i = self.scroll_whitespaces(idx=i)
+                    if self.get_char_at(i) in [",", "}"]:
                         # Ok then this is a missing right quote
                         self.log(
-                            "While parsing a string missing the right delimiter in object key context, we found a :, stopping here",
+                            "While parsing a string missing the right delimiter in object key context, we found a , or } stopping here",
                         )
                         break
             else:
@@ -308,9 +321,8 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
                     # We found a quote, now let's make sure there's a ":" following
                     i += 1
                     # found a delimiter, now we need to check that is followed strictly by a comma or brace
-                    i = self.skip_whitespaces_at(idx=i, move_main_index=False)
-                    next_c = self.get_char_at(i)
-                    if next_c and next_c == ":":
+                    i = self.scroll_whitespaces(idx=i)
+                    if self.get_char_at(i) == ":":
                         # Reset the cursor
                         self.index -= 1
                         char = self.get_char_at()
@@ -320,9 +332,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
                         break
             elif unmatched_delimiter:
                 unmatched_delimiter = False
-                string_acc += str(char)
-                self.index += 1
-                char = self.get_char_at()
+                string_acc, char = _append_literal_char(string_acc, char)
             else:
                 # Check if eventually there is a rstring delimiter, otherwise we bail
                 i = 1
@@ -357,22 +367,20 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
                     next_c = self.get_char_at(i)
                     # Ok now I found a delimiter, let's skip whitespaces and see if next we find a } or a ,
                     i += 1
-                    i = self.skip_whitespaces_at(idx=i, move_main_index=False)
+                    i = self.scroll_whitespaces(idx=i)
                     next_c = self.get_char_at(i)
                     if next_c in ["}", ","]:
                         self.log(
-                            "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
+                            "While parsing a string, we found a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
                         )
-                        string_acc += str(char)
-                        self.index += 1
-                        char = self.get_char_at()
+                        string_acc, char = _append_literal_char(string_acc, char)
                         continue
                 elif next_c == rstring_delimiter and self.get_char_at(i - 1) != "\\":
                     # Check if self.index:self.index+i is only whitespaces, break if that's the case
                     if all(str(self.get_char_at(j)).isspace() for j in range(1, i) if self.get_char_at(j)):
                         break
                     if self.context.current == ContextValues.OBJECT_VALUE:
-                        i = self.skip_whitespaces_at(idx=i + 1, move_main_index=False)
+                        i = self.scroll_whitespaces(idx=i + 1)
                         if self.get_char_at(i) == ",":
                             # So we found a comma, this could be a case of a single quote like "va"lue",
                             # Search if it's followed by another key, starting with the first delimeter
@@ -380,15 +388,13 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
                             i += 1
                             i = self.skip_to_character(character=rstring_delimiter, idx=i + 1)
                             i += 1
-                            i = self.skip_whitespaces_at(idx=i, move_main_index=False)
+                            i = self.scroll_whitespaces(idx=i)
                             next_c = self.get_char_at(i)
                             if next_c == ":":
                                 self.log(
-                                    "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
+                                    "While parsing a string, we found a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
                                 )
-                                string_acc += str(char)
-                                self.index += 1
-                                char = self.get_char_at()
+                                string_acc, char = _append_literal_char(string_acc, char)
                                 continue
                         # We found a delimiter and we need to check if this is a key
                         # so find a rstring_delimiter and a colon after
@@ -405,12 +411,10 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
                         # Only if we fail to find a ':' then we know this is misplaced quote
                         if next_c != ":":
                             self.log(
-                                "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
+                                "While parsing a string, we found a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
                             )
                             unmatched_delimiter = not unmatched_delimiter
-                            string_acc += str(char)
-                            self.index += 1
-                            char = self.get_char_at()
+                            string_acc, char = _append_literal_char(string_acc, char)
                     elif self.context.current == ContextValues.ARRAY:
                         # So here we can have a few valid cases:
                         # ["bla bla bla "puppy" bla bla bla "kitty" bla bla"]
@@ -434,9 +438,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
                                 "While parsing a string in Array context, we detected a quoted section that would have closed the string but has a different meaning here, ignoring it",
                             )
                             unmatched_delimiter = not unmatched_delimiter
-                            string_acc += str(char)
-                            self.index += 1
-                            char = self.get_char_at()
+                            string_acc, char = _append_literal_char(string_acc, char)
                         else:
                             break
                     elif self.context.current == ContextValues.OBJECT_KEY:
@@ -444,14 +446,12 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
                         self.log(
                             "While parsing a string in Object Key context, we detected a quoted section that would have closed the string but has a different meaning here, ignoring it",
                         )
-                        string_acc += str(char)
-                        self.index += 1
-                        char = self.get_char_at()
+                        string_acc, char = _append_literal_char(string_acc, char)
     if char and missing_quotes and self.context.current == ContextValues.OBJECT_KEY and char.isspace():
         self.log(
             "While parsing a string, handling an extreme corner case in which the LLM added a comment instead of valid string, invalidate the string and return an empty value",
         )
-        self.skip_whitespaces_at()
+        self.skip_whitespaces()
         if self.get_char_at() not in [":", ","]:
             return ""

json_repair-0.53.1/src/json_repair/parse_string_helpers/parse_boolean_or_null.py ADDED Viewed

@@ -0,0 +1,28 @@
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from ..json_parser import JSONParser  # noqa: TID252
+def parse_boolean_or_null(parser: "JSONParser") -> bool | str | None:
+    # <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
+    char = (parser.get_char_at() or "").lower()
+    value_map: dict[str, tuple[str, bool | None]] = {
+        "t": ("true", True),
+        "f": ("false", False),
+        "n": ("null", None),
+    }
+    value: tuple[str, bool | None] = value_map[char]
+    i = 0
+    starting_index = parser.index
+    while char and i < len(value[0]) and char == value[0][i]:
+        i += 1
+        parser.index += 1
+        char = (parser.get_char_at() or "").lower()
+    if i == len(value[0]):
+        return value[1]
+    # If nothing works reset the index before returning
+    parser.index = starting_index
+    return ""

json_repair-0.53.1/src/json_repair/parse_string_helpers/parse_json_llm_block.py ADDED Viewed

@@ -0,0 +1,19 @@
+from typing import TYPE_CHECKING
+from ..utils.constants import JSONReturnType  # noqa: TID252
+if TYPE_CHECKING:
+    from ..json_parser import JSONParser  # noqa: TID252
+def parse_json_llm_block(parser: "JSONParser") -> JSONReturnType:
+    """
+    Extracts and normalizes JSON enclosed in ```json ... ``` blocks.
+    """
+    # Try to find a ```json ... ``` block
+    if parser.json_str[parser.index : parser.index + 7] == "```json":
+        i = parser.skip_to_character("`", idx=7)
+        if parser.json_str[parser.index + i : parser.index + i + 3] == "```":
+            parser.index += 7  # Move past ```json
+            return parser.parse_json()
+    return False

{json_repair-0.52.5 → json_repair-0.53.1/src/json_repair.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: json_repair
-Version: 0.52.5
+Version: 0.53.1
 Summary: A package to repair broken json strings
 Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
 License-Expression: MIT

{json_repair-0.52.5 → json_repair-0.53.1}/src/json_repair.egg-info/SOURCES.txt RENAMED Viewed

@@ -3,27 +3,27 @@ README.md
 pyproject.toml
 src/json_repair/__init__.py
 src/json_repair/__main__.py
-src/json_repair/constants.py
-src/json_repair/json_context.py
 src/json_repair/json_parser.py
 src/json_repair/json_repair.py
-src/json_repair/object_comparer.py
 src/json_repair/parse_array.py
-src/json_repair/parse_boolean_or_null.py
 src/json_repair/parse_comment.py
 src/json_repair/parse_number.py
 src/json_repair/parse_object.py
 src/json_repair/parse_string.py
 src/json_repair/py.typed
-src/json_repair/string_file_wrapper.py
 src/json_repair.egg-info/PKG-INFO
 src/json_repair.egg-info/SOURCES.txt
 src/json_repair.egg-info/dependency_links.txt
 src/json_repair.egg-info/entry_points.txt
 src/json_repair.egg-info/top_level.txt
+src/json_repair/parse_string_helpers/parse_boolean_or_null.py
+src/json_repair/parse_string_helpers/parse_json_llm_block.py
+src/json_repair/utils/constants.py
+src/json_repair/utils/json_context.py
+src/json_repair/utils/object_comparer.py
+src/json_repair/utils/string_file_wrapper.py
 tests/test_json_repair.py
 tests/test_parse_array.py
-tests/test_parse_boolean_or_null.py
 tests/test_parse_comment.py
 tests/test_parse_number.py
 tests/test_parse_object.py

{json_repair-0.52.5 → json_repair-0.53.1}/tests/test_parse_string.py RENAMED Viewed

@@ -100,3 +100,24 @@ def test_leading_trailing_characters():
                        ```""")
         == '{"key": "value"}'
     )
+def test_string_json_llm_block():
+    assert repair_json('{"key": "``"') == '{"key": "``"}'
+    assert repair_json('{"key": "```json"') == '{"key": "```json"}'
+    assert (
+        repair_json('{"key": "```json {"key": [{"key1": 1},{"key2": 2}]}```"}')
+        == '{"key": {"key": [{"key1": 1}, {"key2": 2}]}}'
+    )
+    assert repair_json('{"response": "```json{}"') == '{"response": "```json{}"}'
+def test_parse_boolean_or_null():
+    assert repair_json("True", return_objects=True) == ""
+    assert repair_json("False", return_objects=True) == ""
+    assert repair_json("Null", return_objects=True) == ""
+    assert repair_json("true", return_objects=True)
+    assert not repair_json("false", return_objects=True)
+    assert repair_json("null", return_objects=True) is None
+    assert repair_json('  {"key": true, "key2": false, "key3": null}') == '{"key": true, "key2": false, "key3": null}'
+    assert repair_json('{"key": TRUE, "key2": FALSE, "key3": Null}   ') == '{"key": true, "key2": false, "key3": null}'

{json_repair-0.52.5 → json_repair-0.53.1}/tests/test_repair_json_cli.py RENAMED Viewed

@@ -11,23 +11,23 @@ from src.json_repair.json_repair import cli
 def test_cli(capsys):
     # Create a temporary file
     temp_fd, temp_path = tempfile.mkstemp(suffix=".json")
+    _, tempout_path = tempfile.mkstemp(suffix=".json")
     try:
         # Write content to the temporary file
         with os.fdopen(temp_fd, "w") as tmp:
             tmp.write("{key:value")
-        cli(inline_args=[temp_path, "--indent", 0, "--ensure_ascii"])
+        cli(inline_args=[temp_path, "--indent", "0", "--ensure_ascii"])
         captured = capsys.readouterr()
         assert captured.out == '{\n"key": "value"\n}\n'
         # Test the output option
-        tempout_fd, tempout_path = tempfile.mkstemp(suffix=".json")
-        cli(inline_args=[temp_path, "--indent", 0, "-o", tempout_path])
+        cli(inline_args=[temp_path, "--indent", "0", "-o", tempout_path])
         with open(tempout_path) as tmp:
             out = tmp.read()
         assert out == '{\n"key": "value"\n}'
         # Test the inline option
-        cli(inline_args=[temp_path, "--indent", 0, "-i"])
+        cli(inline_args=[temp_path, "--indent", "0", "-i"])
         with open(temp_path) as tmp:
             out = tmp.read()
         assert out == '{\n"key": "value"\n}'
@@ -43,7 +43,7 @@ def test_cli(capsys):
     expected_output = '{\n"key": "value"\n}\n'
     # Patch sys.stdin so that cli() reads from it instead of a file.
     with patch("sys.stdin", new=io.StringIO(test_input)):
-        cli(inline_args=["--indent", 0])
+        cli(inline_args=["--indent", "0"])
     captured = capsys.readouterr()
     assert captured.out == expected_output

json_repair-0.52.5/src/json_repair/parse_boolean_or_null.py DELETED Viewed

@@ -1,30 +0,0 @@
-from typing import TYPE_CHECKING
-if TYPE_CHECKING:
-    from .json_parser import JSONParser
-def parse_boolean_or_null(self: "JSONParser") -> bool | str | None:
-    # <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
-    starting_index = self.index
-    char = (self.get_char_at() or "").lower()
-    value: tuple[str, bool | None] | None = None
-    if char == "t":
-        value = ("true", True)
-    elif char == "f":
-        value = ("false", False)
-    elif char == "n":
-        value = ("null", None)
-    if value:
-        i = 0
-        while char and i < len(value[0]) and char == value[0][i]:
-            i += 1
-            self.index += 1
-            char = (self.get_char_at() or "").lower()
-        if i == len(value[0]):
-            return value[1]
-    # If nothing works reset the index before returning
-    self.index = starting_index
-    return ""

json_repair-0.52.5/tests/test_parse_boolean_or_null.py DELETED Viewed

@@ -1,12 +0,0 @@
-from src.json_repair.json_repair import repair_json
-def test_parse_boolean_or_null():
-    assert repair_json("True", return_objects=True) == ""
-    assert repair_json("False", return_objects=True) == ""
-    assert repair_json("Null", return_objects=True) == ""
-    assert repair_json("true", return_objects=True)
-    assert not repair_json("false", return_objects=True)
-    assert repair_json("null", return_objects=True) is None
-    assert repair_json('  {"key": true, "key2": false, "key3": null}') == '{"key": true, "key2": false, "key3": null}'
-    assert repair_json('{"key": TRUE, "key2": FALSE, "key3": Null}   ') == '{"key": true, "key2": false, "key3": null}'

{json_repair-0.52.5 → json_repair-0.53.1}/LICENSE RENAMED Viewed

File without changes

{json_repair-0.52.5 → json_repair-0.53.1}/README.md RENAMED Viewed

File without changes

{json_repair-0.52.5 → json_repair-0.53.1}/setup.cfg RENAMED Viewed

File without changes

{json_repair-0.52.5 → json_repair-0.53.1}/src/json_repair/__main__.py RENAMED Viewed

File without changes

{json_repair-0.52.5 → json_repair-0.53.1}/src/json_repair/py.typed RENAMED Viewed

File without changes

{json_repair-0.52.5/src/json_repair → json_repair-0.53.1/src/json_repair/utils}/constants.py RENAMED Viewed

File without changes

{json_repair-0.52.5/src/json_repair → json_repair-0.53.1/src/json_repair/utils}/json_context.py RENAMED Viewed

File without changes

{json_repair-0.52.5/src/json_repair → json_repair-0.53.1/src/json_repair/utils}/object_comparer.py RENAMED Viewed

File without changes

{json_repair-0.52.5/src/json_repair → json_repair-0.53.1/src/json_repair/utils}/string_file_wrapper.py RENAMED Viewed

File without changes

{json_repair-0.52.5 → json_repair-0.53.1}/src/json_repair.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{json_repair-0.52.5 → json_repair-0.53.1}/src/json_repair.egg-info/entry_points.txt RENAMED Viewed

File without changes

{json_repair-0.52.5 → json_repair-0.53.1}/src/json_repair.egg-info/top_level.txt RENAMED Viewed

File without changes

{json_repair-0.52.5 → json_repair-0.53.1}/tests/test_json_repair.py RENAMED Viewed

File without changes

{json_repair-0.52.5 → json_repair-0.53.1}/tests/test_parse_array.py RENAMED Viewed

File without changes

{json_repair-0.52.5 → json_repair-0.53.1}/tests/test_parse_comment.py RENAMED Viewed

File without changes

{json_repair-0.52.5 → json_repair-0.53.1}/tests/test_parse_number.py RENAMED Viewed

File without changes

{json_repair-0.52.5 → json_repair-0.53.1}/tests/test_parse_object.py RENAMED Viewed

File without changes

{json_repair-0.52.5 → json_repair-0.53.1}/tests/test_performance.py RENAMED Viewed

File without changes

{json_repair-0.52.5 → json_repair-0.53.1}/tests/test_repair_json_from_file.py RENAMED Viewed

File without changes

json-repair 0.52.5__tar.gz → 0.53.1__tar.gz

json-repair 0.52.5.tar.gz → 0.53.1.tar.gz