json-repair 0.53.0__py3-none-any.whl → 0.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
json_repair/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- from .constants import JSONReturnType
2
1
  from .json_repair import from_file, load, loads, repair_json
2
+ from .utils.constants import JSONReturnType
3
3
 
4
4
  __all__ = ["from_file", "load", "loads", "repair_json", "JSONReturnType"]
json_repair/json_parser.py CHANGED
@@ -1,36 +1,32 @@
1
- from typing import Literal, TextIO
1
+ from typing import TextIO
2
2
 
3
- from .constants import STRING_DELIMITERS, JSONReturnType
4
- from .json_context import JsonContext
5
- from .object_comparer import ObjectComparer
6
3
  from .parse_array import parse_array as _parse_array
7
- from .parse_boolean_or_null import parse_boolean_or_null as _parse_boolean_or_null
8
4
  from .parse_comment import parse_comment as _parse_comment
9
5
  from .parse_number import parse_number as _parse_number
10
6
  from .parse_object import parse_object as _parse_object
11
7
  from .parse_string import parse_string as _parse_string
12
- from .string_file_wrapper import StringFileWrapper
8
+ from .utils.constants import STRING_DELIMITERS, JSONReturnType
9
+ from .utils.json_context import JsonContext
10
+ from .utils.object_comparer import ObjectComparer
11
+ from .utils.string_file_wrapper import StringFileWrapper
13
12
 
14
13
 
15
14
  class JSONParser:
16
15
  # Split the parse methods into separate files because this one was like 3000 lines
17
- def parse_array(self, *args, **kwargs):
18
- return _parse_array(self, *args, **kwargs)
16
+ def parse_array(self) -> list[JSONReturnType]:
17
+ return _parse_array(self)
19
18
 
20
- def parse_boolean_or_null(self, *args, **kwargs):
21
- return _parse_boolean_or_null(self, *args, **kwargs)
19
+ def parse_comment(self) -> JSONReturnType:
20
+ return _parse_comment(self)
22
21
 
23
- def parse_comment(self, *args, **kwargs):
24
- return _parse_comment(self, *args, **kwargs)
22
+ def parse_number(self) -> JSONReturnType:
23
+ return _parse_number(self)
25
24
 
26
- def parse_number(self, *args, **kwargs):
27
- return _parse_number(self, *args, **kwargs)
25
+ def parse_object(self) -> JSONReturnType:
26
+ return _parse_object(self)
28
27
 
29
- def parse_object(self, *args, **kwargs):
30
- return _parse_object(self, *args, **kwargs)
31
-
32
- def parse_string(self, *args, **kwargs):
33
- return _parse_string(self, *args, **kwargs)
28
+ def parse_string(self) -> JSONReturnType:
29
+ return _parse_string(self)
34
30
 
35
31
  def __init__(
36
32
  self,
@@ -39,6 +35,7 @@ class JSONParser:
39
35
  logging: bool | None,
40
36
  json_fd_chunk_length: int = 0,
41
37
  stream_stable: bool = False,
38
+ strict: bool = False,
42
39
  ) -> None:
43
40
  # The string to parse
44
41
  self.json_str: str | StringFileWrapper = json_str
@@ -70,6 +67,10 @@ class JSONParser:
70
67
  # case 3: '{"key": "val\\n123,`key2:value2' => '{"key": "val\\n123,`key2:value2"}'
71
68
  # case 4: '{"key": "val\\n123,`key2:value2`"}' => '{"key": "val\\n123,`key2:value2`"}'
72
69
  self.stream_stable = stream_stable
70
+ # Over time the library got more and more complex heuristics to repair JSON. Some of these heuristics
71
+ # may not be desirable in some use cases and the user would prefer json_repair to return an exception.
72
+ # So strict mode was added to disable some of those heuristics.
73
+ self.strict = strict
73
74
 
74
75
  def parse(
75
76
  self,
@@ -97,6 +98,11 @@ class JSONParser:
97
98
  "There were no more elements, returning the element without the array",
98
99
  )
99
100
  json = json[0]
101
+ elif self.strict:
102
+ self.log(
103
+ "Multiple top-level JSON elements found in strict mode, raising an error",
104
+ )
105
+ raise ValueError("Multiple top-level JSON elements found in strict mode.")
100
106
  if self.logging:
101
107
  return json, self.logger
102
108
  else:
@@ -107,8 +113,8 @@ class JSONParser:
107
113
  ) -> JSONReturnType:
108
114
  while True:
109
115
  char = self.get_char_at()
110
- # False means that we are at the end of the string provided
111
- if char is False:
116
+ # None means that we are at the end of the string provided
117
+ if char is None:
112
118
  return ""
113
119
  # <object> starts with '{'
114
120
  elif char == "{":
@@ -130,30 +136,36 @@ class JSONParser:
130
136
  else:
131
137
  self.index += 1
132
138
 
133
- def get_char_at(self, count: int = 0) -> str | Literal[False]:
139
+ def get_char_at(self, count: int = 0) -> str | None:
134
140
  # Why not use something simpler? Because try/except in python is a faster alternative to an "if" statement that is often True
135
141
  try:
136
142
  return self.json_str[self.index + count]
137
143
  except IndexError:
138
- return False
144
+ return None
139
145
 
140
- def skip_whitespaces_at(self, idx: int = 0, move_main_index=True) -> int:
146
+ def skip_whitespaces(self) -> None:
141
147
  """
142
- This function quickly iterates on whitespaces, syntactic sugar to make the code more concise
148
+ This function quickly iterates on whitespaces, moving the self.index forward
143
149
  """
144
150
  try:
145
- char = self.json_str[self.index + idx]
146
- except IndexError:
147
- return idx
148
- while char.isspace():
149
- if move_main_index:
151
+ char = self.json_str[self.index]
152
+ while char.isspace():
150
153
  self.index += 1
151
- else:
154
+ char = self.json_str[self.index]
155
+ except IndexError:
156
+ pass
157
+
158
+ def scroll_whitespaces(self, idx: int = 0) -> int:
159
+ """
160
+ This function quickly iterates on whitespaces. Doesn't move the self.index and returns the offset from self.index
161
+ """
162
+ try:
163
+ char = self.json_str[self.index + idx]
164
+ while char.isspace():
152
165
  idx += 1
153
- try:
154
166
  char = self.json_str[self.index + idx]
155
- except IndexError:
156
- return idx
167
+ except IndexError:
168
+ pass
157
169
  return idx
158
170
 
159
171
  def skip_to_character(self, character: str | list[str], idx: int = 0) -> int:
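Two behavioral changes in this parser core: `get_char_at()` now reports end-of-input as `None` instead of `False`, and the old `skip_whitespaces_at(idx, move_main_index)` is split into `skip_whitespaces()`, which advances `self.index`, and `scroll_whitespaces(idx)`, which only peeks ahead and returns the offset it reached. A standalone paraphrase of the two whitespace contracts (editor's sketch, not the package's own code):

```python
# Editor's paraphrase of the split: one helper consumes whitespace by moving the
# index, the other only reports how far ahead the next non-space character is.

def skip_whitespaces(text: str, index: int) -> int:
    """Return the new index after consuming whitespace starting at `index`."""
    while index < len(text) and text[index].isspace():
        index += 1
    return index

def scroll_whitespaces(text: str, index: int, idx: int = 0) -> int:
    """Return the offset from `index` of the first non-whitespace character."""
    while index + idx < len(text) and text[index + idx].isspace():
        idx += 1
    return idx

s = '{"key":   "value"}'
assert skip_whitespaces(s, 7) == 10   # the caller's index moves past the spaces
assert scroll_whitespaces(s, 7) == 3  # the index is untouched, only the offset is returned
```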
json_repair/json_repair.py CHANGED
@@ -25,10 +25,10 @@ All supported use cases are in the unit tests
25
25
  import argparse
26
26
  import json
27
27
  import sys
28
- from typing import Literal, TextIO, overload
28
+ from typing import Any, Literal, TextIO, overload
29
29
 
30
- from .constants import JSONReturnType
31
30
  from .json_parser import JSONParser
31
+ from .utils.constants import JSONReturnType
32
32
 
33
33
 
34
34
  @overload
@@ -40,7 +40,8 @@ def repair_json(
40
40
  json_fd: TextIO | None = None,
41
41
  chunk_length: int = 0,
42
42
  stream_stable: bool = False,
43
- **json_dumps_args,
43
+ strict: bool = False,
44
+ **json_dumps_args: Any,
44
45
  ) -> str: ...
45
46
 
46
47
 
@@ -53,7 +54,8 @@ def repair_json(
53
54
  json_fd: TextIO | None = None,
54
55
  chunk_length: int = 0,
55
56
  stream_stable: bool = False,
56
- **json_dumps_args,
57
+ strict: bool = False,
58
+ **json_dumps_args: Any,
57
59
  ) -> JSONReturnType | tuple[JSONReturnType, list[dict[str, str]]]: ...
58
60
 
59
61
 
@@ -65,8 +67,9 @@ def repair_json(
65
67
  json_fd: TextIO | None = None,
66
68
  chunk_length: int = 0,
67
69
  stream_stable: bool = False,
68
- **json_dumps_args,
69
- ) -> JSONReturnType | tuple[JSONReturnType, list[dict[str, str]]] | tuple[JSONReturnType, list]:
70
+ strict: bool = False,
71
+ **json_dumps_args: Any,
72
+ ) -> JSONReturnType | tuple[JSONReturnType, list[dict[str, str]]]:
70
73
  """
71
74
  Given a json formatted string, it will try to decode it and, if it fails, it will try to fix it.
72
75
 
@@ -79,10 +82,11 @@ def repair_json(
79
82
  ensure_ascii (bool, optional): Set to False to avoid converting non-latin characters to ascii (for example when using chinese characters). Defaults to True. Ignored if `skip_json_loads` is True.
80
83
  chunk_length (int, optional): Size in bytes of the file chunks to read at once. Ignored if `json_fd` is None. Do not use! Use `from_file` or `load` instead. Defaults to 1MB.
81
84
  stream_stable (bool, optional): When the json to be repaired is the accumulation of streaming json at a certain moment.If this parameter to True will keep the repair results stable.
85
+ strict (bool, optional): If True, surface structural problems (duplicate keys, missing separators, empty keys/values, etc.) as ValueError instead of repairing them.
82
86
  Returns:
83
87
  Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON or a tuple with the repaired JSON and repair log when logging is True.
84
88
  """
85
- parser = JSONParser(json_str, json_fd, logging, chunk_length, stream_stable)
89
+ parser = JSONParser(json_str, json_fd, logging, chunk_length, stream_stable, strict)
86
90
  if skip_json_loads:
87
91
  parsed_json = parser.parse()
88
92
  else:
@@ -109,6 +113,7 @@ def loads(
109
113
  skip_json_loads: bool = False,
110
114
  logging: bool = False,
111
115
  stream_stable: bool = False,
116
+ strict: bool = False,
112
117
  ) -> JSONReturnType | tuple[JSONReturnType, list[dict[str, str]]] | str:
113
118
  """
114
119
  This function works like `json.loads()` except that it will fix your JSON in the process.
@@ -118,6 +123,7 @@ def loads(
118
123
  json_str (str): The JSON string to load and repair.
119
124
  skip_json_loads (bool, optional): If True, skip calling the built-in json.loads() function to verify that the json is valid before attempting to repair. Defaults to False.
120
125
  logging (bool, optional): If True, return a tuple with the repaired json and a log of all repair actions. Defaults to False.
126
+ strict (bool, optional): If True, surface structural problems (duplicate keys, missing separators, empty keys/values, etc.) as ValueError instead of repairing them.
121
127
 
122
128
  Returns:
123
129
  Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]], str]: The repaired JSON object or a tuple with the repaired JSON object and repair log.
@@ -128,6 +134,7 @@ def loads(
128
134
  skip_json_loads=skip_json_loads,
129
135
  logging=logging,
130
136
  stream_stable=stream_stable,
137
+ strict=strict,
131
138
  )
132
139
 
133
140
 
@@ -136,6 +143,7 @@ def load(
136
143
  skip_json_loads: bool = False,
137
144
  logging: bool = False,
138
145
  chunk_length: int = 0,
146
+ strict: bool = False,
139
147
  ) -> JSONReturnType | tuple[JSONReturnType, list[dict[str, str]]]:
140
148
  """
141
149
  This function works like `json.load()` except that it will fix your JSON in the process.
@@ -146,6 +154,7 @@ def load(
146
154
  skip_json_loads (bool, optional): If True, skip calling the built-in json.loads() function to verify that the json is valid before attempting to repair. Defaults to False.
147
155
  logging (bool, optional): If True, return a tuple with the repaired json and a log of all repair actions. Defaults to False.
148
156
  chunk_length (int, optional): Size in bytes of the file chunks to read at once. Defaults to 1MB.
157
+ strict (bool, optional): If True, surface structural problems (duplicate keys, missing separators, empty keys/values, etc.) as ValueError instead of repairing them.
149
158
 
150
159
  Returns:
151
160
  Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON object or a tuple with the repaired JSON object and repair log.
@@ -156,6 +165,7 @@ def load(
156
165
  return_objects=True,
157
166
  skip_json_loads=skip_json_loads,
158
167
  logging=logging,
168
+ strict=strict,
159
169
  )
160
170
 
161
171
 
@@ -164,6 +174,7 @@ def from_file(
164
174
  skip_json_loads: bool = False,
165
175
  logging: bool = False,
166
176
  chunk_length: int = 0,
177
+ strict: bool = False,
167
178
  ) -> JSONReturnType | tuple[JSONReturnType, list[dict[str, str]]]:
168
179
  """
169
180
  This function is a wrapper around `load()` so you can pass the filename as string
@@ -173,6 +184,7 @@ def from_file(
173
184
  skip_json_loads (bool, optional): If True, skip calling the built-in json.loads() function to verify that the json is valid before attempting to repair. Defaults to False.
174
185
  logging (bool, optional): If True, return a tuple with the repaired json and a log of all repair actions. Defaults to False.
175
186
  chunk_length (int, optional): Size in bytes of the file chunks to read at once. Defaults to 1MB.
187
+ strict (bool, optional): If True, surface structural problems (duplicate keys, missing separators, empty keys/values, etc.) as ValueError instead of repairing them.
176
188
 
177
189
  Returns:
178
190
  Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON object or a tuple with the repaired JSON object and repair log.
@@ -183,6 +195,7 @@ def from_file(
183
195
  skip_json_loads=skip_json_loads,
184
196
  logging=logging,
185
197
  chunk_length=chunk_length,
198
+ strict=strict,
186
199
  )
187
200
 
188
201
  return jsonobj
@@ -240,6 +253,11 @@ def cli(inline_args: list[str] | None = None) -> int:
240
253
  default=2,
241
254
  help="Number of spaces for indentation (Default 2)",
242
255
  )
256
+ parser.add_argument(
257
+ "--strict",
258
+ action="store_true",
259
+ help="Raise on duplicate keys, missing separators, empty keys/values, and other unrecoverable structures instead of repairing them",
260
+ )
243
261
 
244
262
  args = parser.parse_args() if inline_args is None else parser.parse_args(inline_args)
245
263
 
@@ -259,10 +277,10 @@ def cli(inline_args: list[str] | None = None) -> int:
259
277
  try:
260
278
  # Use from_file if a filename is provided; otherwise read from stdin.
261
279
  if args.filename:
262
- result = from_file(args.filename)
280
+ result = from_file(args.filename, strict=args.strict)
263
281
  else:
264
282
  data = sys.stdin.read()
265
- result = loads(data)
283
+ result = loads(data, strict=args.strict)
266
284
  if args.inline or args.output:
267
285
  with open(args.output or args.filename, mode="w") as fd:
268
286
  json.dump(result, fd, indent=args.indent, ensure_ascii=ensure_ascii)
json_repair/parse_array.py CHANGED
@@ -1,8 +1,8 @@
1
1
  from typing import TYPE_CHECKING
2
2
 
3
- from .constants import STRING_DELIMITERS, JSONReturnType
4
- from .json_context import ContextValues
5
- from .object_comparer import ObjectComparer
3
+ from .utils.constants import STRING_DELIMITERS, JSONReturnType
4
+ from .utils.json_context import ContextValues
5
+ from .utils.object_comparer import ObjectComparer
6
6
 
7
7
  if TYPE_CHECKING:
8
8
  from .json_parser import JSONParser
@@ -15,7 +15,7 @@ def parse_array(self: "JSONParser") -> list[JSONReturnType]:
15
15
  # Stop when you either find the closing parentheses or you have iterated over the entire string
16
16
  char = self.get_char_at()
17
17
  while char and char not in ["]", "}"]:
18
- self.skip_whitespaces_at()
18
+ self.skip_whitespaces()
19
19
  value: JSONReturnType = ""
20
20
  if char in STRING_DELIMITERS:
21
21
  # Sometimes it can happen that LLMs forget to start an object and then you think it's a string in an array
@@ -23,13 +23,13 @@ def parse_array(self: "JSONParser") -> list[JSONReturnType]:
23
23
  # And either parse the string or parse the object
24
24
  i = 1
25
25
  i = self.skip_to_character(char, i)
26
- i = self.skip_whitespaces_at(idx=i + 1, move_main_index=False)
26
+ i = self.scroll_whitespaces(idx=i + 1)
27
27
  value = self.parse_object() if self.get_char_at(i) == ":" else self.parse_string()
28
28
  else:
29
29
  value = self.parse_json()
30
30
 
31
- # It is possible that parse_json() returns nothing valid, so we increase by 1
32
- if ObjectComparer.is_strictly_empty(value):
31
+ # It is possible that parse_json() returns nothing valid, so we increase by 1, unless we find an array separator
32
+ if ObjectComparer.is_strictly_empty(value) and self.get_char_at() not in ["]", ","]:
33
33
  self.index += 1
34
34
  elif value == "..." and self.get_char_at(-1) == ".":
35
35
  self.log(
@@ -45,7 +45,7 @@ def parse_array(self: "JSONParser") -> list[JSONReturnType]:
45
45
  char = self.get_char_at()
46
46
 
47
47
  # Especially at the end of an LLM generated json you might miss the last "]"
48
- if char and char != "]":
48
+ if char != "]":
49
49
  self.log(
50
50
  "While parsing an array we missed the closing ], ignoring it",
51
51
  )
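The visible behavior for the common truncation case is unchanged; a quick hedged check of the "missed the closing ]" branch (the repaired value is my expectation, verify against the installed package):

```python
# Exercises the missing-"]" repair logged above.
from json_repair import loads

print(loads('{"items": ["a", "b"'))  # expected: {'items': ['a', 'b']}
```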
json_repair/parse_comment.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from typing import TYPE_CHECKING
2
2
 
3
- from .constants import JSONReturnType
4
- from .json_context import ContextValues
3
+ from .utils.constants import JSONReturnType
4
+ from .utils.json_context import ContextValues
5
5
 
6
6
  if TYPE_CHECKING:
7
7
  from .json_parser import JSONParser
json_repair/parse_number.py CHANGED
@@ -1,6 +1,7 @@
1
1
  from typing import TYPE_CHECKING
2
2
 
3
- from .json_context import ContextValues
3
+ from .utils.constants import JSONReturnType
4
+ from .utils.json_context import ContextValues
4
5
 
5
6
  NUMBER_CHARS: set[str] = set("0123456789-.eE/,")
6
7
 
@@ -9,7 +10,7 @@ if TYPE_CHECKING:
9
10
  from .json_parser import JSONParser
10
11
 
11
12
 
12
- def parse_number(self: "JSONParser") -> float | int | str | bool | None:
13
+ def parse_number(self: "JSONParser") -> JSONReturnType:
13
14
  # <number> is a valid real number expressed in one of a number of given formats
14
15
  number_str = ""
15
16
  char = self.get_char_at()
json_repair/parse_object.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from typing import TYPE_CHECKING
2
2
 
3
- from .constants import STRING_DELIMITERS, JSONReturnType
4
- from .json_context import ContextValues
3
+ from .utils.constants import STRING_DELIMITERS, JSONReturnType
4
+ from .utils.json_context import ContextValues
5
5
 
6
6
  if TYPE_CHECKING:
7
7
  from .json_parser import JSONParser
@@ -17,10 +17,10 @@ def parse_object(self: "JSONParser") -> JSONReturnType:
17
17
  # <member> ::= <string> ': ' <json>
18
18
 
19
19
  # Skip filler whitespaces
20
- self.skip_whitespaces_at()
20
+ self.skip_whitespaces()
21
21
 
22
22
  # Sometimes LLMs do weird things, if we find a ":" so early, we'll change it to "," and move on
23
- if (self.get_char_at() or "") == ":":
23
+ if self.get_char_at() == ":":
24
24
  self.log(
25
25
  "While parsing an object we found a : before a key, ignoring",
26
26
  )
@@ -53,18 +53,26 @@ def parse_object(self: "JSONParser") -> JSONReturnType:
53
53
  prev_value.extend(
54
54
  new_array[0] if len(new_array) == 1 and isinstance(new_array[0], list) else new_array
55
55
  )
56
- self.skip_whitespaces_at()
56
+ self.skip_whitespaces()
57
57
  if self.get_char_at() == ",":
58
58
  self.index += 1
59
- self.skip_whitespaces_at()
59
+ self.skip_whitespaces()
60
60
  continue
61
61
  key = str(self.parse_string())
62
62
  if key == "":
63
- self.skip_whitespaces_at()
63
+ self.skip_whitespaces()
64
64
  if key != "" or (key == "" and self.get_char_at() in [":", "}"]):
65
- # If the string is empty but there is a object divider, we are done here
65
+ # Empty keys now trigger in strict mode, otherwise we keep repairing as before
66
+ if key == "" and self.strict:
67
+ self.log(
68
+ "Empty key found in strict mode while parsing object, raising an error",
69
+ )
70
+ raise ValueError("Empty key found in strict mode while parsing object.")
66
71
  break
67
72
  if ContextValues.ARRAY in self.context.context and key in obj:
73
+ if self.strict:
74
+ self.log("Duplicate key found in strict mode while parsing object, raising an error")
75
+ raise ValueError("Duplicate key found in strict mode while parsing object.")
68
76
  self.log(
69
77
  "While parsing an object we found a duplicate key, closing the object here and rolling back the index",
70
78
  )
@@ -74,16 +82,21 @@ def parse_object(self: "JSONParser") -> JSONReturnType:
74
82
  break
75
83
 
76
84
  # Skip filler whitespaces
77
- self.skip_whitespaces_at()
85
+ self.skip_whitespaces()
78
86
 
79
87
  # We reached the end here
80
88
  if (self.get_char_at() or "}") == "}":
81
89
  continue
82
90
 
83
- self.skip_whitespaces_at()
91
+ self.skip_whitespaces()
84
92
 
85
93
  # An extreme case of missing ":" after a key
86
- if (self.get_char_at() or "") != ":":
94
+ if self.get_char_at() != ":":
95
+ if self.strict:
96
+ self.log(
97
+ "Missing ':' after key in strict mode while parsing object, raising an error",
98
+ )
99
+ raise ValueError("Missing ':' after key in strict mode while parsing object.")
87
100
  self.log(
88
101
  "While parsing an object we missed a : after a key",
89
102
  )
@@ -91,31 +104,40 @@ def parse_object(self: "JSONParser") -> JSONReturnType:
91
104
  self.index += 1
92
105
  self.context.reset()
93
106
  self.context.set(ContextValues.OBJECT_VALUE)
94
- # The value can be any valid json
95
- self.skip_whitespaces_at()
107
+ # The value can be any valid json; strict mode will refuse repaired empties
108
+ self.skip_whitespaces()
96
109
  # Corner case, a lone comma
97
110
  value: JSONReturnType = ""
98
- if (self.get_char_at() or "") in [",", "}"]:
111
+ if self.get_char_at() in [",", "}"]:
99
112
  self.log(
100
- "While parsing an object value we found a stray , ignoring it",
113
+ "While parsing an object value we found a stray " + str(self.get_char_at()) + ", ignoring it",
101
114
  )
102
115
  else:
103
116
  value = self.parse_json()
104
-
117
+ if value == "" and self.strict and self.get_char_at(-1) not in STRING_DELIMITERS:
118
+ self.log(
119
+ "Parsed value is empty in strict mode while parsing object, raising an error",
120
+ )
121
+ raise ValueError("Parsed value is empty in strict mode while parsing object.")
105
122
  # Reset context since our job is done
106
123
  self.context.reset()
107
124
  obj[key] = value
108
125
 
109
- if (self.get_char_at() or "") in [",", "'", '"']:
126
+ if self.get_char_at() in [",", "'", '"']:
110
127
  self.index += 1
111
128
 
112
129
  # Remove trailing spaces
113
- self.skip_whitespaces_at()
130
+ self.skip_whitespaces()
114
131
 
115
132
  self.index += 1
116
133
 
117
134
  # If the object is empty but also isn't just {}
118
135
  if not obj and self.index - start_index > 2:
136
+ if self.strict:
137
+ self.log(
138
+ "Parsed object is empty but contains extra characters in strict mode, raising an error",
139
+ )
140
+ raise ValueError("Parsed object is empty but contains extra characters in strict mode.")
119
141
  self.log("Parsed object is empty, we will try to parse this as an array instead")
120
142
  self.index = start_index
121
143
  return self.parse_array()
@@ -126,18 +148,19 @@ def parse_object(self: "JSONParser") -> JSONReturnType:
126
148
  if not self.context.empty:
127
149
  return obj
128
150
 
129
- self.skip_whitespaces_at()
130
- if (self.get_char_at() or "") != ",":
151
+ self.skip_whitespaces()
152
+ if self.get_char_at() != ",":
131
153
  return obj
132
154
  self.index += 1
133
- self.skip_whitespaces_at()
134
- if (self.get_char_at() or "") not in STRING_DELIMITERS:
155
+ self.skip_whitespaces()
156
+ if self.get_char_at() not in STRING_DELIMITERS:
135
157
  return obj
136
- self.log(
137
- "Found a comma and string delimiter after object closing brace, checking for additional key-value pairs",
138
- )
139
- additional_obj = self.parse_object()
140
- if isinstance(additional_obj, dict):
141
- obj.update(additional_obj)
158
+ if not self.strict:
159
+ self.log(
160
+ "Found a comma and string delimiter after object closing brace, checking for additional key-value pairs",
161
+ )
162
+ additional_obj = self.parse_object()
163
+ if isinstance(additional_obj, dict):
164
+ obj.update(additional_obj)
142
165
 
143
166
  return obj
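The strict checks added above turn several object repairs into `ValueError`s. A hedged sketch of the difference (expected outputs inferred from the branches in this hunk, not copied from the package's tests):

```python
from json_repair import loads

missing_colon = '{"key" "value"}'
print(loads(missing_colon))            # lenient: repaired, expected {'key': 'value'}
try:
    loads(missing_colon, strict=True)  # strict: missing ':' after a key -> ValueError
except ValueError as exc:
    print("rejected:", exc)

# Duplicate keys inside an array: skip_json_loads=True forces the repair parser,
# because json.loads() would silently accept the duplicate (last value wins).
dupes = '[{"key": 1, "key": 2}]'
print(loads(dupes, skip_json_loads=True))            # lenient: expected [{'key': 1}, {'key': 2}]
try:
    loads(dupes, skip_json_loads=True, strict=True)  # strict: ValueError
except ValueError as exc:
    print("rejected:", exc)
```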
json_repair/parse_string.py CHANGED
@@ -1,14 +1,22 @@
1
1
  from typing import TYPE_CHECKING
2
2
 
3
- from .constants import STRING_DELIMITERS, JSONReturnType
4
- from .json_context import ContextValues
3
+ from .parse_string_helpers.parse_boolean_or_null import parse_boolean_or_null
5
4
  from .parse_string_helpers.parse_json_llm_block import parse_json_llm_block
5
+ from .utils.constants import STRING_DELIMITERS, JSONReturnType
6
+ from .utils.json_context import ContextValues
6
7
 
7
8
  if TYPE_CHECKING:
8
9
  from .json_parser import JSONParser
9
10
 
10
11
 
11
12
  def parse_string(self: "JSONParser") -> JSONReturnType:
13
+ # Utility function to append a character to the accumulator and update the index
14
+ def _append_literal_char(acc: str, current_char: str | None) -> tuple[str, str | None]:
15
+ acc += str(current_char)
16
+ self.index += 1
17
+ char = self.get_char_at()
18
+ return acc, char
19
+
12
20
  # <string> is a string of valid characters enclosed in quotes
13
21
  # i.e. { name: "John" }
14
22
  # Somehow all weird cases in an invalid JSON happen to be resolved in this function, so be careful here
@@ -40,7 +48,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
40
48
  # This could be a <boolean> and not a string. Because (T)rue or (F)alse or (N)ull are valid
41
49
  # But remember, object keys are only of type string
42
50
  if char.lower() in ["t", "f", "n"] and self.context.current != ContextValues.OBJECT_KEY:
43
- value = self.parse_boolean_or_null()
51
+ value = parse_boolean_or_null(self)
44
52
  if value != "":
45
53
  return value
46
54
  self.log(
@@ -59,10 +67,12 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
59
67
  "While parsing a string, we found code fences but they did not enclose valid JSON, continuing parsing the string",
60
68
  )
61
69
  # There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
62
- if self.get_char_at() in STRING_DELIMITERS and self.get_char_at() == lstring_delimiter:
70
+ if self.get_char_at() == lstring_delimiter:
63
71
  # If it's an empty key, this was easy
64
- if (self.context.current == ContextValues.OBJECT_KEY and self.get_char_at(1) == ":") or (
65
- self.context.current == ContextValues.OBJECT_VALUE and self.get_char_at(1) in [",", "}"]
72
+ if (
73
+ (self.context.current == ContextValues.OBJECT_KEY and self.get_char_at(1) == ":")
74
+ or (self.context.current == ContextValues.OBJECT_VALUE and self.get_char_at(1) in [",", "}"])
75
+ or (self.context.current == ContextValues.ARRAY and self.get_char_at(1) in [",", "]"])
66
76
  ):
67
77
  self.index += 1
68
78
  return ""
@@ -71,13 +81,16 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
71
81
  self.log(
72
82
  "While parsing a string, we found a doubled quote and then a quote again, ignoring it",
73
83
  )
74
- return ""
84
+ if self.strict:
85
+ raise ValueError("Found doubled quotes followed by another quote.")
86
+ else:
87
+ return ""
75
88
  # Find the next delimiter
76
89
  i = self.skip_to_character(character=rstring_delimiter, idx=1)
77
90
  next_c = self.get_char_at(i)
78
91
  # Now check that the next character is also a delimiter to ensure that we have "".....""
79
92
  # In that case we ignore this rstring delimiter
80
- if next_c and (self.get_char_at(i + 1) or "") == rstring_delimiter:
93
+ if self.get_char_at(i + 1) == rstring_delimiter:
81
94
  self.log(
82
95
  "While parsing a string, we found a valid starting doubled quote",
83
96
  )
@@ -85,13 +98,17 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
85
98
  self.index += 1
86
99
  else:
87
100
  # Ok this is not a doubled quote, check if this is an empty string or not
88
- i = self.skip_whitespaces_at(idx=1, move_main_index=False)
101
+ i = self.scroll_whitespaces(idx=1)
89
102
  next_c = self.get_char_at(i)
90
103
  if next_c in STRING_DELIMITERS + ["{", "["]:
91
104
  # something fishy is going on here
92
105
  self.log(
93
106
  "While parsing a string, we found a doubled quote but also another quote afterwards, ignoring it",
94
107
  )
108
+ if self.strict:
109
+ raise ValueError(
110
+ "Found doubled quotes followed by another quote while parsing a string.",
111
+ )
95
112
  self.index += 1
96
113
  return ""
97
114
  elif next_c not in [",", "]", "}"]:
@@ -135,7 +152,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
135
152
  ):
136
153
  rstring_delimiter_missing = True
137
154
  # check if this is a case in which the closing comma is NOT missing instead
138
- self.skip_whitespaces_at()
155
+ self.skip_whitespaces()
139
156
  if self.get_char_at(1) == "\\":
140
157
  # Ok this is a quoted string, skip
141
158
  rstring_delimiter_missing = False
@@ -145,7 +162,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
145
162
  i += 1
146
163
  # found a delimiter, now we need to check that is followed strictly by a comma or brace
147
164
  # or the string ended
148
- i = self.skip_whitespaces_at(idx=i, move_main_index=False)
165
+ i = self.scroll_whitespaces(idx=i)
149
166
  next_c = self.get_char_at(i)
150
167
  if not next_c or next_c in [",", "}"]:
151
168
  rstring_delimiter_missing = False
@@ -160,7 +177,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
160
177
  else:
161
178
  # But again, this could just be something a bit stupid like "lorem, "ipsum" sic"
162
179
  # Check if we find a : afterwards (skipping space)
163
- i = self.skip_whitespaces_at(idx=i + 1, move_main_index=False)
180
+ i = self.scroll_whitespaces(idx=i + 1)
164
181
  next_c = self.get_char_at(i)
165
182
  if next_c and next_c != ":":
166
183
  rstring_delimiter_missing = False
@@ -175,7 +192,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
175
192
  break
176
193
  else:
177
194
  # skip any whitespace first
178
- i = self.skip_whitespaces_at(idx=1, move_main_index=False)
195
+ i = self.scroll_whitespaces(idx=1)
179
196
  # We couldn't find any rstring_delimeter before the end of the string
180
197
  # check if this is the last string of an object and therefore we can keep going
181
198
  # make an exception if this is the last char before the closing brace
@@ -212,19 +229,15 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
212
229
  if self.context.current == ContextValues.OBJECT_VALUE and char == "}":
213
230
  # We found the end of an object while parsing a value
214
231
  # Check if the object is really over, to avoid doubling the closing brace
215
- i = self.skip_whitespaces_at(idx=1, move_main_index=False)
232
+ i = self.scroll_whitespaces(idx=1)
216
233
  next_c = self.get_char_at(i)
217
- if next_c and next_c == "`":
234
+ if next_c == "`" and self.get_char_at(i + 1) == "`" and self.get_char_at(i + 2) == "`":
218
235
  # This could be a special case in which the LLM added code fences after the object
219
236
  # So we need to check if there are another two ` after this one`
220
- next_c = self.get_char_at(i + 1)
221
- if next_c and next_c == "`":
222
- next_c = self.get_char_at(i + 2)
223
- if next_c and next_c == "`":
224
- self.log(
225
- "While parsing a string in object value context, we found a } that closes the object before code fences, stopping here",
226
- )
227
- break
237
+ self.log(
238
+ "While parsing a string in object value context, we found a } that closes the object before code fences, stopping here",
239
+ )
240
+ break
228
241
  if not next_c:
229
242
  self.log(
230
243
  "While parsing a string in object value context, we found a } that closes the object, stopping here",
@@ -282,12 +295,13 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
282
295
  # found a second delimiter
283
296
  i += 1
284
297
  # Skip spaces
285
- i = self.skip_whitespaces_at(idx=i, move_main_index=False)
286
- next_c = self.get_char_at(i)
287
- if next_c and next_c in [",", "}"]:
298
+ i = self.scroll_whitespaces(idx=i)
299
+ if self.get_char_at(i) in [",", "}"]:
288
300
  # Ok then this is a missing right quote
289
301
  self.log(
290
- "While parsing a string missing the right delimiter in object key context, we found a :, stopping here",
302
+ "While parsing a string missing the right delimiter in object key context, we found a "
303
+ + str(self.get_char_at(i))
304
+ + " stopping here",
291
305
  )
292
306
  break
293
307
  else:
@@ -316,9 +330,8 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
316
330
  # We found a quote, now let's make sure there's a ":" following
317
331
  i += 1
318
332
  # found a delimiter, now we need to check that is followed strictly by a comma or brace
319
- i = self.skip_whitespaces_at(idx=i, move_main_index=False)
320
- next_c = self.get_char_at(i)
321
- if next_c and next_c == ":":
333
+ i = self.scroll_whitespaces(idx=i)
334
+ if self.get_char_at(i) == ":":
322
335
  # Reset the cursor
323
336
  self.index -= 1
324
337
  char = self.get_char_at()
@@ -328,9 +341,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
328
341
  break
329
342
  elif unmatched_delimiter:
330
343
  unmatched_delimiter = False
331
- string_acc += str(char)
332
- self.index += 1
333
- char = self.get_char_at()
344
+ string_acc, char = _append_literal_char(string_acc, char)
334
345
  else:
335
346
  # Check if eventually there is a rstring delimiter, otherwise we bail
336
347
  i = 1
@@ -365,22 +376,20 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
365
376
  next_c = self.get_char_at(i)
366
377
  # Ok now I found a delimiter, let's skip whitespaces and see if next we find a } or a ,
367
378
  i += 1
368
- i = self.skip_whitespaces_at(idx=i, move_main_index=False)
379
+ i = self.scroll_whitespaces(idx=i)
369
380
  next_c = self.get_char_at(i)
370
381
  if next_c in ["}", ","]:
371
382
  self.log(
372
- "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
383
+ "While parsing a string, we found a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
373
384
  )
374
- string_acc += str(char)
375
- self.index += 1
376
- char = self.get_char_at()
385
+ string_acc, char = _append_literal_char(string_acc, char)
377
386
  continue
378
387
  elif next_c == rstring_delimiter and self.get_char_at(i - 1) != "\\":
379
388
  # Check if self.index:self.index+i is only whitespaces, break if that's the case
380
389
  if all(str(self.get_char_at(j)).isspace() for j in range(1, i) if self.get_char_at(j)):
381
390
  break
382
391
  if self.context.current == ContextValues.OBJECT_VALUE:
383
- i = self.skip_whitespaces_at(idx=i + 1, move_main_index=False)
392
+ i = self.scroll_whitespaces(idx=i + 1)
384
393
  if self.get_char_at(i) == ",":
385
394
  # So we found a comma, this could be a case of a single quote like "va"lue",
386
395
  # Search if it's followed by another key, starting with the first delimeter
@@ -388,15 +397,13 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
388
397
  i += 1
389
398
  i = self.skip_to_character(character=rstring_delimiter, idx=i + 1)
390
399
  i += 1
391
- i = self.skip_whitespaces_at(idx=i, move_main_index=False)
400
+ i = self.scroll_whitespaces(idx=i)
392
401
  next_c = self.get_char_at(i)
393
402
  if next_c == ":":
394
403
  self.log(
395
- "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
404
+ "While parsing a string, we found a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
396
405
  )
397
- string_acc += str(char)
398
- self.index += 1
399
- char = self.get_char_at()
406
+ string_acc, char = _append_literal_char(string_acc, char)
400
407
  continue
401
408
  # We found a delimiter and we need to check if this is a key
402
409
  # so find a rstring_delimiter and a colon after
@@ -413,12 +420,10 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
413
420
  # Only if we fail to find a ':' then we know this is misplaced quote
414
421
  if next_c != ":":
415
422
  self.log(
416
- "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
423
+ "While parsing a string, we found a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
417
424
  )
418
425
  unmatched_delimiter = not unmatched_delimiter
419
- string_acc += str(char)
420
- self.index += 1
421
- char = self.get_char_at()
426
+ string_acc, char = _append_literal_char(string_acc, char)
422
427
  elif self.context.current == ContextValues.ARRAY:
423
428
  # So here we can have a few valid cases:
424
429
  # ["bla bla bla "puppy" bla bla bla "kitty" bla bla"]
@@ -442,9 +447,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
442
447
  "While parsing a string in Array context, we detected a quoted section that would have closed the string but has a different meaning here, ignoring it",
443
448
  )
444
449
  unmatched_delimiter = not unmatched_delimiter
445
- string_acc += str(char)
446
- self.index += 1
447
- char = self.get_char_at()
450
+ string_acc, char = _append_literal_char(string_acc, char)
448
451
  else:
449
452
  break
450
453
  elif self.context.current == ContextValues.OBJECT_KEY:
@@ -452,14 +455,12 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
452
455
  self.log(
453
456
  "While parsing a string in Object Key context, we detected a quoted section that would have closed the string but has a different meaning here, ignoring it",
454
457
  )
455
- string_acc += str(char)
456
- self.index += 1
457
- char = self.get_char_at()
458
+ string_acc, char = _append_literal_char(string_acc, char)
458
459
  if char and missing_quotes and self.context.current == ContextValues.OBJECT_KEY and char.isspace():
459
460
  self.log(
460
461
  "While parsing a string, handling an extreme corner case in which the LLM added a comment instead of valid string, invalidate the string and return an empty value",
461
462
  )
462
- self.skip_whitespaces_at()
463
+ self.skip_whitespaces()
463
464
  if self.get_char_at() not in [":", ","]:
464
465
  return ""
465
466
 
json_repair/parse_string_helpers/parse_boolean_or_null.py ADDED
@@ -0,0 +1,28 @@
1
+ from typing import TYPE_CHECKING
2
+
3
+ if TYPE_CHECKING:
4
+ from ..json_parser import JSONParser # noqa: TID252
5
+
6
+
7
+ def parse_boolean_or_null(parser: "JSONParser") -> bool | str | None:
8
+ # <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
9
+ char = (parser.get_char_at() or "").lower()
10
+ value_map: dict[str, tuple[str, bool | None]] = {
11
+ "t": ("true", True),
12
+ "f": ("false", False),
13
+ "n": ("null", None),
14
+ }
15
+ value: tuple[str, bool | None] = value_map[char]
16
+
17
+ i = 0
18
+ starting_index = parser.index
19
+ while char and i < len(value[0]) and char == value[0][i]:
20
+ i += 1
21
+ parser.index += 1
22
+ char = (parser.get_char_at() or "").lower()
23
+ if i == len(value[0]):
24
+ return value[1]
25
+
26
+ # If nothing works reset the index before returning
27
+ parser.index = starting_index
28
+ return ""
json_repair/parse_string_helpers/parse_json_llm_block.py CHANGED
@@ -1,19 +1,19 @@
1
1
  from typing import TYPE_CHECKING
2
2
 
3
- from ..constants import JSONReturnType # noqa: TID252
3
+ from ..utils.constants import JSONReturnType # noqa: TID252
4
4
 
5
5
  if TYPE_CHECKING:
6
6
  from ..json_parser import JSONParser # noqa: TID252
7
7
 
8
8
 
9
- def parse_json_llm_block(self: "JSONParser") -> JSONReturnType:
9
+ def parse_json_llm_block(parser: "JSONParser") -> JSONReturnType:
10
10
  """
11
11
  Extracts and normalizes JSON enclosed in ```json ... ``` blocks.
12
12
  """
13
13
  # Try to find a ```json ... ``` block
14
- if self.json_str[self.index : self.index + 7] == "```json":
15
- i = self.skip_to_character("`", idx=7)
16
- if self.json_str[self.index + i : self.index + i + 3] == "```":
17
- self.index += 7 # Move past ```json
18
- return self.parse_json()
14
+ if parser.json_str[parser.index : parser.index + 7] == "```json":
15
+ i = parser.skip_to_character("`", idx=7)
16
+ if parser.json_str[parser.index + i : parser.index + i + 3] == "```":
17
+ parser.index += 7 # Move past ```json
18
+ return parser.parse_json()
19
19
  return False
json_repair/utils/string_file_wrapper.py ADDED
@@ -0,0 +1,176 @@
1
+ import os
2
+ from typing import TextIO
3
+
4
+
5
+ class StringFileWrapper:
6
+ # This is a trick to simplify the code, transform the filedescriptor handling into a string handling
7
+ def __init__(self, fd: TextIO, chunk_length: int) -> None:
8
+ """
9
+ Initialize the StringFileWrapper with a file descriptor and chunk length.
10
+
11
+ Args:
12
+ fd (TextIO): The file descriptor to wrap.
13
+ CHUNK_LENGTH (int): The length of each chunk to read from the file.
14
+
15
+ Attributes:
16
+ fd (TextIO): The wrapped file descriptor.
17
+ length (int): The total length of the file content.
18
+ buffers (dict[int, str]): Dictionary to store chunks of file content.
19
+ buffer_length (int): The length of each buffer chunk.
20
+ """
21
+ self.fd = fd
22
+ # Buffers are chunks of text read from the file and cached to reduce disk access.
23
+ self.buffers: dict[int, str] = {}
24
+ if not chunk_length or chunk_length < 2:
25
+ chunk_length = 1_000_000
26
+ # chunk_length now refers to the number of characters per chunk.
27
+ self.buffer_length = chunk_length
28
+ # Keep track of the starting file position ("cookie") for each chunk so we can
29
+ # seek safely without landing in the middle of a multibyte code point.
30
+ self._chunk_positions: list[int] = [0]
31
+ self.length: int | None = None
32
+
33
+ def get_buffer(self, index: int) -> str:
34
+ """
35
+ Retrieve or load a buffer chunk from the file.
36
+
37
+ Args:
38
+ index (int): The index of the buffer chunk to retrieve.
39
+
40
+ Returns:
41
+ str: The buffer chunk at the specified index.
42
+ """
43
+ if index < 0:
44
+ raise IndexError("Negative indexing is not supported")
45
+
46
+ cached = self.buffers.get(index)
47
+ if cached is not None:
48
+ return cached
49
+
50
+ self._ensure_chunk_position(index)
51
+ start_pos = self._chunk_positions[index]
52
+ self.fd.seek(start_pos)
53
+ chunk = self.fd.read(self.buffer_length)
54
+ if not chunk:
55
+ raise IndexError("Chunk index out of range")
56
+ end_pos = self.fd.tell()
57
+ if len(self._chunk_positions) <= index + 1:
58
+ self._chunk_positions.append(end_pos)
59
+ if len(chunk) < self.buffer_length:
60
+ self.length = index * self.buffer_length + len(chunk)
61
+
62
+ self.buffers[index] = chunk
63
+ # Save memory by keeping max 2MB buffer chunks and min 2 chunks
64
+ max_buffers = max(2, int(2_000_000 / self.buffer_length))
65
+ if len(self.buffers) > max_buffers:
66
+ oldest_key = next(iter(self.buffers))
67
+ if oldest_key != index:
68
+ self.buffers.pop(oldest_key)
69
+ return chunk
70
+
71
+ def __getitem__(self, index: int | slice) -> str:
72
+ """
73
+ Retrieve a character or a slice of characters from the file.
74
+
75
+ Args:
76
+ index (Union[int, slice]): The index or slice of characters to retrieve.
77
+
78
+ Returns:
79
+ str: The character(s) at the specified index or slice.
80
+ """
81
+ # The buffer is an array that is seek like a RAM:
82
+ # self.buffers[index]: the row in the array of length 1MB, index is `i` modulo CHUNK_LENGTH
83
+ # self.buffures[index][j]: the column of the row that is `i` remainder CHUNK_LENGTH
84
+ if isinstance(index, slice):
85
+ total_len = len(self)
86
+ start = 0 if index.start is None else index.start
87
+ stop = total_len if index.stop is None else index.stop
88
+ step = 1 if index.step is None else index.step
89
+
90
+ if start < 0:
91
+ start += total_len
92
+ if stop < 0:
93
+ stop += total_len
94
+
95
+ start = max(start, 0)
96
+ stop = min(stop, total_len)
97
+
98
+ if step == 0:
99
+ raise ValueError("slice step cannot be zero")
100
+ if step != 1:
101
+ return "".join(self[i] for i in range(start, stop, step))
102
+
103
+ if start >= stop:
104
+ return ""
105
+
106
+ buffer_index = start // self.buffer_length
107
+ buffer_end = (stop - 1) // self.buffer_length
108
+ start_mod = start % self.buffer_length
109
+ stop_mod = stop % self.buffer_length
110
+ if stop_mod == 0 and stop > start:
111
+ stop_mod = self.buffer_length
112
+ if buffer_index == buffer_end:
113
+ buffer = self.get_buffer(buffer_index)
114
+ return buffer[start_mod:stop_mod]
115
+
116
+ start_slice = self.get_buffer(buffer_index)[start_mod:]
117
+ end_slice = self.get_buffer(buffer_end)[:stop_mod]
118
+ middle_slices = [self.get_buffer(i) for i in range(buffer_index + 1, buffer_end)]
119
+ return start_slice + "".join(middle_slices) + end_slice
120
+ else:
121
+ if index < 0:
122
+ index += len(self)
123
+ if index < 0:
124
+ raise IndexError("string index out of range")
125
+ buffer_index = index // self.buffer_length
126
+ buffer = self.get_buffer(buffer_index)
127
+ return buffer[index % self.buffer_length]
128
+
129
+ def __len__(self) -> int:
130
+ """
131
+ Get the total length of the file.
132
+
133
+ Returns:
134
+ int: The total number of characters in the file.
135
+ """
136
+ if self.length is None:
137
+ while self.length is None:
138
+ chunk_index = len(self._chunk_positions)
139
+ self._ensure_chunk_position(chunk_index)
140
+ return self.length
141
+
142
+ def __setitem__(self, index: int | slice, value: str) -> None: # pragma: no cover
143
+ """
144
+ Set a character or a slice of characters in the file.
145
+
146
+ Args:
147
+ index (slice): The slice of characters to set.
148
+ value (str): The value to set at the specified index or slice.
149
+ """
150
+ start = index.start or 0 if isinstance(index, slice) else index or 0
151
+
152
+ if start < 0:
153
+ start += len(self)
154
+
155
+ current_position = self.fd.tell()
156
+ self.fd.seek(start)
157
+ self.fd.write(value)
158
+ self.fd.seek(current_position)
159
+
160
+ def _ensure_chunk_position(self, chunk_index: int) -> None:
161
+ """
162
+ Ensure that we know the starting file position for the given chunk index.
163
+ """
164
+ while len(self._chunk_positions) <= chunk_index:
165
+ prev_index = len(self._chunk_positions) - 1
166
+ start_pos = self._chunk_positions[-1]
167
+ self.fd.seek(start_pos, os.SEEK_SET)
168
+ chunk = self.fd.read(self.buffer_length)
169
+ end_pos = self.fd.tell()
170
+ if len(chunk) < self.buffer_length:
171
+ self.length = prev_index * self.buffer_length + len(chunk)
172
+ self._chunk_positions.append(end_pos)
173
+ if not chunk:
174
+ break
175
+ if len(self._chunk_positions) <= chunk_index:
176
+ raise IndexError("Chunk index out of range")
json_repair-0.53.0.dist-info/METADATA → json_repair-0.54.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: json_repair
3
- Version: 0.53.0
3
+ Version: 0.54
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License-Expression: MIT
@@ -167,6 +167,23 @@ Some rules of thumb to use:
167
167
  - `skip_json_loads` is faster only if you 100% know that the string is not a valid JSON
168
168
  - If you are having issues with escaping pass the string as **raw** string like: `r"string with escaping\""`
169
169
 
170
+ ### Strict mode
171
+
172
+ By default `json_repair` does its best to “fix” input, even when the JSON is far from valid.
173
+ In some scenarios you want the opposite behavior and need the parser to error out instead of repairing; pass `strict=True` to `repair_json`, `loads`, `load`, or `from_file` to enable that mode:
174
+
175
+ ```
176
+ from json_repair import repair_json
177
+
178
+ repair_json(bad_json_string, strict=True)
179
+ ```
180
+
181
+ The CLI exposes the same behavior with `json_repair --strict input.json` (or piping data via stdin).
182
+
183
+ In strict mode the parser raises `ValueError` as soon as it encounters structural issues such as duplicate keys, missing `:` separators, empty keys/values introduced by stray commas, multiple top-level elements, or other ambiguous constructs. This is useful when you just need validation with friendlier error messages while still benefiting from json_repair’s resilience elsewhere in your stack.
184
+
185
+ Strict mode still honors `skip_json_loads=True`; combining them lets you skip the initial `json.loads` check but still enforce strict parsing rules.
186
+
170
187
  ### Use json_repair with streaming
171
188
 
172
189
  Sometimes you are streaming some data and want to repair the JSON coming from it. Normally this won't work but you can pass `stream_stable` to `repair_json()` or `loads()` to make it work:
@@ -198,6 +215,7 @@ options:
198
215
  If specified, the output will be written to TARGET filename instead of stdout
199
216
  --ensure_ascii Pass ensure_ascii=True to json.dumps()
200
217
  --indent INDENT Number of spaces for indentation (Default 2)
218
+ --strict Raise on duplicate keys, missing separators, empty keys/values, and similar structural issues instead of repairing them
201
219
  ```
202
220
 
203
221
  ## Adding to requirements
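To make the new "Strict mode" section concrete, here is a hedged sketch at the `repair_json` level (expected outputs inferred from the parser changes earlier in this diff, not copied from the package's tests):

```python
from json_repair import repair_json

# Unambiguous damage, such as a trailing comma, is still repaired in strict mode.
print(repair_json('{"a": 1,}', strict=True))  # expected: '{"a": 1}'

# Ambiguous structure raises instead of being repaired.
try:
    repair_json('{"a": 1} {"b": 2}', strict=True)  # multiple top-level elements
except ValueError as exc:
    print("rejected:", exc)

# strict combines with skip_json_loads: no json.loads() pre-check, strict parsing only.
print(repair_json('{"b": 2}', skip_json_loads=True, strict=True))  # expected: '{"b": 2}'
```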
json_repair-0.54.dist-info/RECORD ADDED
@@ -0,0 +1,22 @@
1
+ json_repair/__init__.py,sha256=JQ4Nm8YzR8Id2a527Ql0Az-rKapTp8DCMPKybLtQ620,180
2
+ json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
+ json_repair/json_parser.py,sha256=nATFDlcEnPD8G2NDSKj2nme_v1la_cCcFZrdQvEjTZs,8495
4
+ json_repair/json_repair.py,sha256=iT-OJgpBnKUJVIV4IUlXmMUkOyW6bNnKCZLB7Fys8hk,12758
5
+ json_repair/parse_array.py,sha256=rZfnRiS86vBATOUHqSx2T5fE79Ndlk2NoTsg9Wek7l4,2239
6
+ json_repair/parse_comment.py,sha256=MUDxrx8BFfAaKvx6x4gWviJNvwRi2yv5qnrR6honmas,2660
7
+ json_repair/parse_number.py,sha256=Ddv3Dih1VYfdasUe5DxQWAqy7YAE3aZJ7iePCfdi1EQ,1292
8
+ json_repair/parse_object.py,sha256=noaiP10kzl-jA-1jc6tMmtFoJMIputpB3zFxcAuYQvY,6986
9
+ json_repair/parse_string.py,sha256=L4McLWzRkbW_7Xx_hSGOmfpoPMwbYTGEKBAjqwanLEs,26146
10
+ json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ json_repair/parse_string_helpers/parse_boolean_or_null.py,sha256=pGmH1QATBls70kTvUlJv4F8NiPaBWcyGhRL03sTOnto,871
12
+ json_repair/parse_string_helpers/parse_json_llm_block.py,sha256=wPSm-8RY30Ek8HxzjCkCRtdLq4-Cez-PJB3vOk_vP3w,670
13
+ json_repair/utils/constants.py,sha256=cv2gvyosuq0me0600WyTysM9avrtfXPuXYR26tawcuo,158
14
+ json_repair/utils/json_context.py,sha256=WsMOjqpGSr6aaDONcrk8UFtTurzWon2Qq9AoBBYseoI,934
15
+ json_repair/utils/object_comparer.py,sha256=XKV3MRab8H7_v4sm-wpEa5le0XX9OeycWo5S-MFm-GI,1716
16
+ json_repair/utils/string_file_wrapper.py,sha256=Zlm0ZfJAw_VPlIy-QldL_OKYrPk3TYGq1JVAFPv7SnQ,6862
17
+ json_repair-0.54.dist-info/licenses/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
18
+ json_repair-0.54.dist-info/METADATA,sha256=xoD5G1EZ7muIRVbzdjsgD10OQbxS-K06sNGqlNDvvdQ,12220
19
+ json_repair-0.54.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
20
+ json_repair-0.54.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
21
+ json_repair-0.54.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
22
+ json_repair-0.54.dist-info/RECORD,,
json_repair/parse_boolean_or_null.py DELETED
@@ -1,30 +0,0 @@
1
- from typing import TYPE_CHECKING
2
-
3
- if TYPE_CHECKING:
4
- from .json_parser import JSONParser
5
-
6
-
7
- def parse_boolean_or_null(self: "JSONParser") -> bool | str | None:
8
- # <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
9
- starting_index = self.index
10
- char = (self.get_char_at() or "").lower()
11
- value: tuple[str, bool | None] | None = None
12
- if char == "t":
13
- value = ("true", True)
14
- elif char == "f":
15
- value = ("false", False)
16
- elif char == "n":
17
- value = ("null", None)
18
-
19
- if value:
20
- i = 0
21
- while char and i < len(value[0]) and char == value[0][i]:
22
- i += 1
23
- self.index += 1
24
- char = (self.get_char_at() or "").lower()
25
- if i == len(value[0]):
26
- return value[1]
27
-
28
- # If nothing works reset the index before returning
29
- self.index = starting_index
30
- return ""
json_repair/string_file_wrapper.py DELETED
@@ -1,108 +0,0 @@
1
- import os
2
- from typing import TextIO
3
-
4
-
5
- class StringFileWrapper:
6
- # This is a trick to simplify the code, transform the filedescriptor handling into a string handling
7
- def __init__(self, fd: TextIO, chunk_length: int) -> None:
8
- """
9
- Initialize the StringFileWrapper with a file descriptor and chunk length.
10
-
11
- Args:
12
- fd (TextIO): The file descriptor to wrap.
13
- CHUNK_LENGTH (int): The length of each chunk to read from the file.
14
-
15
- Attributes:
16
- fd (TextIO): The wrapped file descriptor.
17
- length (int): The total length of the file content.
18
- buffers (dict[int, str]): Dictionary to store chunks of file content.
19
- buffer_length (int): The length of each buffer chunk.
20
- """
21
- self.fd = fd
22
- self.length: int = 0
23
- # Buffers are 1MB strings that are read from the file
24
- # and kept in memory to keep reads low
25
- self.buffers: dict[int, str] = {}
26
- # chunk_length is in bytes
27
- if not chunk_length or chunk_length < 2:
28
- chunk_length = 1_000_000
29
- self.buffer_length = chunk_length
30
-
31
- def get_buffer(self, index: int) -> str:
32
- """
33
- Retrieve or load a buffer chunk from the file.
34
-
35
- Args:
36
- index (int): The index of the buffer chunk to retrieve.
37
-
38
- Returns:
39
- str: The buffer chunk at the specified index.
40
- """
41
- if self.buffers.get(index) is None:
42
- self.fd.seek(index * self.buffer_length)
43
- self.buffers[index] = self.fd.read(self.buffer_length)
44
- # Save memory by keeping max 2MB buffer chunks and min 2 chunks
45
- if len(self.buffers) > max(2, 2_000_000 / self.buffer_length):
46
- oldest_key = next(iter(self.buffers))
47
- if oldest_key != index:
48
- self.buffers.pop(oldest_key)
49
- return self.buffers[index]
50
-
51
- def __getitem__(self, index: int | slice) -> str:
52
- """
53
- Retrieve a character or a slice of characters from the file.
54
-
55
- Args:
56
- index (Union[int, slice]): The index or slice of characters to retrieve.
57
-
58
- Returns:
59
- str: The character(s) at the specified index or slice.
60
- """
61
- # The buffer is an array that is seek like a RAM:
62
- # self.buffers[index]: the row in the array of length 1MB, index is `i` modulo CHUNK_LENGTH
63
- # self.buffures[index][j]: the column of the row that is `i` remainder CHUNK_LENGTH
64
- if isinstance(index, slice):
65
- buffer_index = index.start // self.buffer_length
66
- buffer_end = index.stop // self.buffer_length
67
- if buffer_index == buffer_end:
68
- return self.get_buffer(buffer_index)[index.start % self.buffer_length : index.stop % self.buffer_length]
69
- else:
70
- start_slice = self.get_buffer(buffer_index)[index.start % self.buffer_length :]
71
- end_slice = self.get_buffer(buffer_end)[: index.stop % self.buffer_length]
72
- middle_slices = [self.get_buffer(i) for i in range(buffer_index + 1, buffer_end)]
73
- return start_slice + "".join(middle_slices) + end_slice
74
- else:
75
- buffer_index = index // self.buffer_length
76
- return self.get_buffer(buffer_index)[index % self.buffer_length]
77
-
78
- def __len__(self) -> int:
79
- """
80
- Get the total length of the file.
81
-
82
- Returns:
83
- int: The total number of characters in the file.
84
- """
85
- if self.length < 1:
86
- current_position = self.fd.tell()
87
- self.fd.seek(0, os.SEEK_END)
88
- self.length = self.fd.tell()
89
- self.fd.seek(current_position)
90
- return self.length
91
-
92
- def __setitem__(self, index: int | slice, value: str) -> None: # pragma: no cover
93
- """
94
- Set a character or a slice of characters in the file.
95
-
96
- Args:
97
- index (slice): The slice of characters to set.
98
- value (str): The value to set at the specified index or slice.
99
- """
100
- start = index.start or 0 if isinstance(index, slice) else index or 0
101
-
102
- if start < 0:
103
- start += len(self)
104
-
105
- current_position = self.fd.tell()
106
- self.fd.seek(start)
107
- self.fd.write(value)
108
- self.fd.seek(current_position)
json_repair-0.53.0.dist-info/RECORD DELETED
@@ -1,22 +0,0 @@
1
- json_repair/__init__.py,sha256=JdJIZNCKV3MfIviryqK8NH8yGssCta2-192CekcwH-o,174
2
- json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
- json_repair/constants.py,sha256=cv2gvyosuq0me0600WyTysM9avrtfXPuXYR26tawcuo,158
4
- json_repair/json_context.py,sha256=WsMOjqpGSr6aaDONcrk8UFtTurzWon2Qq9AoBBYseoI,934
5
- json_repair/json_parser.py,sha256=vy5Z8aiJUVhVmvYEgy0dkYy5WgUmyOeS6PEFiR3cW44,7948
6
- json_repair/json_repair.py,sha256=sDhXzDZxu0QmaFzICPTtf_q7yOY1A1Lf_iQG6Potsco,11572
7
- json_repair/object_comparer.py,sha256=XKV3MRab8H7_v4sm-wpEa5le0XX9OeycWo5S-MFm-GI,1716
8
- json_repair/parse_array.py,sha256=-rh65JcfT-FtXiR6s8RYlMfI-6LzVr08ytlDh6Z2CFE,2181
9
- json_repair/parse_boolean_or_null.py,sha256=WMSkvvxsp4wvauBcDqtt9WnLMD5SMoxeRfZFXp3FEBc,890
10
- json_repair/parse_comment.py,sha256=JHtQ_QlxOvPNnMh7lhUaoTjFGelqjhTNq7qn9xUE7SU,2648
11
- json_repair/parse_number.py,sha256=33zAtkbuVzi9Lqjxu7cXn9WlVzd3WjRx9Ln_LFzVL4o,1259
12
- json_repair/parse_object.py,sha256=rnuH5Oxo98OrXhktF0wrOC1vRb5Th_m819Li1EFJzm4,5571
13
- json_repair/parse_string.py,sha256=--coxoyH4nxl7osxgs1fIu31IEtB0HHwVbbOewypG4g,26146
14
- json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- json_repair/string_file_wrapper.py,sha256=tGkWBEUPE-CZPf4uSM5NE9oSDTpskX0myJiXsl-gbds,4333
16
- json_repair/parse_string_helpers/parse_json_llm_block.py,sha256=taREF3pwb35kGBGJYbUHkTybATX3GI-SOwOz3yXaEQs,644
17
- json_repair-0.53.0.dist-info/licenses/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
18
- json_repair-0.53.0.dist-info/METADATA,sha256=JvMUVYGDDIzmym7MqbQ6k6PjbnuuskW_myvk0EWp7V8,11027
19
- json_repair-0.53.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
20
- json_repair-0.53.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
21
- json_repair-0.53.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
22
- json_repair-0.53.0.dist-info/RECORD,,