json-repair 0.53.0__tar.gz → 0.53.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {json_repair-0.53.0/src/json_repair.egg-info → json_repair-0.53.1}/PKG-INFO +1 -1
  2. {json_repair-0.53.0 → json_repair-0.53.1}/pyproject.toml +3 -1
  3. {json_repair-0.53.0 → json_repair-0.53.1}/src/json_repair/__init__.py +1 -1
  4. {json_repair-0.53.0 → json_repair-0.53.1}/src/json_repair/json_parser.py +36 -34
  5. {json_repair-0.53.0 → json_repair-0.53.1}/src/json_repair/json_repair.py +6 -6
  6. {json_repair-0.53.0 → json_repair-0.53.1}/src/json_repair/parse_array.py +8 -8
  7. {json_repair-0.53.0 → json_repair-0.53.1}/src/json_repair/parse_comment.py +2 -2
  8. {json_repair-0.53.0 → json_repair-0.53.1}/src/json_repair/parse_number.py +3 -2
  9. {json_repair-0.53.0 → json_repair-0.53.1}/src/json_repair/parse_object.py +18 -18
  10. {json_repair-0.53.0 → json_repair-0.53.1}/src/json_repair/parse_string.py +46 -54
  11. json_repair-0.53.1/src/json_repair/parse_string_helpers/parse_boolean_or_null.py +28 -0
  12. json_repair-0.53.1/src/json_repair/parse_string_helpers/parse_json_llm_block.py +19 -0
  13. {json_repair-0.53.0 → json_repair-0.53.1/src/json_repair.egg-info}/PKG-INFO +1 -1
  14. {json_repair-0.53.0 → json_repair-0.53.1}/src/json_repair.egg-info/SOURCES.txt +5 -6
  15. {json_repair-0.53.0 → json_repair-0.53.1}/tests/test_parse_string.py +11 -0
  16. {json_repair-0.53.0 → json_repair-0.53.1}/tests/test_repair_json_cli.py +5 -5
  17. json_repair-0.53.0/src/json_repair/parse_boolean_or_null.py +0 -30
  18. json_repair-0.53.0/src/json_repair/parse_string_helpers/parse_json_llm_block.py +0 -19
  19. json_repair-0.53.0/tests/test_parse_boolean_or_null.py +0 -12
  20. {json_repair-0.53.0 → json_repair-0.53.1}/LICENSE +0 -0
  21. {json_repair-0.53.0 → json_repair-0.53.1}/README.md +0 -0
  22. {json_repair-0.53.0 → json_repair-0.53.1}/setup.cfg +0 -0
  23. {json_repair-0.53.0 → json_repair-0.53.1}/src/json_repair/__main__.py +0 -0
  24. {json_repair-0.53.0 → json_repair-0.53.1}/src/json_repair/py.typed +0 -0
  25. {json_repair-0.53.0/src/json_repair → json_repair-0.53.1/src/json_repair/utils}/constants.py +0 -0
  26. {json_repair-0.53.0/src/json_repair → json_repair-0.53.1/src/json_repair/utils}/json_context.py +0 -0
  27. {json_repair-0.53.0/src/json_repair → json_repair-0.53.1/src/json_repair/utils}/object_comparer.py +0 -0
  28. {json_repair-0.53.0/src/json_repair → json_repair-0.53.1/src/json_repair/utils}/string_file_wrapper.py +0 -0
  29. {json_repair-0.53.0 → json_repair-0.53.1}/src/json_repair.egg-info/dependency_links.txt +0 -0
  30. {json_repair-0.53.0 → json_repair-0.53.1}/src/json_repair.egg-info/entry_points.txt +0 -0
  31. {json_repair-0.53.0 → json_repair-0.53.1}/src/json_repair.egg-info/top_level.txt +0 -0
  32. {json_repair-0.53.0 → json_repair-0.53.1}/tests/test_json_repair.py +0 -0
  33. {json_repair-0.53.0 → json_repair-0.53.1}/tests/test_parse_array.py +0 -0
  34. {json_repair-0.53.0 → json_repair-0.53.1}/tests/test_parse_comment.py +0 -0
  35. {json_repair-0.53.0 → json_repair-0.53.1}/tests/test_parse_number.py +0 -0
  36. {json_repair-0.53.0 → json_repair-0.53.1}/tests/test_parse_object.py +0 -0
  37. {json_repair-0.53.0 → json_repair-0.53.1}/tests/test_performance.py +0 -0
  38. {json_repair-0.53.0 → json_repair-0.53.1}/tests/test_repair_json_from_file.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: json_repair
3
- Version: 0.53.0
3
+ Version: 0.53.1
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License-Expression: MIT
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
3
3
  build-backend = "setuptools.build_meta"
4
4
  [project]
5
5
  name = "json_repair"
6
- version = "0.53.0"
6
+ version = "0.53.1"
7
7
  license = "MIT"
8
8
  license-files = ["LICENSE"]
9
9
  authors = [
@@ -117,3 +117,5 @@ line-ending = "auto"
117
117
  [tool.ruff.lint.per-file-ignores]
118
118
  # Explicit re-exports is fine in __init__.py, still a code smell elsewhere.
119
119
  "__init__.py" = ["PLC0414"]
120
+ [tool.mypy]
121
+ strict = true
@@ -1,4 +1,4 @@
1
- from .constants import JSONReturnType
2
1
  from .json_repair import from_file, load, loads, repair_json
2
+ from .utils.constants import JSONReturnType
3
3
 
4
4
  __all__ = ["from_file", "load", "loads", "repair_json", "JSONReturnType"]
@@ -1,36 +1,32 @@
1
- from typing import Literal, TextIO
1
+ from typing import TextIO
2
2
 
3
- from .constants import STRING_DELIMITERS, JSONReturnType
4
- from .json_context import JsonContext
5
- from .object_comparer import ObjectComparer
6
3
  from .parse_array import parse_array as _parse_array
7
- from .parse_boolean_or_null import parse_boolean_or_null as _parse_boolean_or_null
8
4
  from .parse_comment import parse_comment as _parse_comment
9
5
  from .parse_number import parse_number as _parse_number
10
6
  from .parse_object import parse_object as _parse_object
11
7
  from .parse_string import parse_string as _parse_string
12
- from .string_file_wrapper import StringFileWrapper
8
+ from .utils.constants import STRING_DELIMITERS, JSONReturnType
9
+ from .utils.json_context import JsonContext
10
+ from .utils.object_comparer import ObjectComparer
11
+ from .utils.string_file_wrapper import StringFileWrapper
13
12
 
14
13
 
15
14
  class JSONParser:
16
15
  # Split the parse methods into separate files because this one was like 3000 lines
17
- def parse_array(self, *args, **kwargs):
18
- return _parse_array(self, *args, **kwargs)
16
+ def parse_array(self) -> list[JSONReturnType]:
17
+ return _parse_array(self)
19
18
 
20
- def parse_boolean_or_null(self, *args, **kwargs):
21
- return _parse_boolean_or_null(self, *args, **kwargs)
19
+ def parse_comment(self) -> JSONReturnType:
20
+ return _parse_comment(self)
22
21
 
23
- def parse_comment(self, *args, **kwargs):
24
- return _parse_comment(self, *args, **kwargs)
22
+ def parse_number(self) -> JSONReturnType:
23
+ return _parse_number(self)
25
24
 
26
- def parse_number(self, *args, **kwargs):
27
- return _parse_number(self, *args, **kwargs)
25
+ def parse_object(self) -> JSONReturnType:
26
+ return _parse_object(self)
28
27
 
29
- def parse_object(self, *args, **kwargs):
30
- return _parse_object(self, *args, **kwargs)
31
-
32
- def parse_string(self, *args, **kwargs):
33
- return _parse_string(self, *args, **kwargs)
28
+ def parse_string(self) -> JSONReturnType:
29
+ return _parse_string(self)
34
30
 
35
31
  def __init__(
36
32
  self,
@@ -107,8 +103,8 @@ class JSONParser:
107
103
  ) -> JSONReturnType:
108
104
  while True:
109
105
  char = self.get_char_at()
110
- # False means that we are at the end of the string provided
111
- if char is False:
106
+ # None means that we are at the end of the string provided
107
+ if char is None:
112
108
  return ""
113
109
  # <object> starts with '{'
114
110
  elif char == "{":
@@ -130,30 +126,36 @@ class JSONParser:
130
126
  else:
131
127
  self.index += 1
132
128
 
133
- def get_char_at(self, count: int = 0) -> str | Literal[False]:
129
+ def get_char_at(self, count: int = 0) -> str | None:
134
130
  # Why not use something simpler? Because try/except in python is a faster alternative to an "if" statement that is often True
135
131
  try:
136
132
  return self.json_str[self.index + count]
137
133
  except IndexError:
138
- return False
134
+ return None
139
135
 
140
- def skip_whitespaces_at(self, idx: int = 0, move_main_index=True) -> int:
136
+ def skip_whitespaces(self) -> None:
141
137
  """
142
- This function quickly iterates on whitespaces, syntactic sugar to make the code more concise
138
+ This function quickly iterates on whitespaces, moving the self.index forward
143
139
  """
144
140
  try:
145
- char = self.json_str[self.index + idx]
146
- except IndexError:
147
- return idx
148
- while char.isspace():
149
- if move_main_index:
141
+ char = self.json_str[self.index]
142
+ while char.isspace():
150
143
  self.index += 1
151
- else:
144
+ char = self.json_str[self.index]
145
+ except IndexError:
146
+ pass
147
+
148
+ def scroll_whitespaces(self, idx: int = 0) -> int:
149
+ """
150
+ This function quickly iterates on whitespaces. Doesn't move the self.index and returns the offset from self.index
151
+ """
152
+ try:
153
+ char = self.json_str[self.index + idx]
154
+ while char.isspace():
152
155
  idx += 1
153
- try:
154
156
  char = self.json_str[self.index + idx]
155
- except IndexError:
156
- return idx
157
+ except IndexError:
158
+ pass
157
159
  return idx
158
160
 
159
161
  def skip_to_character(self, character: str | list[str], idx: int = 0) -> int:
@@ -25,10 +25,10 @@ All supported use cases are in the unit tests
25
25
  import argparse
26
26
  import json
27
27
  import sys
28
- from typing import Literal, TextIO, overload
28
+ from typing import Any, Literal, TextIO, overload
29
29
 
30
- from .constants import JSONReturnType
31
30
  from .json_parser import JSONParser
31
+ from .utils.constants import JSONReturnType
32
32
 
33
33
 
34
34
  @overload
@@ -40,7 +40,7 @@ def repair_json(
40
40
  json_fd: TextIO | None = None,
41
41
  chunk_length: int = 0,
42
42
  stream_stable: bool = False,
43
- **json_dumps_args,
43
+ **json_dumps_args: Any,
44
44
  ) -> str: ...
45
45
 
46
46
 
@@ -53,7 +53,7 @@ def repair_json(
53
53
  json_fd: TextIO | None = None,
54
54
  chunk_length: int = 0,
55
55
  stream_stable: bool = False,
56
- **json_dumps_args,
56
+ **json_dumps_args: Any,
57
57
  ) -> JSONReturnType | tuple[JSONReturnType, list[dict[str, str]]]: ...
58
58
 
59
59
 
@@ -65,8 +65,8 @@ def repair_json(
65
65
  json_fd: TextIO | None = None,
66
66
  chunk_length: int = 0,
67
67
  stream_stable: bool = False,
68
- **json_dumps_args,
69
- ) -> JSONReturnType | tuple[JSONReturnType, list[dict[str, str]]] | tuple[JSONReturnType, list]:
68
+ **json_dumps_args: Any,
69
+ ) -> JSONReturnType | tuple[JSONReturnType, list[dict[str, str]]]:
70
70
  """
71
71
  Given a json formatted string, it will try to decode it and, if it fails, it will try to fix it.
72
72
 
@@ -1,8 +1,8 @@
1
1
  from typing import TYPE_CHECKING
2
2
 
3
- from .constants import STRING_DELIMITERS, JSONReturnType
4
- from .json_context import ContextValues
5
- from .object_comparer import ObjectComparer
3
+ from .utils.constants import STRING_DELIMITERS, JSONReturnType
4
+ from .utils.json_context import ContextValues
5
+ from .utils.object_comparer import ObjectComparer
6
6
 
7
7
  if TYPE_CHECKING:
8
8
  from .json_parser import JSONParser
@@ -15,7 +15,7 @@ def parse_array(self: "JSONParser") -> list[JSONReturnType]:
15
15
  # Stop when you either find the closing parentheses or you have iterated over the entire string
16
16
  char = self.get_char_at()
17
17
  while char and char not in ["]", "}"]:
18
- self.skip_whitespaces_at()
18
+ self.skip_whitespaces()
19
19
  value: JSONReturnType = ""
20
20
  if char in STRING_DELIMITERS:
21
21
  # Sometimes it can happen that LLMs forget to start an object and then you think it's a string in an array
@@ -23,13 +23,13 @@ def parse_array(self: "JSONParser") -> list[JSONReturnType]:
23
23
  # And either parse the string or parse the object
24
24
  i = 1
25
25
  i = self.skip_to_character(char, i)
26
- i = self.skip_whitespaces_at(idx=i + 1, move_main_index=False)
26
+ i = self.scroll_whitespaces(idx=i + 1)
27
27
  value = self.parse_object() if self.get_char_at(i) == ":" else self.parse_string()
28
28
  else:
29
29
  value = self.parse_json()
30
30
 
31
- # It is possible that parse_json() returns nothing valid, so we increase by 1
32
- if ObjectComparer.is_strictly_empty(value):
31
+ # It is possible that parse_json() returns nothing valid, so we increase by 1, unless we find an array separator
32
+ if ObjectComparer.is_strictly_empty(value) and self.get_char_at() not in ["]", ","]:
33
33
  self.index += 1
34
34
  elif value == "..." and self.get_char_at(-1) == ".":
35
35
  self.log(
@@ -45,7 +45,7 @@ def parse_array(self: "JSONParser") -> list[JSONReturnType]:
45
45
  char = self.get_char_at()
46
46
 
47
47
  # Especially at the end of an LLM generated json you might miss the last "]"
48
- if char and char != "]":
48
+ if char != "]":
49
49
  self.log(
50
50
  "While parsing an array we missed the closing ], ignoring it",
51
51
  )
@@ -1,7 +1,7 @@
1
1
  from typing import TYPE_CHECKING
2
2
 
3
- from .constants import JSONReturnType
4
- from .json_context import ContextValues
3
+ from .utils.constants import JSONReturnType
4
+ from .utils.json_context import ContextValues
5
5
 
6
6
  if TYPE_CHECKING:
7
7
  from .json_parser import JSONParser
@@ -1,6 +1,7 @@
1
1
  from typing import TYPE_CHECKING
2
2
 
3
- from .json_context import ContextValues
3
+ from .utils.constants import JSONReturnType
4
+ from .utils.json_context import ContextValues
4
5
 
5
6
  NUMBER_CHARS: set[str] = set("0123456789-.eE/,")
6
7
 
@@ -9,7 +10,7 @@ if TYPE_CHECKING:
9
10
  from .json_parser import JSONParser
10
11
 
11
12
 
12
- def parse_number(self: "JSONParser") -> float | int | str | bool | None:
13
+ def parse_number(self: "JSONParser") -> JSONReturnType:
13
14
  # <number> is a valid real number expressed in one of a number of given formats
14
15
  number_str = ""
15
16
  char = self.get_char_at()
@@ -1,7 +1,7 @@
1
1
  from typing import TYPE_CHECKING
2
2
 
3
- from .constants import STRING_DELIMITERS, JSONReturnType
4
- from .json_context import ContextValues
3
+ from .utils.constants import STRING_DELIMITERS, JSONReturnType
4
+ from .utils.json_context import ContextValues
5
5
 
6
6
  if TYPE_CHECKING:
7
7
  from .json_parser import JSONParser
@@ -17,10 +17,10 @@ def parse_object(self: "JSONParser") -> JSONReturnType:
17
17
  # <member> ::= <string> ': ' <json>
18
18
 
19
19
  # Skip filler whitespaces
20
- self.skip_whitespaces_at()
20
+ self.skip_whitespaces()
21
21
 
22
22
  # Sometimes LLMs do weird things, if we find a ":" so early, we'll change it to "," and move on
23
- if (self.get_char_at() or "") == ":":
23
+ if self.get_char_at() == ":":
24
24
  self.log(
25
25
  "While parsing an object we found a : before a key, ignoring",
26
26
  )
@@ -53,14 +53,14 @@ def parse_object(self: "JSONParser") -> JSONReturnType:
53
53
  prev_value.extend(
54
54
  new_array[0] if len(new_array) == 1 and isinstance(new_array[0], list) else new_array
55
55
  )
56
- self.skip_whitespaces_at()
56
+ self.skip_whitespaces()
57
57
  if self.get_char_at() == ",":
58
58
  self.index += 1
59
- self.skip_whitespaces_at()
59
+ self.skip_whitespaces()
60
60
  continue
61
61
  key = str(self.parse_string())
62
62
  if key == "":
63
- self.skip_whitespaces_at()
63
+ self.skip_whitespaces()
64
64
  if key != "" or (key == "" and self.get_char_at() in [":", "}"]):
65
65
  # If the string is empty but there is a object divider, we are done here
66
66
  break
@@ -74,16 +74,16 @@ def parse_object(self: "JSONParser") -> JSONReturnType:
74
74
  break
75
75
 
76
76
  # Skip filler whitespaces
77
- self.skip_whitespaces_at()
77
+ self.skip_whitespaces()
78
78
 
79
79
  # We reached the end here
80
80
  if (self.get_char_at() or "}") == "}":
81
81
  continue
82
82
 
83
- self.skip_whitespaces_at()
83
+ self.skip_whitespaces()
84
84
 
85
85
  # An extreme case of missing ":" after a key
86
- if (self.get_char_at() or "") != ":":
86
+ if self.get_char_at() != ":":
87
87
  self.log(
88
88
  "While parsing an object we missed a : after a key",
89
89
  )
@@ -92,10 +92,10 @@ def parse_object(self: "JSONParser") -> JSONReturnType:
92
92
  self.context.reset()
93
93
  self.context.set(ContextValues.OBJECT_VALUE)
94
94
  # The value can be any valid json
95
- self.skip_whitespaces_at()
95
+ self.skip_whitespaces()
96
96
  # Corner case, a lone comma
97
97
  value: JSONReturnType = ""
98
- if (self.get_char_at() or "") in [",", "}"]:
98
+ if self.get_char_at() in [",", "}"]:
99
99
  self.log(
100
100
  "While parsing an object value we found a stray , ignoring it",
101
101
  )
@@ -106,11 +106,11 @@ def parse_object(self: "JSONParser") -> JSONReturnType:
106
106
  self.context.reset()
107
107
  obj[key] = value
108
108
 
109
- if (self.get_char_at() or "") in [",", "'", '"']:
109
+ if self.get_char_at() in [",", "'", '"']:
110
110
  self.index += 1
111
111
 
112
112
  # Remove trailing spaces
113
- self.skip_whitespaces_at()
113
+ self.skip_whitespaces()
114
114
 
115
115
  self.index += 1
116
116
 
@@ -126,12 +126,12 @@ def parse_object(self: "JSONParser") -> JSONReturnType:
126
126
  if not self.context.empty:
127
127
  return obj
128
128
 
129
- self.skip_whitespaces_at()
130
- if (self.get_char_at() or "") != ",":
129
+ self.skip_whitespaces()
130
+ if self.get_char_at() != ",":
131
131
  return obj
132
132
  self.index += 1
133
- self.skip_whitespaces_at()
134
- if (self.get_char_at() or "") not in STRING_DELIMITERS:
133
+ self.skip_whitespaces()
134
+ if self.get_char_at() not in STRING_DELIMITERS:
135
135
  return obj
136
136
  self.log(
137
137
  "Found a comma and string delimiter after object closing brace, checking for additional key-value pairs",
@@ -1,14 +1,22 @@
1
1
  from typing import TYPE_CHECKING
2
2
 
3
- from .constants import STRING_DELIMITERS, JSONReturnType
4
- from .json_context import ContextValues
3
+ from .parse_string_helpers.parse_boolean_or_null import parse_boolean_or_null
5
4
  from .parse_string_helpers.parse_json_llm_block import parse_json_llm_block
5
+ from .utils.constants import STRING_DELIMITERS, JSONReturnType
6
+ from .utils.json_context import ContextValues
6
7
 
7
8
  if TYPE_CHECKING:
8
9
  from .json_parser import JSONParser
9
10
 
10
11
 
11
12
  def parse_string(self: "JSONParser") -> JSONReturnType:
13
+ # Utility function to append a character to the accumulator and update the index
14
+ def _append_literal_char(acc: str, current_char: str | None) -> tuple[str, str | None]:
15
+ acc += str(current_char)
16
+ self.index += 1
17
+ char = self.get_char_at()
18
+ return acc, char
19
+
12
20
  # <string> is a string of valid characters enclosed in quotes
13
21
  # i.e. { name: "John" }
14
22
  # Somehow all weird cases in an invalid JSON happen to be resolved in this function, so be careful here
@@ -40,7 +48,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
40
48
  # This could be a <boolean> and not a string. Because (T)rue or (F)alse or (N)ull are valid
41
49
  # But remember, object keys are only of type string
42
50
  if char.lower() in ["t", "f", "n"] and self.context.current != ContextValues.OBJECT_KEY:
43
- value = self.parse_boolean_or_null()
51
+ value = parse_boolean_or_null(self)
44
52
  if value != "":
45
53
  return value
46
54
  self.log(
@@ -59,10 +67,12 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
59
67
  "While parsing a string, we found code fences but they did not enclose valid JSON, continuing parsing the string",
60
68
  )
61
69
  # There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
62
- if self.get_char_at() in STRING_DELIMITERS and self.get_char_at() == lstring_delimiter:
70
+ if self.get_char_at() == lstring_delimiter:
63
71
  # If it's an empty key, this was easy
64
- if (self.context.current == ContextValues.OBJECT_KEY and self.get_char_at(1) == ":") or (
65
- self.context.current == ContextValues.OBJECT_VALUE and self.get_char_at(1) in [",", "}"]
72
+ if (
73
+ (self.context.current == ContextValues.OBJECT_KEY and self.get_char_at(1) == ":")
74
+ or (self.context.current == ContextValues.OBJECT_VALUE and self.get_char_at(1) in [",", "}"])
75
+ or (self.context.current == ContextValues.ARRAY and self.get_char_at(1) in [",", "]"])
66
76
  ):
67
77
  self.index += 1
68
78
  return ""
@@ -77,7 +87,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
77
87
  next_c = self.get_char_at(i)
78
88
  # Now check that the next character is also a delimiter to ensure that we have "".....""
79
89
  # In that case we ignore this rstring delimiter
80
- if next_c and (self.get_char_at(i + 1) or "") == rstring_delimiter:
90
+ if self.get_char_at(i + 1) == rstring_delimiter:
81
91
  self.log(
82
92
  "While parsing a string, we found a valid starting doubled quote",
83
93
  )
@@ -85,7 +95,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
85
95
  self.index += 1
86
96
  else:
87
97
  # Ok this is not a doubled quote, check if this is an empty string or not
88
- i = self.skip_whitespaces_at(idx=1, move_main_index=False)
98
+ i = self.scroll_whitespaces(idx=1)
89
99
  next_c = self.get_char_at(i)
90
100
  if next_c in STRING_DELIMITERS + ["{", "["]:
91
101
  # something fishy is going on here
@@ -135,7 +145,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
135
145
  ):
136
146
  rstring_delimiter_missing = True
137
147
  # check if this is a case in which the closing comma is NOT missing instead
138
- self.skip_whitespaces_at()
148
+ self.skip_whitespaces()
139
149
  if self.get_char_at(1) == "\\":
140
150
  # Ok this is a quoted string, skip
141
151
  rstring_delimiter_missing = False
@@ -145,7 +155,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
145
155
  i += 1
146
156
  # found a delimiter, now we need to check that it is followed strictly by a comma or brace
147
157
  # or the string ended
148
- i = self.skip_whitespaces_at(idx=i, move_main_index=False)
158
+ i = self.scroll_whitespaces(idx=i)
149
159
  next_c = self.get_char_at(i)
150
160
  if not next_c or next_c in [",", "}"]:
151
161
  rstring_delimiter_missing = False
@@ -160,7 +170,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
160
170
  else:
161
171
  # But again, this could just be something a bit stupid like "lorem, "ipsum" sic"
162
172
  # Check if we find a : afterwards (skipping space)
163
- i = self.skip_whitespaces_at(idx=i + 1, move_main_index=False)
173
+ i = self.scroll_whitespaces(idx=i + 1)
164
174
  next_c = self.get_char_at(i)
165
175
  if next_c and next_c != ":":
166
176
  rstring_delimiter_missing = False
@@ -175,7 +185,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
175
185
  break
176
186
  else:
177
187
  # skip any whitespace first
178
- i = self.skip_whitespaces_at(idx=1, move_main_index=False)
188
+ i = self.scroll_whitespaces(idx=1)
179
189
  # We couldn't find any rstring_delimeter before the end of the string
180
190
  # check if this is the last string of an object and therefore we can keep going
181
191
  # make an exception if this is the last char before the closing brace
@@ -212,19 +222,15 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
212
222
  if self.context.current == ContextValues.OBJECT_VALUE and char == "}":
213
223
  # We found the end of an object while parsing a value
214
224
  # Check if the object is really over, to avoid doubling the closing brace
215
- i = self.skip_whitespaces_at(idx=1, move_main_index=False)
225
+ i = self.scroll_whitespaces(idx=1)
216
226
  next_c = self.get_char_at(i)
217
- if next_c and next_c == "`":
227
+ if next_c == "`" and self.get_char_at(i + 1) == "`" and self.get_char_at(i + 2) == "`":
218
228
  # This could be a special case in which the LLM added code fences after the object
219
229
  # So we need to check if there are another two ` after this one
220
- next_c = self.get_char_at(i + 1)
221
- if next_c and next_c == "`":
222
- next_c = self.get_char_at(i + 2)
223
- if next_c and next_c == "`":
224
- self.log(
225
- "While parsing a string in object value context, we found a } that closes the object before code fences, stopping here",
226
- )
227
- break
230
+ self.log(
231
+ "While parsing a string in object value context, we found a } that closes the object before code fences, stopping here",
232
+ )
233
+ break
228
234
  if not next_c:
229
235
  self.log(
230
236
  "While parsing a string in object value context, we found a } that closes the object, stopping here",
@@ -282,12 +288,11 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
282
288
  # found a second delimiter
283
289
  i += 1
284
290
  # Skip spaces
285
- i = self.skip_whitespaces_at(idx=i, move_main_index=False)
286
- next_c = self.get_char_at(i)
287
- if next_c and next_c in [",", "}"]:
291
+ i = self.scroll_whitespaces(idx=i)
292
+ if self.get_char_at(i) in [",", "}"]:
288
293
  # Ok then this is a missing right quote
289
294
  self.log(
290
- "While parsing a string missing the right delimiter in object key context, we found a :, stopping here",
295
+ "While parsing a string missing the right delimiter in object key context, we found a , or } stopping here",
291
296
  )
292
297
  break
293
298
  else:
@@ -316,9 +321,8 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
316
321
  # We found a quote, now let's make sure there's a ":" following
317
322
  i += 1
318
323
  # found a delimiter, now we need to check that it is followed strictly by a colon
319
- i = self.skip_whitespaces_at(idx=i, move_main_index=False)
320
- next_c = self.get_char_at(i)
321
- if next_c and next_c == ":":
324
+ i = self.scroll_whitespaces(idx=i)
325
+ if self.get_char_at(i) == ":":
322
326
  # Reset the cursor
323
327
  self.index -= 1
324
328
  char = self.get_char_at()
@@ -328,9 +332,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
328
332
  break
329
333
  elif unmatched_delimiter:
330
334
  unmatched_delimiter = False
331
- string_acc += str(char)
332
- self.index += 1
333
- char = self.get_char_at()
335
+ string_acc, char = _append_literal_char(string_acc, char)
334
336
  else:
335
337
  # Check if eventually there is a rstring delimiter, otherwise we bail
336
338
  i = 1
@@ -365,22 +367,20 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
365
367
  next_c = self.get_char_at(i)
366
368
  # Ok now I found a delimiter, let's skip whitespaces and see if next we find a } or a ,
367
369
  i += 1
368
- i = self.skip_whitespaces_at(idx=i, move_main_index=False)
370
+ i = self.scroll_whitespaces(idx=i)
369
371
  next_c = self.get_char_at(i)
370
372
  if next_c in ["}", ","]:
371
373
  self.log(
372
- "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
374
+ "While parsing a string, we found a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
373
375
  )
374
- string_acc += str(char)
375
- self.index += 1
376
- char = self.get_char_at()
376
+ string_acc, char = _append_literal_char(string_acc, char)
377
377
  continue
378
378
  elif next_c == rstring_delimiter and self.get_char_at(i - 1) != "\\":
379
379
  # Check if self.index:self.index+i is only whitespaces, break if that's the case
380
380
  if all(str(self.get_char_at(j)).isspace() for j in range(1, i) if self.get_char_at(j)):
381
381
  break
382
382
  if self.context.current == ContextValues.OBJECT_VALUE:
383
- i = self.skip_whitespaces_at(idx=i + 1, move_main_index=False)
383
+ i = self.scroll_whitespaces(idx=i + 1)
384
384
  if self.get_char_at(i) == ",":
385
385
  # So we found a comma, this could be a case of a single quote like "va"lue",
386
386
  # Search if it's followed by another key, starting with the first delimiter
@@ -388,15 +388,13 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
388
388
  i += 1
389
389
  i = self.skip_to_character(character=rstring_delimiter, idx=i + 1)
390
390
  i += 1
391
- i = self.skip_whitespaces_at(idx=i, move_main_index=False)
391
+ i = self.scroll_whitespaces(idx=i)
392
392
  next_c = self.get_char_at(i)
393
393
  if next_c == ":":
394
394
  self.log(
395
- "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
395
+ "While parsing a string, we found a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
396
396
  )
397
- string_acc += str(char)
398
- self.index += 1
399
- char = self.get_char_at()
397
+ string_acc, char = _append_literal_char(string_acc, char)
400
398
  continue
401
399
  # We found a delimiter and we need to check if this is a key
402
400
  # so find a rstring_delimiter and a colon after
@@ -413,12 +411,10 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
413
411
  # Only if we fail to find a ':' then we know this is misplaced quote
414
412
  if next_c != ":":
415
413
  self.log(
416
- "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
414
+ "While parsing a string, we found a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
417
415
  )
418
416
  unmatched_delimiter = not unmatched_delimiter
419
- string_acc += str(char)
420
- self.index += 1
421
- char = self.get_char_at()
417
+ string_acc, char = _append_literal_char(string_acc, char)
422
418
  elif self.context.current == ContextValues.ARRAY:
423
419
  # So here we can have a few valid cases:
424
420
  # ["bla bla bla "puppy" bla bla bla "kitty" bla bla"]
@@ -442,9 +438,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
442
438
  "While parsing a string in Array context, we detected a quoted section that would have closed the string but has a different meaning here, ignoring it",
443
439
  )
444
440
  unmatched_delimiter = not unmatched_delimiter
445
- string_acc += str(char)
446
- self.index += 1
447
- char = self.get_char_at()
441
+ string_acc, char = _append_literal_char(string_acc, char)
448
442
  else:
449
443
  break
450
444
  elif self.context.current == ContextValues.OBJECT_KEY:
@@ -452,14 +446,12 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
452
446
  self.log(
453
447
  "While parsing a string in Object Key context, we detected a quoted section that would have closed the string but has a different meaning here, ignoring it",
454
448
  )
455
- string_acc += str(char)
456
- self.index += 1
457
- char = self.get_char_at()
449
+ string_acc, char = _append_literal_char(string_acc, char)
458
450
  if char and missing_quotes and self.context.current == ContextValues.OBJECT_KEY and char.isspace():
459
451
  self.log(
460
452
  "While parsing a string, handling an extreme corner case in which the LLM added a comment instead of valid string, invalidate the string and return an empty value",
461
453
  )
462
- self.skip_whitespaces_at()
454
+ self.skip_whitespaces()
463
455
  if self.get_char_at() not in [":", ","]:
464
456
  return ""
465
457
 
@@ -0,0 +1,28 @@
1
+ from typing import TYPE_CHECKING
2
+
3
+ if TYPE_CHECKING:
4
+ from ..json_parser import JSONParser # noqa: TID252
5
+
6
+
7
+ def parse_boolean_or_null(parser: "JSONParser") -> bool | str | None:
8
+ # <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
9
+ char = (parser.get_char_at() or "").lower()
10
+ value_map: dict[str, tuple[str, bool | None]] = {
11
+ "t": ("true", True),
12
+ "f": ("false", False),
13
+ "n": ("null", None),
14
+ }
15
+ value: tuple[str, bool | None] = value_map[char]
16
+
17
+ i = 0
18
+ starting_index = parser.index
19
+ while char and i < len(value[0]) and char == value[0][i]:
20
+ i += 1
21
+ parser.index += 1
22
+ char = (parser.get_char_at() or "").lower()
23
+ if i == len(value[0]):
24
+ return value[1]
25
+
26
+ # If nothing works reset the index before returning
27
+ parser.index = starting_index
28
+ return ""
@@ -0,0 +1,19 @@
1
+ from typing import TYPE_CHECKING
2
+
3
+ from ..utils.constants import JSONReturnType # noqa: TID252
4
+
5
+ if TYPE_CHECKING:
6
+ from ..json_parser import JSONParser # noqa: TID252
7
+
8
+
9
+ def parse_json_llm_block(parser: "JSONParser") -> JSONReturnType:
10
+ """
11
+ Extracts and normalizes JSON enclosed in ```json ... ``` blocks.
12
+ """
13
+ # Try to find a ```json ... ``` block
14
+ if parser.json_str[parser.index : parser.index + 7] == "```json":
15
+ i = parser.skip_to_character("`", idx=7)
16
+ if parser.json_str[parser.index + i : parser.index + i + 3] == "```":
17
+ parser.index += 7 # Move past ```json
18
+ return parser.parse_json()
19
+ return False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: json_repair
3
- Version: 0.53.0
3
+ Version: 0.53.1
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License-Expression: MIT
@@ -3,28 +3,27 @@ README.md
3
3
  pyproject.toml
4
4
  src/json_repair/__init__.py
5
5
  src/json_repair/__main__.py
6
- src/json_repair/constants.py
7
- src/json_repair/json_context.py
8
6
  src/json_repair/json_parser.py
9
7
  src/json_repair/json_repair.py
10
- src/json_repair/object_comparer.py
11
8
  src/json_repair/parse_array.py
12
- src/json_repair/parse_boolean_or_null.py
13
9
  src/json_repair/parse_comment.py
14
10
  src/json_repair/parse_number.py
15
11
  src/json_repair/parse_object.py
16
12
  src/json_repair/parse_string.py
17
13
  src/json_repair/py.typed
18
- src/json_repair/string_file_wrapper.py
19
14
  src/json_repair.egg-info/PKG-INFO
20
15
  src/json_repair.egg-info/SOURCES.txt
21
16
  src/json_repair.egg-info/dependency_links.txt
22
17
  src/json_repair.egg-info/entry_points.txt
23
18
  src/json_repair.egg-info/top_level.txt
19
+ src/json_repair/parse_string_helpers/parse_boolean_or_null.py
24
20
  src/json_repair/parse_string_helpers/parse_json_llm_block.py
21
+ src/json_repair/utils/constants.py
22
+ src/json_repair/utils/json_context.py
23
+ src/json_repair/utils/object_comparer.py
24
+ src/json_repair/utils/string_file_wrapper.py
25
25
  tests/test_json_repair.py
26
26
  tests/test_parse_array.py
27
- tests/test_parse_boolean_or_null.py
28
27
  tests/test_parse_comment.py
29
28
  tests/test_parse_number.py
30
29
  tests/test_parse_object.py
@@ -110,3 +110,14 @@ def test_string_json_llm_block():
110
110
  == '{"key": {"key": [{"key1": 1}, {"key2": 2}]}}'
111
111
  )
112
112
  assert repair_json('{"response": "```json{}"') == '{"response": "```json{}"}'
113
+
114
+
115
+ def test_parse_boolean_or_null():
116
+ assert repair_json("True", return_objects=True) == ""
117
+ assert repair_json("False", return_objects=True) == ""
118
+ assert repair_json("Null", return_objects=True) == ""
119
+ assert repair_json("true", return_objects=True)
120
+ assert not repair_json("false", return_objects=True)
121
+ assert repair_json("null", return_objects=True) is None
122
+ assert repair_json(' {"key": true, "key2": false, "key3": null}') == '{"key": true, "key2": false, "key3": null}'
123
+ assert repair_json('{"key": TRUE, "key2": FALSE, "key3": Null} ') == '{"key": true, "key2": false, "key3": null}'
@@ -11,23 +11,23 @@ from src.json_repair.json_repair import cli
11
11
  def test_cli(capsys):
12
12
  # Create a temporary file
13
13
  temp_fd, temp_path = tempfile.mkstemp(suffix=".json")
14
+ _, tempout_path = tempfile.mkstemp(suffix=".json")
14
15
  try:
15
16
  # Write content to the temporary file
16
17
  with os.fdopen(temp_fd, "w") as tmp:
17
18
  tmp.write("{key:value")
18
- cli(inline_args=[temp_path, "--indent", 0, "--ensure_ascii"])
19
+ cli(inline_args=[temp_path, "--indent", "0", "--ensure_ascii"])
19
20
  captured = capsys.readouterr()
20
21
  assert captured.out == '{\n"key": "value"\n}\n'
21
22
 
22
23
  # Test the output option
23
- tempout_fd, tempout_path = tempfile.mkstemp(suffix=".json")
24
- cli(inline_args=[temp_path, "--indent", 0, "-o", tempout_path])
24
+ cli(inline_args=[temp_path, "--indent", "0", "-o", tempout_path])
25
25
  with open(tempout_path) as tmp:
26
26
  out = tmp.read()
27
27
  assert out == '{\n"key": "value"\n}'
28
28
 
29
29
  # Test the inline option
30
- cli(inline_args=[temp_path, "--indent", 0, "-i"])
30
+ cli(inline_args=[temp_path, "--indent", "0", "-i"])
31
31
  with open(temp_path) as tmp:
32
32
  out = tmp.read()
33
33
  assert out == '{\n"key": "value"\n}'
@@ -43,7 +43,7 @@ def test_cli(capsys):
43
43
  expected_output = '{\n"key": "value"\n}\n'
44
44
  # Patch sys.stdin so that cli() reads from it instead of a file.
45
45
  with patch("sys.stdin", new=io.StringIO(test_input)):
46
- cli(inline_args=["--indent", 0])
46
+ cli(inline_args=["--indent", "0"])
47
47
  captured = capsys.readouterr()
48
48
  assert captured.out == expected_output
49
49
 
@@ -1,30 +0,0 @@
1
- from typing import TYPE_CHECKING
2
-
3
- if TYPE_CHECKING:
4
- from .json_parser import JSONParser
5
-
6
-
7
- def parse_boolean_or_null(self: "JSONParser") -> bool | str | None:
8
- # <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
9
- starting_index = self.index
10
- char = (self.get_char_at() or "").lower()
11
- value: tuple[str, bool | None] | None = None
12
- if char == "t":
13
- value = ("true", True)
14
- elif char == "f":
15
- value = ("false", False)
16
- elif char == "n":
17
- value = ("null", None)
18
-
19
- if value:
20
- i = 0
21
- while char and i < len(value[0]) and char == value[0][i]:
22
- i += 1
23
- self.index += 1
24
- char = (self.get_char_at() or "").lower()
25
- if i == len(value[0]):
26
- return value[1]
27
-
28
- # If nothing works reset the index before returning
29
- self.index = starting_index
30
- return ""
@@ -1,19 +0,0 @@
1
- from typing import TYPE_CHECKING
2
-
3
- from ..constants import JSONReturnType # noqa: TID252
4
-
5
- if TYPE_CHECKING:
6
- from ..json_parser import JSONParser # noqa: TID252
7
-
8
-
9
- def parse_json_llm_block(self: "JSONParser") -> JSONReturnType:
10
- """
11
- Extracts and normalizes JSON enclosed in ```json ... ``` blocks.
12
- """
13
- # Try to find a ```json ... ``` block
14
- if self.json_str[self.index : self.index + 7] == "```json":
15
- i = self.skip_to_character("`", idx=7)
16
- if self.json_str[self.index + i : self.index + i + 3] == "```":
17
- self.index += 7 # Move past ```json
18
- return self.parse_json()
19
- return False
@@ -1,12 +0,0 @@
1
- from src.json_repair.json_repair import repair_json
2
-
3
-
4
- def test_parse_boolean_or_null():
5
- assert repair_json("True", return_objects=True) == ""
6
- assert repair_json("False", return_objects=True) == ""
7
- assert repair_json("Null", return_objects=True) == ""
8
- assert repair_json("true", return_objects=True)
9
- assert not repair_json("false", return_objects=True)
10
- assert repair_json("null", return_objects=True) is None
11
- assert repair_json(' {"key": true, "key2": false, "key3": null}') == '{"key": true, "key2": false, "key3": null}'
12
- assert repair_json('{"key": TRUE, "key2": FALSE, "key3": Null} ') == '{"key": true, "key2": false, "key3": null}'
File without changes
File without changes
File without changes