json-repair 0.52.4__tar.gz → 0.53.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {json_repair-0.52.4/src/json_repair.egg-info → json_repair-0.53.0}/PKG-INFO +1 -1
  2. {json_repair-0.52.4 → json_repair-0.53.0}/pyproject.toml +1 -1
  3. {json_repair-0.52.4 → json_repair-0.53.0}/src/json_repair/json_parser.py +25 -16
  4. {json_repair-0.52.4 → json_repair-0.53.0}/src/json_repair/parse_object.py +1 -1
  5. {json_repair-0.52.4 → json_repair-0.53.0}/src/json_repair/parse_string.py +9 -1
  6. json_repair-0.53.0/src/json_repair/parse_string_helpers/parse_json_llm_block.py +19 -0
  7. {json_repair-0.52.4 → json_repair-0.53.0/src/json_repair.egg-info}/PKG-INFO +1 -1
  8. {json_repair-0.52.4 → json_repair-0.53.0}/src/json_repair.egg-info/SOURCES.txt +1 -0
  9. {json_repair-0.52.4 → json_repair-0.53.0}/tests/test_parse_string.py +10 -0
  10. {json_repair-0.52.4 → json_repair-0.53.0}/LICENSE +0 -0
  11. {json_repair-0.52.4 → json_repair-0.53.0}/README.md +0 -0
  12. {json_repair-0.52.4 → json_repair-0.53.0}/setup.cfg +0 -0
  13. {json_repair-0.52.4 → json_repair-0.53.0}/src/json_repair/__init__.py +0 -0
  14. {json_repair-0.52.4 → json_repair-0.53.0}/src/json_repair/__main__.py +0 -0
  15. {json_repair-0.52.4 → json_repair-0.53.0}/src/json_repair/constants.py +0 -0
  16. {json_repair-0.52.4 → json_repair-0.53.0}/src/json_repair/json_context.py +0 -0
  17. {json_repair-0.52.4 → json_repair-0.53.0}/src/json_repair/json_repair.py +0 -0
  18. {json_repair-0.52.4 → json_repair-0.53.0}/src/json_repair/object_comparer.py +0 -0
  19. {json_repair-0.52.4 → json_repair-0.53.0}/src/json_repair/parse_array.py +0 -0
  20. {json_repair-0.52.4 → json_repair-0.53.0}/src/json_repair/parse_boolean_or_null.py +0 -0
  21. {json_repair-0.52.4 → json_repair-0.53.0}/src/json_repair/parse_comment.py +0 -0
  22. {json_repair-0.52.4 → json_repair-0.53.0}/src/json_repair/parse_number.py +0 -0
  23. {json_repair-0.52.4 → json_repair-0.53.0}/src/json_repair/py.typed +0 -0
  24. {json_repair-0.52.4 → json_repair-0.53.0}/src/json_repair/string_file_wrapper.py +0 -0
  25. {json_repair-0.52.4 → json_repair-0.53.0}/src/json_repair.egg-info/dependency_links.txt +0 -0
  26. {json_repair-0.52.4 → json_repair-0.53.0}/src/json_repair.egg-info/entry_points.txt +0 -0
  27. {json_repair-0.52.4 → json_repair-0.53.0}/src/json_repair.egg-info/top_level.txt +0 -0
  28. {json_repair-0.52.4 → json_repair-0.53.0}/tests/test_json_repair.py +0 -0
  29. {json_repair-0.52.4 → json_repair-0.53.0}/tests/test_parse_array.py +0 -0
  30. {json_repair-0.52.4 → json_repair-0.53.0}/tests/test_parse_boolean_or_null.py +0 -0
  31. {json_repair-0.52.4 → json_repair-0.53.0}/tests/test_parse_comment.py +0 -0
  32. {json_repair-0.52.4 → json_repair-0.53.0}/tests/test_parse_number.py +0 -0
  33. {json_repair-0.52.4 → json_repair-0.53.0}/tests/test_parse_object.py +0 -0
  34. {json_repair-0.52.4 → json_repair-0.53.0}/tests/test_performance.py +0 -0
  35. {json_repair-0.52.4 → json_repair-0.53.0}/tests/test_repair_json_cli.py +0 -0
  36. {json_repair-0.52.4 → json_repair-0.53.0}/tests/test_repair_json_from_file.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: json_repair
3
- Version: 0.52.4
3
+ Version: 0.53.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License-Expression: MIT
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
3
3
  build-backend = "setuptools.build_meta"
4
4
  [project]
5
5
  name = "json_repair"
6
- version = "0.52.4"
6
+ version = "0.53.0"
7
7
  license = "MIT"
8
8
  license-files = ["LICENSE"]
9
9
  authors = [
@@ -158,23 +158,32 @@ class JSONParser:
158
158
 
159
159
  def skip_to_character(self, character: str | list[str], idx: int = 0) -> int:
160
160
  """
161
- This function quickly iterates to find a character, syntactic sugar to make the code more concise
161
+ Advance from (self.index + idx) until we hit an *unescaped* target character.
162
+ Returns the offset (idx) from self.index to that position, or the distance to the end if not found.
162
163
  """
163
- try:
164
- char = self.json_str[self.index + idx]
165
- except IndexError:
166
- return idx
167
- character_list = character if isinstance(character, list) else [character]
168
- while char not in character_list:
169
- idx += 1
170
- try:
171
- char = self.json_str[self.index + idx]
172
- except IndexError:
173
- return idx
174
- if self.json_str[self.index + idx - 1] == "\\":
175
- # Ah shoot this was actually escaped, continue
176
- return self.skip_to_character(character, idx + 1)
177
- return idx
164
+ targets = set(character) if isinstance(character, list) else {character}
165
+ i = self.index + idx
166
+ n = len(self.json_str)
167
+ backslashes = 0 # count of consecutive '\' immediately before current char
168
+
169
+ while i < n:
170
+ ch = self.json_str[i]
171
+
172
+ if ch == "\\":
173
+ backslashes += 1
174
+ i += 1
175
+ continue
176
+
177
+ # ch is not a backslash; if it's a target and not escaped (even backslashes), we're done
178
+ if ch in targets and (backslashes % 2 == 0):
179
+ return i - self.index
180
+
181
+ # reset backslash run when we see a non-backslash
182
+ backslashes = 0
183
+ i += 1
184
+
185
+ # not found; return distance to end
186
+ return n - self.index
178
187
 
179
188
  def _log(self, text: str) -> None:
180
189
  window: int = 10
@@ -7,7 +7,7 @@ if TYPE_CHECKING:
7
7
  from .json_parser import JSONParser
8
8
 
9
9
 
10
- def parse_object(self: "JSONParser") -> dict[str, JSONReturnType]:
10
+ def parse_object(self: "JSONParser") -> JSONReturnType:
11
11
  # <object> ::= '{' [ <member> *(', ' <member>) ] '}' ; A sequence of 'members'
12
12
  obj: dict[str, JSONReturnType] = {}
13
13
  start_index = self.index
@@ -2,6 +2,7 @@ from typing import TYPE_CHECKING
2
2
 
3
3
  from .constants import STRING_DELIMITERS, JSONReturnType
4
4
  from .json_context import ContextValues
5
+ from .parse_string_helpers.parse_json_llm_block import parse_json_llm_block
5
6
 
6
7
  if TYPE_CHECKING:
7
8
  from .json_parser import JSONParser
@@ -49,7 +50,14 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
49
50
 
50
51
  if not missing_quotes:
51
52
  self.index += 1
52
-
53
+ if self.get_char_at() == "`":
54
+ ret_val = parse_json_llm_block(self)
55
+ # If we found a valid JSON block, return it, otherwise continue parsing the string
56
+ if ret_val is not False:
57
+ return ret_val
58
+ self.log(
59
+ "While parsing a string, we found code fences but they did not enclose valid JSON, continuing parsing the string",
60
+ )
53
61
  # There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
54
62
  if self.get_char_at() in STRING_DELIMITERS and self.get_char_at() == lstring_delimiter:
55
63
  # If it's an empty key, this was easy
@@ -0,0 +1,19 @@
1
+ from typing import TYPE_CHECKING
2
+
3
+ from ..constants import JSONReturnType # noqa: TID252
4
+
5
+ if TYPE_CHECKING:
6
+ from ..json_parser import JSONParser # noqa: TID252
7
+
8
+
9
+ def parse_json_llm_block(self: "JSONParser") -> JSONReturnType:
10
+ """
11
+ Extracts and normalizes JSON enclosed in ```json ... ``` blocks.
12
+ """
13
+ # Try to find a ```json ... ``` block
14
+ if self.json_str[self.index : self.index + 7] == "```json":
15
+ i = self.skip_to_character("`", idx=7)
16
+ if self.json_str[self.index + i : self.index + i + 3] == "```":
17
+ self.index += 7 # Move past ```json
18
+ return self.parse_json()
19
+ return False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: json_repair
3
- Version: 0.52.4
3
+ Version: 0.53.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License-Expression: MIT
@@ -21,6 +21,7 @@ src/json_repair.egg-info/SOURCES.txt
21
21
  src/json_repair.egg-info/dependency_links.txt
22
22
  src/json_repair.egg-info/entry_points.txt
23
23
  src/json_repair.egg-info/top_level.txt
24
+ src/json_repair/parse_string_helpers/parse_json_llm_block.py
24
25
  tests/test_json_repair.py
25
26
  tests/test_parse_array.py
26
27
  tests/test_parse_boolean_or_null.py
@@ -100,3 +100,13 @@ def test_leading_trailing_characters():
100
100
  ```""")
101
101
  == '{"key": "value"}'
102
102
  )
103
+
104
+
105
+ def test_string_json_llm_block():
106
+ assert repair_json('{"key": "``"') == '{"key": "``"}'
107
+ assert repair_json('{"key": "```json"') == '{"key": "```json"}'
108
+ assert (
109
+ repair_json('{"key": "```json {"key": [{"key1": 1},{"key2": 2}]}```"}')
110
+ == '{"key": {"key": [{"key1": 1}, {"key2": 2}]}}'
111
+ )
112
+ assert repair_json('{"response": "```json{}"') == '{"response": "```json{}"}'
File without changes
File without changes
File without changes