json-repair 0.52.4__py3-none-any.whl → 0.53.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_parser.py +25 -16
- json_repair/parse_object.py +1 -1
- json_repair/parse_string.py +9 -1
- json_repair/parse_string_helpers/parse_json_llm_block.py +19 -0
- {json_repair-0.52.4.dist-info → json_repair-0.53.0.dist-info}/METADATA +1 -1
- {json_repair-0.52.4.dist-info → json_repair-0.53.0.dist-info}/RECORD +10 -9
- {json_repair-0.52.4.dist-info → json_repair-0.53.0.dist-info}/WHEEL +0 -0
- {json_repair-0.52.4.dist-info → json_repair-0.53.0.dist-info}/entry_points.txt +0 -0
- {json_repair-0.52.4.dist-info → json_repair-0.53.0.dist-info}/licenses/LICENSE +0 -0
- {json_repair-0.52.4.dist-info → json_repair-0.53.0.dist-info}/top_level.txt +0 -0
json_repair/json_parser.py
CHANGED
|
@@ -158,23 +158,32 @@ class JSONParser:
|
|
|
158
158
|
|
|
159
159
|
def skip_to_character(self, character: str | list[str], idx: int = 0) -> int:
|
|
160
160
|
"""
|
|
161
|
-
|
|
161
|
+
Advance from (self.index + idx) until we hit an *unescaped* target character.
|
|
162
|
+
Returns the offset (idx) from self.index to that position, or the distance to the end if not found.
|
|
162
163
|
"""
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
while
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
164
|
+
targets = set(character) if isinstance(character, list) else {character}
|
|
165
|
+
i = self.index + idx
|
|
166
|
+
n = len(self.json_str)
|
|
167
|
+
backslashes = 0 # count of consecutive '\' immediately before current char
|
|
168
|
+
|
|
169
|
+
while i < n:
|
|
170
|
+
ch = self.json_str[i]
|
|
171
|
+
|
|
172
|
+
if ch == "\\":
|
|
173
|
+
backslashes += 1
|
|
174
|
+
i += 1
|
|
175
|
+
continue
|
|
176
|
+
|
|
177
|
+
# ch is not a backslash; if it's a target and not escaped (even backslashes), we're done
|
|
178
|
+
if ch in targets and (backslashes % 2 == 0):
|
|
179
|
+
return i - self.index
|
|
180
|
+
|
|
181
|
+
# reset backslash run when we see a non-backslash
|
|
182
|
+
backslashes = 0
|
|
183
|
+
i += 1
|
|
184
|
+
|
|
185
|
+
# not found; return distance to end
|
|
186
|
+
return n - self.index
|
|
178
187
|
|
|
179
188
|
def _log(self, text: str) -> None:
|
|
180
189
|
window: int = 10
|
json_repair/parse_object.py
CHANGED
|
@@ -7,7 +7,7 @@ if TYPE_CHECKING:
|
|
|
7
7
|
from .json_parser import JSONParser
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
def parse_object(self: "JSONParser") ->
|
|
10
|
+
def parse_object(self: "JSONParser") -> JSONReturnType:
|
|
11
11
|
# <object> ::= '{' [ <member> *(', ' <member>) ] '}' ; A sequence of 'members'
|
|
12
12
|
obj: dict[str, JSONReturnType] = {}
|
|
13
13
|
start_index = self.index
|
json_repair/parse_string.py
CHANGED
|
@@ -2,6 +2,7 @@ from typing import TYPE_CHECKING
|
|
|
2
2
|
|
|
3
3
|
from .constants import STRING_DELIMITERS, JSONReturnType
|
|
4
4
|
from .json_context import ContextValues
|
|
5
|
+
from .parse_string_helpers.parse_json_llm_block import parse_json_llm_block
|
|
5
6
|
|
|
6
7
|
if TYPE_CHECKING:
|
|
7
8
|
from .json_parser import JSONParser
|
|
@@ -49,7 +50,14 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
|
|
|
49
50
|
|
|
50
51
|
if not missing_quotes:
|
|
51
52
|
self.index += 1
|
|
52
|
-
|
|
53
|
+
if self.get_char_at() == "`":
|
|
54
|
+
ret_val = parse_json_llm_block(self)
|
|
55
|
+
# If we found a valid JSON block, return it, otherwise continue parsing the string
|
|
56
|
+
if ret_val is not False:
|
|
57
|
+
return ret_val
|
|
58
|
+
self.log(
|
|
59
|
+
"While parsing a string, we found code fences but they did not enclose valid JSON, continuing parsing the string",
|
|
60
|
+
)
|
|
53
61
|
# There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
|
|
54
62
|
if self.get_char_at() in STRING_DELIMITERS and self.get_char_at() == lstring_delimiter:
|
|
55
63
|
# If it's an empty key, this was easy
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from typing import TYPE_CHECKING
|
|
2
|
+
|
|
3
|
+
from ..constants import JSONReturnType # noqa: TID252
|
|
4
|
+
|
|
5
|
+
if TYPE_CHECKING:
|
|
6
|
+
from ..json_parser import JSONParser # noqa: TID252
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def parse_json_llm_block(self: "JSONParser") -> JSONReturnType:
|
|
10
|
+
"""
|
|
11
|
+
Extracts and normalizes JSON enclosed in ```json ... ``` blocks.
|
|
12
|
+
"""
|
|
13
|
+
# Try to find a ```json ... ``` block
|
|
14
|
+
if self.json_str[self.index : self.index + 7] == "```json":
|
|
15
|
+
i = self.skip_to_character("`", idx=7)
|
|
16
|
+
if self.json_str[self.index + i : self.index + i + 3] == "```":
|
|
17
|
+
self.index += 7 # Move past ```json
|
|
18
|
+
return self.parse_json()
|
|
19
|
+
return False
|
|
@@ -2,20 +2,21 @@ json_repair/__init__.py,sha256=JdJIZNCKV3MfIviryqK8NH8yGssCta2-192CekcwH-o,174
|
|
|
2
2
|
json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
|
|
3
3
|
json_repair/constants.py,sha256=cv2gvyosuq0me0600WyTysM9avrtfXPuXYR26tawcuo,158
|
|
4
4
|
json_repair/json_context.py,sha256=WsMOjqpGSr6aaDONcrk8UFtTurzWon2Qq9AoBBYseoI,934
|
|
5
|
-
json_repair/json_parser.py,sha256=
|
|
5
|
+
json_repair/json_parser.py,sha256=vy5Z8aiJUVhVmvYEgy0dkYy5WgUmyOeS6PEFiR3cW44,7948
|
|
6
6
|
json_repair/json_repair.py,sha256=sDhXzDZxu0QmaFzICPTtf_q7yOY1A1Lf_iQG6Potsco,11572
|
|
7
7
|
json_repair/object_comparer.py,sha256=XKV3MRab8H7_v4sm-wpEa5le0XX9OeycWo5S-MFm-GI,1716
|
|
8
8
|
json_repair/parse_array.py,sha256=-rh65JcfT-FtXiR6s8RYlMfI-6LzVr08ytlDh6Z2CFE,2181
|
|
9
9
|
json_repair/parse_boolean_or_null.py,sha256=WMSkvvxsp4wvauBcDqtt9WnLMD5SMoxeRfZFXp3FEBc,890
|
|
10
10
|
json_repair/parse_comment.py,sha256=JHtQ_QlxOvPNnMh7lhUaoTjFGelqjhTNq7qn9xUE7SU,2648
|
|
11
11
|
json_repair/parse_number.py,sha256=33zAtkbuVzi9Lqjxu7cXn9WlVzd3WjRx9Ln_LFzVL4o,1259
|
|
12
|
-
json_repair/parse_object.py,sha256=
|
|
13
|
-
json_repair/parse_string.py,sha256
|
|
12
|
+
json_repair/parse_object.py,sha256=rnuH5Oxo98OrXhktF0wrOC1vRb5Th_m819Li1EFJzm4,5571
|
|
13
|
+
json_repair/parse_string.py,sha256=--coxoyH4nxl7osxgs1fIu31IEtB0HHwVbbOewypG4g,26146
|
|
14
14
|
json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
15
|
json_repair/string_file_wrapper.py,sha256=tGkWBEUPE-CZPf4uSM5NE9oSDTpskX0myJiXsl-gbds,4333
|
|
16
|
-
json_repair
|
|
17
|
-
json_repair-0.
|
|
18
|
-
json_repair-0.
|
|
19
|
-
json_repair-0.
|
|
20
|
-
json_repair-0.
|
|
21
|
-
json_repair-0.
|
|
16
|
+
json_repair/parse_string_helpers/parse_json_llm_block.py,sha256=taREF3pwb35kGBGJYbUHkTybATX3GI-SOwOz3yXaEQs,644
|
|
17
|
+
json_repair-0.53.0.dist-info/licenses/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
|
18
|
+
json_repair-0.53.0.dist-info/METADATA,sha256=JvMUVYGDDIzmym7MqbQ6k6PjbnuuskW_myvk0EWp7V8,11027
|
|
19
|
+
json_repair-0.53.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
20
|
+
json_repair-0.53.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
|
|
21
|
+
json_repair-0.53.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
|
22
|
+
json_repair-0.53.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|