json-repair 0.54.3__tar.gz → 0.55.0__tar.gz
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- {json_repair-0.54.3/src/json_repair.egg-info → json_repair-0.55.0}/PKG-INFO +1 -1
- {json_repair-0.54.3 → json_repair-0.55.0}/pyproject.toml +1 -1
- {json_repair-0.54.3 → json_repair-0.55.0}/src/json_repair/parse_number.py +1 -1
- {json_repair-0.54.3 → json_repair-0.55.0}/src/json_repair/parse_object.py +17 -3
- {json_repair-0.54.3 → json_repair-0.55.0}/src/json_repair/parse_string.py +20 -11
- {json_repair-0.54.3 → json_repair-0.55.0}/src/json_repair/utils/string_file_wrapper.py +1 -0
- {json_repair-0.54.3 → json_repair-0.55.0/src/json_repair.egg-info}/PKG-INFO +1 -1
- {json_repair-0.54.3 → json_repair-0.55.0}/tests/test_parse_object.py +1 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/LICENSE +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/README.md +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/setup.cfg +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/src/json_repair/__init__.py +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/src/json_repair/__main__.py +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/src/json_repair/json_parser.py +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/src/json_repair/json_repair.py +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/src/json_repair/parse_array.py +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/src/json_repair/parse_comment.py +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/src/json_repair/parse_string_helpers/parse_boolean_or_null.py +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/src/json_repair/parse_string_helpers/parse_json_llm_block.py +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/src/json_repair/py.typed +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/src/json_repair/utils/constants.py +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/src/json_repair/utils/json_context.py +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/src/json_repair/utils/object_comparer.py +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/src/json_repair.egg-info/entry_points.txt +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/src/json_repair.egg-info/top_level.txt +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/tests/test_json_repair.py +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/tests/test_parse_array.py +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/tests/test_parse_comment.py +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/tests/test_parse_number.py +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/tests/test_parse_string.py +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/tests/test_performance.py +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/tests/test_repair_json_cli.py +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/tests/test_repair_json_from_file.py +0 -0
- {json_repair-0.54.3 → json_repair-0.55.0}/tests/test_strict_mode.py +0 -0
|
@@ -30,7 +30,7 @@ def parse_number(self: "JSONParser") -> JSONReturnType:
|
|
|
30
30
|
return self.parse_string()
|
|
31
31
|
try:
|
|
32
32
|
if "," in number_str:
|
|
33
|
-
return
|
|
33
|
+
return number_str
|
|
34
34
|
if "." in number_str or "e" in number_str or "E" in number_str:
|
|
35
35
|
return float(number_str)
|
|
36
36
|
else:
|
|
@@ -58,7 +58,9 @@ def parse_object(self: "JSONParser") -> JSONReturnType:
|
|
|
58
58
|
self.index += 1
|
|
59
59
|
self.skip_whitespaces()
|
|
60
60
|
continue
|
|
61
|
-
|
|
61
|
+
raw_key = self.parse_string()
|
|
62
|
+
assert isinstance(raw_key, str)
|
|
63
|
+
key = raw_key
|
|
62
64
|
if key == "":
|
|
63
65
|
self.skip_whitespaces()
|
|
64
66
|
if key != "" or (key == "" and self.get_char_at() in [":", "}"]):
|
|
@@ -108,9 +110,10 @@ def parse_object(self: "JSONParser") -> JSONReturnType:
|
|
|
108
110
|
self.skip_whitespaces()
|
|
109
111
|
# Corner case, a lone comma
|
|
110
112
|
value: JSONReturnType = ""
|
|
111
|
-
|
|
113
|
+
char = self.get_char_at()
|
|
114
|
+
if char in [",", "}"]:
|
|
112
115
|
self.log(
|
|
113
|
-
"While parsing an object value we found a stray
|
|
116
|
+
f"While parsing an object value we found a stray {char}, ignoring it",
|
|
114
117
|
)
|
|
115
118
|
else:
|
|
116
119
|
value = self.parse_json()
|
|
@@ -151,6 +154,17 @@ def parse_object(self: "JSONParser") -> JSONReturnType:
|
|
|
151
154
|
# This handles cases like '{"key": "value"}, "key2": "value2"}'
|
|
152
155
|
# But only if we're not in a nested context
|
|
153
156
|
if not self.context.empty:
|
|
157
|
+
# Sometimes there could be an extra closing brace that closes the object twice
|
|
158
|
+
# So we check the context to see if the next one in the stack is an object or not
|
|
159
|
+
# If not we skip it
|
|
160
|
+
if self.get_char_at() == "}" and self.context.current not in [
|
|
161
|
+
ContextValues.OBJECT_KEY,
|
|
162
|
+
ContextValues.OBJECT_VALUE,
|
|
163
|
+
]:
|
|
164
|
+
self.log(
|
|
165
|
+
"Found an extra closing brace that shouldn't be there, skipping it",
|
|
166
|
+
)
|
|
167
|
+
self.index += 1
|
|
154
168
|
return obj
|
|
155
169
|
|
|
156
170
|
self.skip_whitespaces()
|
|
@@ -11,8 +11,8 @@ if TYPE_CHECKING:
|
|
|
11
11
|
|
|
12
12
|
def parse_string(self: "JSONParser") -> JSONReturnType:
|
|
13
13
|
# Utility function to append a character to the accumulator and update the index
|
|
14
|
-
def _append_literal_char(acc: str, current_char: str
|
|
15
|
-
acc +=
|
|
14
|
+
def _append_literal_char(acc: str, current_char: str) -> tuple[str, str | None]:
|
|
15
|
+
acc += current_char
|
|
16
16
|
self.index += 1
|
|
17
17
|
char = self.get_char_at()
|
|
18
18
|
return acc, char
|
|
@@ -246,10 +246,12 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
|
|
|
246
246
|
string_acc += char
|
|
247
247
|
self.index += 1
|
|
248
248
|
char = self.get_char_at()
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
string_acc
|
|
252
|
-
|
|
249
|
+
if char is None:
|
|
250
|
+
# Unclosed string ends with a \ character. This character is ignored if stream_stable = True.
|
|
251
|
+
if self.stream_stable and string_acc and string_acc[-1] == "\\":
|
|
252
|
+
string_acc = string_acc[:-1]
|
|
253
|
+
break
|
|
254
|
+
if string_acc and string_acc[-1] == "\\":
|
|
253
255
|
# This is a special case, if people use real strings this might happen
|
|
254
256
|
self.log("Found a stray escape sequence, normalizing it")
|
|
255
257
|
if char in [rstring_delimiter, "t", "n", "r", "b", "\\"]:
|
|
@@ -296,12 +298,11 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
|
|
|
296
298
|
i += 1
|
|
297
299
|
# Skip spaces
|
|
298
300
|
i = self.scroll_whitespaces(idx=i)
|
|
299
|
-
|
|
301
|
+
ch = self.get_char_at(i)
|
|
302
|
+
if ch in [",", "}"]:
|
|
300
303
|
# Ok then this is a missing right quote
|
|
301
304
|
self.log(
|
|
302
|
-
"While parsing a string missing the right delimiter in object key context, we found a "
|
|
303
|
-
+ str(self.get_char_at(i))
|
|
304
|
-
+ " stopping here",
|
|
305
|
+
f"While parsing a string missing the right delimiter in object key context, we found a {ch} stopping here",
|
|
305
306
|
)
|
|
306
307
|
break
|
|
307
308
|
else:
|
|
@@ -386,7 +387,7 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
|
|
|
386
387
|
continue
|
|
387
388
|
elif next_c == rstring_delimiter and self.get_char_at(i - 1) != "\\":
|
|
388
389
|
# Check if self.index:self.index+i is only whitespaces, break if that's the case
|
|
389
|
-
if
|
|
390
|
+
if _only_whitespace_until(self, i):
|
|
390
391
|
break
|
|
391
392
|
if self.context.current == ContextValues.OBJECT_VALUE:
|
|
392
393
|
i = self.scroll_whitespaces(idx=i + 1)
|
|
@@ -481,3 +482,11 @@ def parse_string(self: "JSONParser") -> JSONReturnType:
|
|
|
481
482
|
string_acc = string_acc.rstrip()
|
|
482
483
|
|
|
483
484
|
return string_acc
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
def _only_whitespace_until(self: "JSONParser", end: int) -> bool:
|
|
488
|
+
for j in range(1, end):
|
|
489
|
+
c = self.get_char_at(j)
|
|
490
|
+
if c is not None and not c.isspace():
|
|
491
|
+
return False
|
|
492
|
+
return True
|
|
@@ -137,6 +137,7 @@ class StringFileWrapper:
|
|
|
137
137
|
while self.length is None:
|
|
138
138
|
chunk_index = len(self._chunk_positions)
|
|
139
139
|
self._ensure_chunk_position(chunk_index)
|
|
140
|
+
assert self.length is not None
|
|
140
141
|
return self.length
|
|
141
142
|
|
|
142
143
|
def __setitem__(self, index: int | slice, value: str) -> None: # pragma: no cover
|
|
@@ -88,6 +88,7 @@ def test_parse_object_edge_cases():
|
|
|
88
88
|
repair_json('{"array":[{"key": "value"], "key2": "value2"}')
|
|
89
89
|
== '{"array": [{"key": "value"}], "key2": "value2"}'
|
|
90
90
|
)
|
|
91
|
+
assert repair_json('[{"key":"value"}},{"key":"value"}]') == '[{"key": "value"}, {"key": "value"}]'
|
|
91
92
|
|
|
92
93
|
|
|
93
94
|
def test_parse_object_merge_at_the_end():
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|