json-repair 0.33.0__tar.gz → 0.34.0__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {json_repair-0.33.0/src/json_repair.egg-info → json_repair-0.34.0}/PKG-INFO +1 -1
- {json_repair-0.33.0 → json_repair-0.34.0}/pyproject.toml +1 -1
- {json_repair-0.33.0 → json_repair-0.34.0}/src/json_repair/json_parser.py +65 -26
- {json_repair-0.33.0 → json_repair-0.34.0}/src/json_repair/string_file_wrapper.py +21 -0
- {json_repair-0.33.0 → json_repair-0.34.0/src/json_repair.egg-info}/PKG-INFO +1 -1
- {json_repair-0.33.0 → json_repair-0.34.0}/tests/test_json_repair.py +2 -2
- {json_repair-0.33.0 → json_repair-0.34.0}/LICENSE +0 -0
- {json_repair-0.33.0 → json_repair-0.34.0}/README.md +0 -0
- {json_repair-0.33.0 → json_repair-0.34.0}/setup.cfg +0 -0
- {json_repair-0.33.0 → json_repair-0.34.0}/src/json_repair/__init__.py +0 -0
- {json_repair-0.33.0 → json_repair-0.34.0}/src/json_repair/__main__.py +0 -0
- {json_repair-0.33.0 → json_repair-0.34.0}/src/json_repair/json_context.py +0 -0
- {json_repair-0.33.0 → json_repair-0.34.0}/src/json_repair/json_repair.py +0 -0
- {json_repair-0.33.0 → json_repair-0.34.0}/src/json_repair/py.typed +0 -0
- {json_repair-0.33.0 → json_repair-0.34.0}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.33.0 → json_repair-0.34.0}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.33.0 → json_repair-0.34.0}/src/json_repair.egg-info/entry_points.txt +0 -0
- {json_repair-0.33.0 → json_repair-0.34.0}/src/json_repair.egg-info/top_level.txt +0 -0
- {json_repair-0.33.0 → json_repair-0.34.0}/tests/test_coverage.py +0 -0
- {json_repair-0.33.0 → json_repair-0.34.0}/tests/test_performance.py +0 -0
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.33.0"
|
6
|
+
version = "0.34.0"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -7,6 +7,9 @@ JSONReturnType = Union[Dict[str, Any], List[Any], str, float, int, bool, None]
|
|
7
7
|
|
8
8
|
|
9
9
|
class JSONParser:
|
10
|
+
# Constants
|
11
|
+
STRING_DELIMITERS = ['"', "'", "“", "”"]
|
12
|
+
|
10
13
|
def __init__(
|
11
14
|
self,
|
12
15
|
json_str: Union[str, StringFileWrapper],
|
@@ -89,7 +92,9 @@ class JSONParser:
|
|
89
92
|
)
|
90
93
|
return ""
|
91
94
|
# <string> starts with a quote
|
92
|
-
elif not self.context.empty and (
|
95
|
+
elif not self.context.empty and (
|
96
|
+
char in self.STRING_DELIMITERS or char.isalpha()
|
97
|
+
):
|
93
98
|
return self.parse_string()
|
94
99
|
# <number> starts with [0-9] or minus
|
95
100
|
elif not self.context.empty and (
|
@@ -130,6 +135,8 @@ class JSONParser:
|
|
130
135
|
# <member> starts with a <string>
|
131
136
|
key = ""
|
132
137
|
while self.get_char_at():
|
138
|
+
# The rollback index needs to be updated here in case the key is empty
|
139
|
+
rollback_index = self.index
|
133
140
|
key = str(self.parse_string())
|
134
141
|
|
135
142
|
if key != "" or (key == "" and self.get_char_at() == ":"):
|
@@ -140,6 +147,12 @@ class JSONParser:
|
|
140
147
|
"While parsing an object we found a duplicate key, closing the object here and rolling back the index",
|
141
148
|
)
|
142
149
|
self.index = rollback_index - 1
|
150
|
+
# add an opening curly brace to make this work
|
151
|
+
self.json_str = (
|
152
|
+
self.json_str[: self.index + 1]
|
153
|
+
+ "{"
|
154
|
+
+ self.json_str[self.index + 1 :]
|
155
|
+
)
|
143
156
|
break
|
144
157
|
|
145
158
|
# Skip filler whitespaces
|
@@ -227,7 +240,7 @@ class JSONParser:
|
|
227
240
|
|
228
241
|
char = self.get_char_at()
|
229
242
|
# A valid string can only start with a valid quote or, in our case, with a literal
|
230
|
-
while char and char not in
|
243
|
+
while char and char not in self.STRING_DELIMITERS and not char.isalnum():
|
231
244
|
self.index += 1
|
232
245
|
char = self.get_char_at()
|
233
246
|
|
@@ -262,35 +275,61 @@ class JSONParser:
|
|
262
275
|
if not missing_quotes:
|
263
276
|
self.index += 1
|
264
277
|
|
278
|
+
self.skip_whitespaces_at()
|
265
279
|
# There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
|
266
|
-
if self.get_char_at()
|
267
|
-
# If
|
268
|
-
if (
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
self.index += 1
|
285
|
-
else:
|
286
|
-
# Ok this is not a doubled quote, check if this is an empty string or not
|
287
|
-
i = self.skip_whitespaces_at(idx=1, move_main_index=False)
|
280
|
+
if self.get_char_at() in self.STRING_DELIMITERS:
|
281
|
+
# If the next character is the same type of quote, then we manage it as double quotes
|
282
|
+
if self.get_char_at() == lstring_delimiter:
|
283
|
+
# If it's an empty key, this was easy
|
284
|
+
if (
|
285
|
+
self.context.current == ContextValues.OBJECT_KEY
|
286
|
+
and self.get_char_at(1) == ":"
|
287
|
+
):
|
288
|
+
self.index += 1
|
289
|
+
return ""
|
290
|
+
if self.get_char_at(1) == lstring_delimiter:
|
291
|
+
# There's something fishy about this, we found doubled quotes and then again quotes
|
292
|
+
self.log(
|
293
|
+
"While parsing a string, we found a doubled quote and then a quote again, ignoring it",
|
294
|
+
)
|
295
|
+
return ""
|
296
|
+
# Find the next delimiter
|
297
|
+
i = self.skip_to_character(character=rstring_delimiter, idx=1)
|
288
298
|
next_c = self.get_char_at(i)
|
289
|
-
|
299
|
+
# Now check that the next character is also a delimiter to ensure that we have "".....""
|
300
|
+
# In that case we ignore this rstring delimiter
|
301
|
+
if next_c and (self.get_char_at(i + 1) or "") == rstring_delimiter:
|
290
302
|
self.log(
|
291
|
-
"While parsing a string, we found a
|
303
|
+
"While parsing a string, we found a valid starting doubled quote",
|
292
304
|
)
|
305
|
+
doubled_quotes = True
|
293
306
|
self.index += 1
|
307
|
+
else:
|
308
|
+
# Ok this is not a doubled quote, check if this is an empty string or not
|
309
|
+
i = self.skip_whitespaces_at(idx=1, move_main_index=False)
|
310
|
+
next_c = self.get_char_at(i)
|
311
|
+
if next_c in self.STRING_DELIMITERS + ["{", "["]:
|
312
|
+
# something fishy is going on here
|
313
|
+
self.log(
|
314
|
+
"While parsing a string, we found a doubled quote but also another quote afterwards, ignoring it",
|
315
|
+
)
|
316
|
+
self.index += 1
|
317
|
+
return ""
|
318
|
+
elif next_c not in [",", "]", "}"]:
|
319
|
+
self.log(
|
320
|
+
"While parsing a string, we found a doubled quote but it was a mistake, removing one quote",
|
321
|
+
)
|
322
|
+
self.index += 1
|
323
|
+
else:
|
324
|
+
# Otherwise we need to do another check before continuing
|
325
|
+
i = self.skip_to_character(character=rstring_delimiter, idx=1)
|
326
|
+
next_c = self.get_char_at(i)
|
327
|
+
if not next_c:
|
328
|
+
# mmmm that delimiter never appears again, this is a mistake
|
329
|
+
self.log(
|
330
|
+
"While parsing a string, we found a quote but it was a mistake, ignoring it",
|
331
|
+
)
|
332
|
+
return ""
|
294
333
|
|
295
334
|
# Initialize our return value
|
296
335
|
string_acc = ""
|
@@ -96,3 +96,24 @@ class StringFileWrapper:
|
|
96
96
|
self.length = self.fd.tell()
|
97
97
|
self.fd.seek(current_position)
|
98
98
|
return self.length
|
99
|
+
|
100
|
+
def __setitem__(self, index: Union[int, slice], value: str) -> None:
|
101
|
+
"""
|
102
|
+
Set a character or a slice of characters in the file.
|
103
|
+
|
104
|
+
Args:
|
105
|
+
index (slice): The slice of characters to set.
|
106
|
+
value (str): The value to set at the specified index or slice.
|
107
|
+
"""
|
108
|
+
if isinstance(index, slice):
|
109
|
+
start = index.start or 0
|
110
|
+
else:
|
111
|
+
start = index or 0
|
112
|
+
|
113
|
+
if start < 0:
|
114
|
+
start += len(self)
|
115
|
+
|
116
|
+
current_position = self.fd.tell()
|
117
|
+
self.fd.seek(start)
|
118
|
+
self.fd.write(value)
|
119
|
+
self.fd.seek(current_position)
|
@@ -146,14 +146,14 @@ def test_object_edge_cases():
|
|
146
146
|
assert repair_json('{"lorem": ipsum, sic, datum.",}') == '{"lorem": "ipsum, sic, datum."}'
|
147
147
|
assert repair_json('{"lorem": sic tamet. "ipsum": sic tamet, quick brown fox. "sic": ipsum}') == '{"lorem": "sic tamet.", "ipsum": "sic tamet", "sic": "ipsum"}'
|
148
148
|
assert repair_json('{"lorem_ipsum": "sic tamet, quick brown fox. }') == '{"lorem_ipsum": "sic tamet, quick brown fox."}'
|
149
|
-
assert repair_json('{"key":value, " key2":"value2" }') == '{"key": "value", " key2": "value2"}'
|
149
|
+
assert repair_json('{"key":value, " key2":"value2" }') == '{"key": "value", "key2": "value2"}'
|
150
150
|
assert repair_json('{"key":value "key2":"value2" }') == '{"key": "value", "key2": "value2"}'
|
151
151
|
assert repair_json("{'text': 'words{words in brackets}more words'}") == '{"text": "words{words in brackets}more words"}'
|
152
152
|
assert repair_json('{text:words{words in brackets}}') == '{"text": "words{words in brackets}"}'
|
153
153
|
assert repair_json('{text:words{words in brackets}m}') == '{"text": "words{words in brackets}m"}'
|
154
154
|
assert repair_json('{"key": "value, value2"```') == '{"key": "value, value2"}'
|
155
155
|
assert repair_json('{key:value,key2:value2}') == '{"key": "value", "key2": "value2"}'
|
156
|
-
assert repair_json('[{"lorem": {"ipsum": "sic"}, "lorem": {"ipsum": "sic"}]') == '[{"lorem": {"ipsum": "sic"}}, "lorem", {"ipsum": "sic"}]'
|
156
|
+
assert repair_json('[{"lorem": {"ipsum": "sic"}, """" "lorem": {"ipsum": "sic"}]') == '[{"lorem": {"ipsum": "sic"}}, {"lorem": {"ipsum": "sic"}}]'
|
157
157
|
|
158
158
|
def test_number_edge_cases():
|
159
159
|
assert repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|