json-repair 0.29.3__tar.gz → 0.29.5__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {json_repair-0.29.3/src/json_repair.egg-info → json_repair-0.29.5}/PKG-INFO +1 -1
- {json_repair-0.29.3 → json_repair-0.29.5}/pyproject.toml +1 -1
- json_repair-0.29.5/src/json_repair/json_context.py +45 -0
- {json_repair-0.29.3 → json_repair-0.29.5}/src/json_repair/json_parser.py +42 -43
- {json_repair-0.29.3 → json_repair-0.29.5/src/json_repair.egg-info}/PKG-INFO +1 -1
- {json_repair-0.29.3 → json_repair-0.29.5}/tests/test_performance.py +6 -6
- json_repair-0.29.3/src/json_repair/json_context.py +0 -69
- {json_repair-0.29.3 → json_repair-0.29.5}/LICENSE +0 -0
- {json_repair-0.29.3 → json_repair-0.29.5}/README.md +0 -0
- {json_repair-0.29.3 → json_repair-0.29.5}/setup.cfg +0 -0
- {json_repair-0.29.3 → json_repair-0.29.5}/src/json_repair/__init__.py +0 -0
- {json_repair-0.29.3 → json_repair-0.29.5}/src/json_repair/__main__.py +0 -0
- {json_repair-0.29.3 → json_repair-0.29.5}/src/json_repair/json_repair.py +0 -0
- {json_repair-0.29.3 → json_repair-0.29.5}/src/json_repair/py.typed +0 -0
- {json_repair-0.29.3 → json_repair-0.29.5}/src/json_repair/string_file_wrapper.py +0 -0
- {json_repair-0.29.3 → json_repair-0.29.5}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.29.3 → json_repair-0.29.5}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.29.3 → json_repair-0.29.5}/src/json_repair.egg-info/entry_points.txt +0 -0
- {json_repair-0.29.3 → json_repair-0.29.5}/src/json_repair.egg-info/top_level.txt +0 -0
- {json_repair-0.29.3 → json_repair-0.29.5}/tests/test_coverage.py +0 -0
- {json_repair-0.29.3 → json_repair-0.29.5}/tests/test_json_repair.py +0 -0
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.29.3"
|
6
|
+
version = "0.29.5"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -0,0 +1,45 @@
|
|
1
|
+
from enum import Enum, auto
|
2
|
+
from typing import List, Optional
|
3
|
+
|
4
|
+
|
5
|
+
class ContextValues(Enum):
|
6
|
+
OBJECT_KEY = auto()
|
7
|
+
OBJECT_VALUE = auto()
|
8
|
+
ARRAY = auto()
|
9
|
+
|
10
|
+
|
11
|
+
class JsonContext:
|
12
|
+
def __init__(self) -> None:
|
13
|
+
self.context: List[ContextValues] = []
|
14
|
+
self.current: Optional[ContextValues] = None
|
15
|
+
self.empty: bool = True
|
16
|
+
|
17
|
+
def set(self, value: ContextValues) -> None:
|
18
|
+
"""
|
19
|
+
Set a new context value.
|
20
|
+
|
21
|
+
Args:
|
22
|
+
value (ContextValues): The context value to be added.
|
23
|
+
|
24
|
+
Returns:
|
25
|
+
None
|
26
|
+
"""
|
27
|
+
# If a value is provided update the context variable and save in stack
|
28
|
+
if value:
|
29
|
+
self.context.append(value)
|
30
|
+
self.current = value
|
31
|
+
self.empty = False
|
32
|
+
|
33
|
+
def reset(self) -> None:
|
34
|
+
"""
|
35
|
+
Remove the most recent context value.
|
36
|
+
|
37
|
+
Returns:
|
38
|
+
None
|
39
|
+
"""
|
40
|
+
try:
|
41
|
+
self.context.pop()
|
42
|
+
self.current = self.context[-1]
|
43
|
+
except IndexError:
|
44
|
+
self.current = None
|
45
|
+
self.empty = True
|
@@ -34,7 +34,8 @@ class JSONParser:
|
|
34
34
|
self.logger: List[Dict[str, str]] = []
|
35
35
|
self.log = self._log
|
36
36
|
else:
|
37
|
-
|
37
|
+
# No-op
|
38
|
+
self.log = lambda *args, **kwargs: None
|
38
39
|
|
39
40
|
def parse(
|
40
41
|
self,
|
@@ -88,12 +89,10 @@ class JSONParser:
|
|
88
89
|
)
|
89
90
|
return ""
|
90
91
|
# <string> starts with a quote
|
91
|
-
elif not self.context.is_empty() and (
|
92
|
-
char in ['"', "'", "“"] or char.isalpha()
|
93
|
-
):
|
92
|
+
elif not self.context.empty and (char in ['"', "'", "“"] or char.isalpha()):
|
94
93
|
return self.parse_string()
|
95
94
|
# <number> starts with [0-9] or minus
|
96
|
-
elif not self.context.is_empty() and (
|
95
|
+
elif not self.context.empty and (
|
97
96
|
char.isdigit() or char == "-" or char == "."
|
98
97
|
):
|
99
98
|
return self.parse_number()
|
@@ -234,8 +233,9 @@ class JSONParser:
|
|
234
233
|
elif char.isalnum():
|
235
234
|
# This could be a <boolean> and not a string. Because (T)rue or (F)alse or (N)ull are valid
|
236
235
|
# But remember, object keys are only of type string
|
237
|
-
if
|
238
|
-
|
236
|
+
if (
|
237
|
+
char.lower() in ["t", "f", "n"]
|
238
|
+
and self.context.current != ContextValues.OBJECT_KEY
|
239
239
|
):
|
240
240
|
value = self.parse_boolean_or_null()
|
241
241
|
if value != "":
|
@@ -255,15 +255,13 @@ class JSONParser:
|
|
255
255
|
if self.get_char_at() == lstring_delimiter:
|
256
256
|
# If it's an empty key, this was easy
|
257
257
|
if (
|
258
|
-
self.context.is_current(ContextValues.OBJECT_KEY)
|
258
|
+
self.context.current == ContextValues.OBJECT_KEY
|
259
259
|
and self.get_char_at(1) == ":"
|
260
260
|
):
|
261
261
|
self.index += 1
|
262
262
|
return ""
|
263
263
|
# Find the next delimiter
|
264
|
-
i = self.skip_to_character(
|
265
|
-
character=rstring_delimiter, idx=1, move_main_index=False
|
266
|
-
)
|
264
|
+
i = self.skip_to_character(character=rstring_delimiter, idx=1)
|
267
265
|
next_c = self.get_char_at(i)
|
268
266
|
# Now check that the next character is also a delimiter to ensure that we have "".....""
|
269
267
|
# In that case we ignore this rstring delimiter
|
@@ -296,22 +294,23 @@ class JSONParser:
|
|
296
294
|
while char and char != rstring_delimiter:
|
297
295
|
if (
|
298
296
|
missing_quotes
|
299
|
-
and self.context.is_current(ContextValues.OBJECT_KEY)
|
297
|
+
and self.context.current == ContextValues.OBJECT_KEY
|
300
298
|
and (char == ":" or char.isspace())
|
301
299
|
):
|
302
300
|
self.log(
|
303
301
|
"While parsing a string missing the left delimiter in object key context, we found a :, stopping here",
|
304
302
|
)
|
305
303
|
break
|
306
|
-
if self.context.is_current(ContextValues.OBJECT_VALUE) and char in [
|
304
|
+
if self.context.current == ContextValues.OBJECT_VALUE and char in [
|
307
305
|
",",
|
308
306
|
"}",
|
309
307
|
]:
|
310
308
|
rstring_delimiter_missing = True
|
311
309
|
# check if this is a case in which the closing comma is NOT missing instead
|
312
|
-
i = self.skip_to_character(
|
313
|
-
|
314
|
-
)
|
310
|
+
i = self.skip_to_character(character=rstring_delimiter, idx=1)
|
311
|
+
# If the rstring_delimeter is escaped then it's not what we are looking for
|
312
|
+
while self.get_char_at(i - 1) == "\\":
|
313
|
+
i = self.skip_to_character(character=rstring_delimiter, idx=i + 1)
|
315
314
|
next_c = self.get_char_at(i)
|
316
315
|
if next_c:
|
317
316
|
i += 1
|
@@ -345,8 +344,9 @@ class JSONParser:
|
|
345
344
|
"While parsing a string, we found a doubled quote, ignoring it"
|
346
345
|
)
|
347
346
|
self.index += 1
|
348
|
-
elif
|
349
|
-
|
347
|
+
elif (
|
348
|
+
missing_quotes
|
349
|
+
and self.context.current == ContextValues.OBJECT_VALUE
|
350
350
|
):
|
351
351
|
# In case of missing starting quote I need to check if the delimeter is the end or the beginning of a key
|
352
352
|
i = 1
|
@@ -387,20 +387,20 @@ class JSONParser:
|
|
387
387
|
# If we are in an object context, let's check for the right delimiters
|
388
388
|
if (
|
389
389
|
(
|
390
|
-
self.context.is_any(ContextValues.OBJECT_KEY)
|
390
|
+
ContextValues.OBJECT_KEY in self.context.context
|
391
391
|
and next_c in [":", "}"]
|
392
392
|
)
|
393
393
|
or (
|
394
|
-
self.context.is_any(ContextValues.OBJECT_VALUE)
|
394
|
+
ContextValues.OBJECT_VALUE in self.context.context
|
395
395
|
and next_c == "}"
|
396
396
|
)
|
397
397
|
or (
|
398
|
-
self.context.is_any(ContextValues.ARRAY)
|
398
|
+
ContextValues.ARRAY in self.context.context
|
399
399
|
and next_c in ["]", ","]
|
400
400
|
)
|
401
401
|
or (
|
402
402
|
check_comma_in_object_value
|
403
|
-
and self.context.is_current(ContextValues.OBJECT_VALUE)
|
403
|
+
and self.context.current == ContextValues.OBJECT_VALUE
|
404
404
|
and next_c == ","
|
405
405
|
)
|
406
406
|
):
|
@@ -408,13 +408,17 @@ class JSONParser:
|
|
408
408
|
i += 1
|
409
409
|
next_c = self.get_char_at(i)
|
410
410
|
# If we stopped for a comma in object_value context, let's check if find a "} at the end of the string
|
411
|
-
if
|
412
|
-
|
411
|
+
if (
|
412
|
+
next_c == ","
|
413
|
+
and self.context.current == ContextValues.OBJECT_VALUE
|
413
414
|
):
|
414
415
|
i += 1
|
415
|
-
i = self.skip_to_character(
|
416
|
-
|
417
|
-
)
|
416
|
+
i = self.skip_to_character(character=rstring_delimiter, idx=i)
|
417
|
+
# If the rstring_delimeter is escaped then it's not what we are looking for
|
418
|
+
while self.get_char_at(i - 1) == "\\":
|
419
|
+
i = self.skip_to_character(
|
420
|
+
character=rstring_delimiter, idx=i + 1
|
421
|
+
)
|
418
422
|
next_c = self.get_char_at(i)
|
419
423
|
# Ok now I found a delimiter, let's skip whitespaces and see if next we find a }
|
420
424
|
i += 1
|
@@ -429,16 +433,19 @@ class JSONParser:
|
|
429
433
|
self.index += 1
|
430
434
|
char = self.get_char_at()
|
431
435
|
elif next_c == rstring_delimiter:
|
432
|
-
if self.context.is_current(ContextValues.OBJECT_VALUE):
|
436
|
+
if self.context.current == ContextValues.OBJECT_VALUE:
|
433
437
|
# But this might not be it! This could be just a missing comma
|
434
438
|
# We found a delimiter and we need to check if this is a key
|
435
439
|
# so find a rstring_delimiter and a colon after
|
436
440
|
i += 1
|
437
441
|
i = self.skip_to_character(
|
438
|
-
character=rstring_delimiter,
|
439
|
-
idx=i,
|
440
|
-
move_main_index=False,
|
442
|
+
character=rstring_delimiter, idx=i
|
441
443
|
)
|
444
|
+
# If the rstring_delimeter is escaped then it's not what we are looking for
|
445
|
+
while self.get_char_at(i - 1) == "\\":
|
446
|
+
i = self.skip_to_character(
|
447
|
+
character=rstring_delimiter, idx=i + 1
|
448
|
+
)
|
442
449
|
i += 1
|
443
450
|
next_c = self.get_char_at(i)
|
444
451
|
while next_c and next_c != ":":
|
@@ -462,7 +469,7 @@ class JSONParser:
|
|
462
469
|
if (
|
463
470
|
char
|
464
471
|
and missing_quotes
|
465
|
-
and self.context.is_current(ContextValues.OBJECT_KEY)
|
472
|
+
and self.context.current == ContextValues.OBJECT_KEY
|
466
473
|
and char.isspace()
|
467
474
|
):
|
468
475
|
self.log(
|
@@ -488,7 +495,7 @@ class JSONParser:
|
|
488
495
|
number_str = ""
|
489
496
|
number_chars = set("0123456789-.eE/,")
|
490
497
|
char = self.get_char_at()
|
491
|
-
is_array = self.context.is_current(ContextValues.ARRAY)
|
498
|
+
is_array = self.context.current == ContextValues.ARRAY
|
492
499
|
while char and char in number_chars and (char != "," or not is_array):
|
493
500
|
number_str += char
|
494
501
|
self.index += 1
|
@@ -561,9 +568,7 @@ class JSONParser:
|
|
561
568
|
return idx
|
562
569
|
return idx
|
563
570
|
|
564
|
-
def skip_to_character(
|
565
|
-
self, character: str, idx: int = 0, move_main_index=True
|
566
|
-
) -> int:
|
571
|
+
def skip_to_character(self, character: str, idx: int = 0) -> int:
|
567
572
|
"""
|
568
573
|
This function quickly iterates to find a character, syntactic sugar to make the code more concise
|
569
574
|
"""
|
@@ -572,10 +577,7 @@ class JSONParser:
|
|
572
577
|
except IndexError:
|
573
578
|
return idx
|
574
579
|
while char != character:
|
575
|
-
|
576
|
-
self.index += 1
|
577
|
-
else:
|
578
|
-
idx += 1
|
580
|
+
idx += 1
|
579
581
|
try:
|
580
582
|
char = self.json_str[self.index + idx]
|
581
583
|
except IndexError:
|
@@ -593,6 +595,3 @@ class JSONParser:
|
|
593
595
|
"context": context,
|
594
596
|
}
|
595
597
|
)
|
596
|
-
|
597
|
-
def noop(*args: Any, **kwargs: Any) -> None:
|
598
|
-
pass
|
@@ -19,7 +19,7 @@ def test_true_true_correct(benchmark):
|
|
19
19
|
mean_time = benchmark.stats.get("median")
|
20
20
|
|
21
21
|
# Define your time threshold in seconds
|
22
|
-
max_time =
|
22
|
+
max_time = 1.8 / 10 ** 3 # 1.8 millisecond
|
23
23
|
|
24
24
|
# Assert that the average time is below the threshold
|
25
25
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -31,7 +31,7 @@ def test_true_true_incorrect(benchmark):
|
|
31
31
|
mean_time = benchmark.stats.get("median")
|
32
32
|
|
33
33
|
# Define your time threshold in seconds
|
34
|
-
max_time =
|
34
|
+
max_time = 1.8 / 10 ** 3 # 1.8 millisecond
|
35
35
|
|
36
36
|
# Assert that the average time is below the threshold
|
37
37
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -53,7 +53,7 @@ def test_true_false_incorrect(benchmark):
|
|
53
53
|
mean_time = benchmark.stats.get("median")
|
54
54
|
|
55
55
|
# Define your time threshold in seconds
|
56
|
-
max_time =
|
56
|
+
max_time = 1.8 / 10 ** 3 # 1.8 millisecond
|
57
57
|
|
58
58
|
# Assert that the average time is below the threshold
|
59
59
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -64,7 +64,7 @@ def test_false_true_correct(benchmark):
|
|
64
64
|
mean_time = benchmark.stats.get("median")
|
65
65
|
|
66
66
|
# Define your time threshold in seconds
|
67
|
-
max_time =
|
67
|
+
max_time = 1.8 / 10 ** 3 # 1.8 millisecond
|
68
68
|
|
69
69
|
# Assert that the average time is below the threshold
|
70
70
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -75,7 +75,7 @@ def test_false_true_incorrect(benchmark):
|
|
75
75
|
mean_time = benchmark.stats.get("median")
|
76
76
|
|
77
77
|
# Define your time threshold in seconds
|
78
|
-
max_time =
|
78
|
+
max_time = 1.8 / 10 ** 3 # 1.8 millisecond
|
79
79
|
|
80
80
|
# Assert that the average time is below the threshold
|
81
81
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -97,7 +97,7 @@ def test_false_false_incorrect(benchmark):
|
|
97
97
|
mean_time = benchmark.stats.get("median")
|
98
98
|
|
99
99
|
# Define your time threshold in seconds
|
100
|
-
max_time =
|
100
|
+
max_time = 1.8 / 10 ** 3 # 1.8 millisecond
|
101
101
|
|
102
102
|
# Assert that the average time is below the threshold
|
103
103
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -1,69 +0,0 @@
|
|
1
|
-
from enum import Enum, auto
|
2
|
-
from typing import List
|
3
|
-
|
4
|
-
|
5
|
-
class ContextValues(Enum):
|
6
|
-
OBJECT_KEY = auto()
|
7
|
-
OBJECT_VALUE = auto()
|
8
|
-
ARRAY = auto()
|
9
|
-
|
10
|
-
|
11
|
-
class JsonContext:
|
12
|
-
def __init__(self) -> None:
|
13
|
-
self.context: List[ContextValues] = []
|
14
|
-
|
15
|
-
def set(self, value: ContextValues) -> None:
|
16
|
-
"""
|
17
|
-
Set a new context value.
|
18
|
-
|
19
|
-
Args:
|
20
|
-
value (ContextValues): The context value to be added.
|
21
|
-
|
22
|
-
Returns:
|
23
|
-
None
|
24
|
-
"""
|
25
|
-
# If a value is provided update the context variable and save in stack
|
26
|
-
if value:
|
27
|
-
self.context.append(value)
|
28
|
-
|
29
|
-
def reset(self) -> None:
|
30
|
-
"""
|
31
|
-
Remove the most recent context value.
|
32
|
-
|
33
|
-
Returns:
|
34
|
-
None
|
35
|
-
"""
|
36
|
-
self.context.pop()
|
37
|
-
|
38
|
-
def is_current(self, context: ContextValues) -> bool:
|
39
|
-
"""
|
40
|
-
Check if the given context is the current (most recent) context.
|
41
|
-
|
42
|
-
Args:
|
43
|
-
context (ContextValues): The context value to check.
|
44
|
-
|
45
|
-
Returns:
|
46
|
-
bool: True if the given context is the same as the most recent context in the stack, False otherwise.
|
47
|
-
"""
|
48
|
-
return self.context[-1] == context
|
49
|
-
|
50
|
-
def is_any(self, context: ContextValues) -> bool:
|
51
|
-
"""
|
52
|
-
Check if the given context exists anywhere in the context stack.
|
53
|
-
|
54
|
-
Args:
|
55
|
-
context (ContextValues): The context value to check.
|
56
|
-
|
57
|
-
Returns:
|
58
|
-
bool: True if the given context exists in the stack, False otherwise.
|
59
|
-
"""
|
60
|
-
return context in self.context
|
61
|
-
|
62
|
-
def is_empty(self) -> bool:
|
63
|
-
"""
|
64
|
-
Check if the context stack is empty.
|
65
|
-
|
66
|
-
Returns:
|
67
|
-
bool: True if the context stack is empty, False otherwise.
|
68
|
-
"""
|
69
|
-
return len(self.context) == 0
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|