json-repair 0.29.3__tar.gz → 0.29.4__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {json_repair-0.29.3/src/json_repair.egg-info → json_repair-0.29.4}/PKG-INFO +1 -1
- {json_repair-0.29.3 → json_repair-0.29.4}/pyproject.toml +1 -1
- json_repair-0.29.4/src/json_repair/json_context.py +45 -0
- {json_repair-0.29.3 → json_repair-0.29.4}/src/json_repair/json_parser.py +29 -43
- {json_repair-0.29.3 → json_repair-0.29.4/src/json_repair.egg-info}/PKG-INFO +1 -1
- {json_repair-0.29.3 → json_repair-0.29.4}/tests/test_performance.py +6 -6
- json_repair-0.29.3/src/json_repair/json_context.py +0 -69
- {json_repair-0.29.3 → json_repair-0.29.4}/LICENSE +0 -0
- {json_repair-0.29.3 → json_repair-0.29.4}/README.md +0 -0
- {json_repair-0.29.3 → json_repair-0.29.4}/setup.cfg +0 -0
- {json_repair-0.29.3 → json_repair-0.29.4}/src/json_repair/__init__.py +0 -0
- {json_repair-0.29.3 → json_repair-0.29.4}/src/json_repair/__main__.py +0 -0
- {json_repair-0.29.3 → json_repair-0.29.4}/src/json_repair/json_repair.py +0 -0
- {json_repair-0.29.3 → json_repair-0.29.4}/src/json_repair/py.typed +0 -0
- {json_repair-0.29.3 → json_repair-0.29.4}/src/json_repair/string_file_wrapper.py +0 -0
- {json_repair-0.29.3 → json_repair-0.29.4}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.29.3 → json_repair-0.29.4}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.29.3 → json_repair-0.29.4}/src/json_repair.egg-info/entry_points.txt +0 -0
- {json_repair-0.29.3 → json_repair-0.29.4}/src/json_repair.egg-info/top_level.txt +0 -0
- {json_repair-0.29.3 → json_repair-0.29.4}/tests/test_coverage.py +0 -0
- {json_repair-0.29.3 → json_repair-0.29.4}/tests/test_json_repair.py +0 -0
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.29.3"
|
6
|
+
version = "0.29.4"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -0,0 +1,45 @@
|
|
1
|
+
from enum import Enum, auto
|
2
|
+
from typing import List, Optional
|
3
|
+
|
4
|
+
|
5
|
+
class ContextValues(Enum):
|
6
|
+
OBJECT_KEY = auto()
|
7
|
+
OBJECT_VALUE = auto()
|
8
|
+
ARRAY = auto()
|
9
|
+
|
10
|
+
|
11
|
+
class JsonContext:
|
12
|
+
def __init__(self) -> None:
|
13
|
+
self.context: List[ContextValues] = []
|
14
|
+
self.current: Optional[ContextValues] = None
|
15
|
+
self.empty: bool = True
|
16
|
+
|
17
|
+
def set(self, value: ContextValues) -> None:
|
18
|
+
"""
|
19
|
+
Set a new context value.
|
20
|
+
|
21
|
+
Args:
|
22
|
+
value (ContextValues): The context value to be added.
|
23
|
+
|
24
|
+
Returns:
|
25
|
+
None
|
26
|
+
"""
|
27
|
+
# If a value is provided update the context variable and save in stack
|
28
|
+
if value:
|
29
|
+
self.context.append(value)
|
30
|
+
self.current = value
|
31
|
+
self.empty = False
|
32
|
+
|
33
|
+
def reset(self) -> None:
|
34
|
+
"""
|
35
|
+
Remove the most recent context value.
|
36
|
+
|
37
|
+
Returns:
|
38
|
+
None
|
39
|
+
"""
|
40
|
+
try:
|
41
|
+
self.context.pop()
|
42
|
+
self.current = self.context[-1]
|
43
|
+
except IndexError:
|
44
|
+
self.current = None
|
45
|
+
self.empty = True
|
@@ -34,7 +34,8 @@ class JSONParser:
|
|
34
34
|
self.logger: List[Dict[str, str]] = []
|
35
35
|
self.log = self._log
|
36
36
|
else:
|
37
|
-
|
37
|
+
# No-op
|
38
|
+
self.log = lambda *args, **kwargs: None
|
38
39
|
|
39
40
|
def parse(
|
40
41
|
self,
|
@@ -88,12 +89,10 @@ class JSONParser:
|
|
88
89
|
)
|
89
90
|
return ""
|
90
91
|
# <string> starts with a quote
|
91
|
-
elif not self.context.is_empty() and (
|
92
|
-
char in ['"', "'", "“"] or char.isalpha()
|
93
|
-
):
|
92
|
+
elif not self.context.empty and (char in ['"', "'", "“"] or char.isalpha()):
|
94
93
|
return self.parse_string()
|
95
94
|
# <number> starts with [0-9] or minus
|
96
|
-
elif not self.context.is_empty() and (
|
95
|
+
elif not self.context.empty and (
|
97
96
|
char.isdigit() or char == "-" or char == "."
|
98
97
|
):
|
99
98
|
return self.parse_number()
|
@@ -234,8 +233,9 @@ class JSONParser:
|
|
234
233
|
elif char.isalnum():
|
235
234
|
# This could be a <boolean> and not a string. Because (T)rue or (F)alse or (N)ull are valid
|
236
235
|
# But remember, object keys are only of type string
|
237
|
-
if
|
238
|
-
|
236
|
+
if (
|
237
|
+
char.lower() in ["t", "f", "n"]
|
238
|
+
and self.context.current != ContextValues.OBJECT_KEY
|
239
239
|
):
|
240
240
|
value = self.parse_boolean_or_null()
|
241
241
|
if value != "":
|
@@ -255,15 +255,13 @@ class JSONParser:
|
|
255
255
|
if self.get_char_at() == lstring_delimiter:
|
256
256
|
# If it's an empty key, this was easy
|
257
257
|
if (
|
258
|
-
self.context.is_current(ContextValues.OBJECT_KEY)
|
258
|
+
self.context.current == ContextValues.OBJECT_KEY
|
259
259
|
and self.get_char_at(1) == ":"
|
260
260
|
):
|
261
261
|
self.index += 1
|
262
262
|
return ""
|
263
263
|
# Find the next delimiter
|
264
|
-
i = self.skip_to_character(
|
265
|
-
character=rstring_delimiter, idx=1, move_main_index=False
|
266
|
-
)
|
264
|
+
i = self.skip_to_character(character=rstring_delimiter, idx=1)
|
267
265
|
next_c = self.get_char_at(i)
|
268
266
|
# Now check that the next character is also a delimiter to ensure that we have "".....""
|
269
267
|
# In that case we ignore this rstring delimiter
|
@@ -296,22 +294,20 @@ class JSONParser:
|
|
296
294
|
while char and char != rstring_delimiter:
|
297
295
|
if (
|
298
296
|
missing_quotes
|
299
|
-
and self.context.is_current(ContextValues.OBJECT_KEY)
|
297
|
+
and self.context.current == ContextValues.OBJECT_KEY
|
300
298
|
and (char == ":" or char.isspace())
|
301
299
|
):
|
302
300
|
self.log(
|
303
301
|
"While parsing a string missing the left delimiter in object key context, we found a :, stopping here",
|
304
302
|
)
|
305
303
|
break
|
306
|
-
if self.context.is_current(ContextValues.OBJECT_VALUE) and char in [
|
304
|
+
if self.context.current == ContextValues.OBJECT_VALUE and char in [
|
307
305
|
",",
|
308
306
|
"}",
|
309
307
|
]:
|
310
308
|
rstring_delimiter_missing = True
|
311
309
|
# check if this is a case in which the closing comma is NOT missing instead
|
312
|
-
i = self.skip_to_character(
|
313
|
-
character=rstring_delimiter, idx=1, move_main_index=False
|
314
|
-
)
|
310
|
+
i = self.skip_to_character(character=rstring_delimiter, idx=1)
|
315
311
|
next_c = self.get_char_at(i)
|
316
312
|
if next_c:
|
317
313
|
i += 1
|
@@ -345,8 +341,9 @@ class JSONParser:
|
|
345
341
|
"While parsing a string, we found a doubled quote, ignoring it"
|
346
342
|
)
|
347
343
|
self.index += 1
|
348
|
-
elif
|
349
|
-
|
344
|
+
elif (
|
345
|
+
missing_quotes
|
346
|
+
and self.context.current == ContextValues.OBJECT_VALUE
|
350
347
|
):
|
351
348
|
# In case of missing starting quote I need to check if the delimeter is the end or the beginning of a key
|
352
349
|
i = 1
|
@@ -387,20 +384,20 @@ class JSONParser:
|
|
387
384
|
# If we are in an object context, let's check for the right delimiters
|
388
385
|
if (
|
389
386
|
(
|
390
|
-
self.context.is_any(ContextValues.OBJECT_KEY)
|
387
|
+
ContextValues.OBJECT_KEY in self.context.context
|
391
388
|
and next_c in [":", "}"]
|
392
389
|
)
|
393
390
|
or (
|
394
|
-
self.context.is_any(ContextValues.OBJECT_VALUE)
|
391
|
+
ContextValues.OBJECT_VALUE in self.context.context
|
395
392
|
and next_c == "}"
|
396
393
|
)
|
397
394
|
or (
|
398
|
-
self.context.is_any(ContextValues.ARRAY)
|
395
|
+
ContextValues.ARRAY in self.context.context
|
399
396
|
and next_c in ["]", ","]
|
400
397
|
)
|
401
398
|
or (
|
402
399
|
check_comma_in_object_value
|
403
|
-
and self.context.is_current(ContextValues.OBJECT_VALUE)
|
400
|
+
and self.context.current == ContextValues.OBJECT_VALUE
|
404
401
|
and next_c == ","
|
405
402
|
)
|
406
403
|
):
|
@@ -408,13 +405,12 @@ class JSONParser:
|
|
408
405
|
i += 1
|
409
406
|
next_c = self.get_char_at(i)
|
410
407
|
# If we stopped for a comma in object_value context, let's check if find a "} at the end of the string
|
411
|
-
if
|
412
|
-
|
408
|
+
if (
|
409
|
+
next_c == ","
|
410
|
+
and self.context.current == ContextValues.OBJECT_VALUE
|
413
411
|
):
|
414
412
|
i += 1
|
415
|
-
i = self.skip_to_character(
|
416
|
-
character=rstring_delimiter, idx=i, move_main_index=False
|
417
|
-
)
|
413
|
+
i = self.skip_to_character(character=rstring_delimiter, idx=i)
|
418
414
|
next_c = self.get_char_at(i)
|
419
415
|
# Ok now I found a delimiter, let's skip whitespaces and see if next we find a }
|
420
416
|
i += 1
|
@@ -429,15 +425,13 @@ class JSONParser:
|
|
429
425
|
self.index += 1
|
430
426
|
char = self.get_char_at()
|
431
427
|
elif next_c == rstring_delimiter:
|
432
|
-
if self.context.is_current(ContextValues.OBJECT_VALUE):
|
428
|
+
if self.context.current == ContextValues.OBJECT_VALUE:
|
433
429
|
# But this might not be it! This could be just a missing comma
|
434
430
|
# We found a delimiter and we need to check if this is a key
|
435
431
|
# so find a rstring_delimiter and a colon after
|
436
432
|
i += 1
|
437
433
|
i = self.skip_to_character(
|
438
|
-
character=rstring_delimiter,
|
439
|
-
idx=i,
|
440
|
-
move_main_index=False,
|
434
|
+
character=rstring_delimiter, idx=i
|
441
435
|
)
|
442
436
|
i += 1
|
443
437
|
next_c = self.get_char_at(i)
|
@@ -462,7 +456,7 @@ class JSONParser:
|
|
462
456
|
if (
|
463
457
|
char
|
464
458
|
and missing_quotes
|
465
|
-
and self.context.is_current(ContextValues.OBJECT_KEY)
|
459
|
+
and self.context.current == ContextValues.OBJECT_KEY
|
466
460
|
and char.isspace()
|
467
461
|
):
|
468
462
|
self.log(
|
@@ -488,7 +482,7 @@ class JSONParser:
|
|
488
482
|
number_str = ""
|
489
483
|
number_chars = set("0123456789-.eE/,")
|
490
484
|
char = self.get_char_at()
|
491
|
-
is_array = self.context.is_current(ContextValues.ARRAY)
|
485
|
+
is_array = self.context.current == ContextValues.ARRAY
|
492
486
|
while char and char in number_chars and (char != "," or not is_array):
|
493
487
|
number_str += char
|
494
488
|
self.index += 1
|
@@ -561,9 +555,7 @@ class JSONParser:
|
|
561
555
|
return idx
|
562
556
|
return idx
|
563
557
|
|
564
|
-
def skip_to_character(
|
565
|
-
self, character: str, idx: int = 0, move_main_index=True
|
566
|
-
) -> int:
|
558
|
+
def skip_to_character(self, character: str, idx: int = 0) -> int:
|
567
559
|
"""
|
568
560
|
This function quickly iterates to find a character, syntactic sugar to make the code more concise
|
569
561
|
"""
|
@@ -572,10 +564,7 @@ class JSONParser:
|
|
572
564
|
except IndexError:
|
573
565
|
return idx
|
574
566
|
while char != character:
|
575
|
-
|
576
|
-
self.index += 1
|
577
|
-
else:
|
578
|
-
idx += 1
|
567
|
+
idx += 1
|
579
568
|
try:
|
580
569
|
char = self.json_str[self.index + idx]
|
581
570
|
except IndexError:
|
@@ -593,6 +582,3 @@ class JSONParser:
|
|
593
582
|
"context": context,
|
594
583
|
}
|
595
584
|
)
|
596
|
-
|
597
|
-
def noop(*args: Any, **kwargs: Any) -> None:
|
598
|
-
pass
|
@@ -19,7 +19,7 @@ def test_true_true_correct(benchmark):
|
|
19
19
|
mean_time = benchmark.stats.get("median")
|
20
20
|
|
21
21
|
# Define your time threshold in seconds
|
22
|
-
max_time =
|
22
|
+
max_time = 1.8 / 10 ** 3 # 1.8 millisecond
|
23
23
|
|
24
24
|
# Assert that the average time is below the threshold
|
25
25
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -31,7 +31,7 @@ def test_true_true_incorrect(benchmark):
|
|
31
31
|
mean_time = benchmark.stats.get("median")
|
32
32
|
|
33
33
|
# Define your time threshold in seconds
|
34
|
-
max_time =
|
34
|
+
max_time = 1.8 / 10 ** 3 # 1.8 millisecond
|
35
35
|
|
36
36
|
# Assert that the average time is below the threshold
|
37
37
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -53,7 +53,7 @@ def test_true_false_incorrect(benchmark):
|
|
53
53
|
mean_time = benchmark.stats.get("median")
|
54
54
|
|
55
55
|
# Define your time threshold in seconds
|
56
|
-
max_time =
|
56
|
+
max_time = 1.8 / 10 ** 3 # 1.8 millisecond
|
57
57
|
|
58
58
|
# Assert that the average time is below the threshold
|
59
59
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -64,7 +64,7 @@ def test_false_true_correct(benchmark):
|
|
64
64
|
mean_time = benchmark.stats.get("median")
|
65
65
|
|
66
66
|
# Define your time threshold in seconds
|
67
|
-
max_time =
|
67
|
+
max_time = 1.8 / 10 ** 3 # 1.8 millisecond
|
68
68
|
|
69
69
|
# Assert that the average time is below the threshold
|
70
70
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -75,7 +75,7 @@ def test_false_true_incorrect(benchmark):
|
|
75
75
|
mean_time = benchmark.stats.get("median")
|
76
76
|
|
77
77
|
# Define your time threshold in seconds
|
78
|
-
max_time =
|
78
|
+
max_time = 1.8 / 10 ** 3 # 1.8 millisecond
|
79
79
|
|
80
80
|
# Assert that the average time is below the threshold
|
81
81
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -97,7 +97,7 @@ def test_false_false_incorrect(benchmark):
|
|
97
97
|
mean_time = benchmark.stats.get("median")
|
98
98
|
|
99
99
|
# Define your time threshold in seconds
|
100
|
-
max_time =
|
100
|
+
max_time = 1.8 / 10 ** 3 # 1.8 millisecond
|
101
101
|
|
102
102
|
# Assert that the average time is below the threshold
|
103
103
|
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
@@ -1,69 +0,0 @@
|
|
1
|
-
from enum import Enum, auto
|
2
|
-
from typing import List
|
3
|
-
|
4
|
-
|
5
|
-
class ContextValues(Enum):
|
6
|
-
OBJECT_KEY = auto()
|
7
|
-
OBJECT_VALUE = auto()
|
8
|
-
ARRAY = auto()
|
9
|
-
|
10
|
-
|
11
|
-
class JsonContext:
|
12
|
-
def __init__(self) -> None:
|
13
|
-
self.context: List[ContextValues] = []
|
14
|
-
|
15
|
-
def set(self, value: ContextValues) -> None:
|
16
|
-
"""
|
17
|
-
Set a new context value.
|
18
|
-
|
19
|
-
Args:
|
20
|
-
value (ContextValues): The context value to be added.
|
21
|
-
|
22
|
-
Returns:
|
23
|
-
None
|
24
|
-
"""
|
25
|
-
# If a value is provided update the context variable and save in stack
|
26
|
-
if value:
|
27
|
-
self.context.append(value)
|
28
|
-
|
29
|
-
def reset(self) -> None:
|
30
|
-
"""
|
31
|
-
Remove the most recent context value.
|
32
|
-
|
33
|
-
Returns:
|
34
|
-
None
|
35
|
-
"""
|
36
|
-
self.context.pop()
|
37
|
-
|
38
|
-
def is_current(self, context: ContextValues) -> bool:
|
39
|
-
"""
|
40
|
-
Check if the given context is the current (most recent) context.
|
41
|
-
|
42
|
-
Args:
|
43
|
-
context (ContextValues): The context value to check.
|
44
|
-
|
45
|
-
Returns:
|
46
|
-
bool: True if the given context is the same as the most recent context in the stack, False otherwise.
|
47
|
-
"""
|
48
|
-
return self.context[-1] == context
|
49
|
-
|
50
|
-
def is_any(self, context: ContextValues) -> bool:
|
51
|
-
"""
|
52
|
-
Check if the given context exists anywhere in the context stack.
|
53
|
-
|
54
|
-
Args:
|
55
|
-
context (ContextValues): The context value to check.
|
56
|
-
|
57
|
-
Returns:
|
58
|
-
bool: True if the given context exists in the stack, False otherwise.
|
59
|
-
"""
|
60
|
-
return context in self.context
|
61
|
-
|
62
|
-
def is_empty(self) -> bool:
|
63
|
-
"""
|
64
|
-
Check if the context stack is empty.
|
65
|
-
|
66
|
-
Returns:
|
67
|
-
bool: True if the context stack is empty, False otherwise.
|
68
|
-
"""
|
69
|
-
return len(self.context) == 0
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|