json-repair 0.27.2__tar.gz → 0.28.1__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {json_repair-0.27.2/src/json_repair.egg-info → json_repair-0.28.1}/PKG-INFO +2 -1
- {json_repair-0.27.2 → json_repair-0.28.1}/pyproject.toml +6 -2
- {json_repair-0.27.2 → json_repair-0.28.1}/src/json_repair/json_repair.py +107 -24
- json_repair-0.28.1/src/json_repair/py.typed +0 -0
- {json_repair-0.27.2 → json_repair-0.28.1/src/json_repair.egg-info}/PKG-INFO +2 -1
- {json_repair-0.27.2 → json_repair-0.28.1}/src/json_repair.egg-info/SOURCES.txt +2 -0
- json_repair-0.28.1/tests/test_coverage.py +18 -0
- {json_repair-0.27.2 → json_repair-0.28.1}/tests/test_json_repair.py +8 -10
- {json_repair-0.27.2 → json_repair-0.28.1}/LICENSE +0 -0
- {json_repair-0.27.2 → json_repair-0.28.1}/README.md +0 -0
- {json_repair-0.27.2 → json_repair-0.28.1}/setup.cfg +0 -0
- {json_repair-0.27.2 → json_repair-0.28.1}/src/json_repair/__init__.py +0 -0
- {json_repair-0.27.2 → json_repair-0.28.1}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.27.2 → json_repair-0.28.1}/src/json_repair.egg-info/top_level.txt +0 -0
- {json_repair-0.27.2 → json_repair-0.28.1}/tests/test_performance.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.27.2
|
3
|
+
Version: 0.28.1
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -27,6 +27,7 @@ License: MIT License
|
|
27
27
|
|
28
28
|
Project-URL: Homepage, https://github.com/mangiucugna/json_repair/
|
29
29
|
Project-URL: Bug Tracker, https://github.com/mangiucugna/json_repair/issues
|
30
|
+
Project-URL: Live demo, https://mangiucugna.github.io/json_repair/
|
30
31
|
Keywords: JSON,REPAIR,LLM,PARSER
|
31
32
|
Classifier: Programming Language :: Python :: 3
|
32
33
|
Classifier: License :: OSI Approved :: MIT License
|
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.27.2"
|
6
|
+
version = "0.28.1"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -17,11 +17,15 @@ classifiers = [
|
|
17
17
|
"License :: OSI Approved :: MIT License",
|
18
18
|
"Operating System :: OS Independent",
|
19
19
|
]
|
20
|
-
|
21
20
|
[project.urls]
|
22
21
|
"Homepage" = "https://github.com/mangiucugna/json_repair/"
|
23
22
|
"Bug Tracker" = "https://github.com/mangiucugna/json_repair/issues"
|
23
|
+
"Live demo" = "https://mangiucugna.github.io/json_repair/"
|
24
24
|
[tool.pytest.ini_options]
|
25
25
|
pythonpath = [
|
26
26
|
"."
|
27
27
|
]
|
28
|
+
[tool.setuptools.package-data]
|
29
|
+
"pkgname" = ["py.typed"]
|
30
|
+
[tool.setuptools.packages.find]
|
31
|
+
where = ["src"]
|
@@ -24,7 +24,7 @@ All supported use cases are in the unit tests
|
|
24
24
|
|
25
25
|
import os
|
26
26
|
import json
|
27
|
-
from typing import Any, Dict, List, Optional, Union, TextIO, Tuple
|
27
|
+
from typing import Any, Dict, List, Optional, Union, TextIO, Tuple, overload, Literal
|
28
28
|
|
29
29
|
|
30
30
|
class StringFileWrapper:
|
@@ -51,9 +51,6 @@ class StringFileWrapper:
|
|
51
51
|
self.fd.seek(current_position)
|
52
52
|
return self.length
|
53
53
|
|
54
|
-
def __setitem__(self) -> None:
|
55
|
-
raise Exception("This is read-only!")
|
56
|
-
|
57
54
|
|
58
55
|
class LoggerConfig:
|
59
56
|
# This is a type class to simplify the declaration
|
@@ -180,7 +177,7 @@ class JSONParser:
|
|
180
177
|
# <member> starts with a <string>
|
181
178
|
key = ""
|
182
179
|
while self.get_char_at():
|
183
|
-
key = self.parse_string()
|
180
|
+
key = str(self.parse_string())
|
184
181
|
|
185
182
|
if key != "" or (key == "" and self.get_char_at() == ":"):
|
186
183
|
# If the string is empty but there is a object divider, we are done here
|
@@ -258,7 +255,7 @@ class JSONParser:
|
|
258
255
|
self.reset_context()
|
259
256
|
return arr
|
260
257
|
|
261
|
-
def parse_string(self) -> Union[str,
|
258
|
+
def parse_string(self) -> Union[str, bool, None]:
|
262
259
|
# <string> is a string of valid characters enclosed in quotes
|
263
260
|
# i.e. { name: "John" }
|
264
261
|
# Somehow all weird cases in an invalid JSON happen to be resolved in this function, so be careful here
|
@@ -310,8 +307,7 @@ class JSONParser:
|
|
310
307
|
if self.get_context() == "object_key" and self.get_char_at(1) == ":":
|
311
308
|
self.index += 1
|
312
309
|
return ""
|
313
|
-
|
314
|
-
# This is a valid exception only if it's closed by a double delimiter again
|
310
|
+
# Find the next delimiter
|
315
311
|
i = 1
|
316
312
|
next_c = self.get_char_at(i)
|
317
313
|
while next_c and next_c != rstring_delimiter:
|
@@ -386,7 +382,7 @@ class JSONParser:
|
|
386
382
|
string_acc += char
|
387
383
|
self.index += 1
|
388
384
|
char = self.get_char_at()
|
389
|
-
if len(string_acc) > 0 and string_acc[-1] == "\\":
|
385
|
+
if char and len(string_acc) > 0 and string_acc[-1] == "\\":
|
390
386
|
# This is a special case, if people use real strings this might happen
|
391
387
|
self.log("Found a stray escape sequence, normalizing it", "info")
|
392
388
|
string_acc = string_acc[:-1]
|
@@ -442,7 +438,7 @@ class JSONParser:
|
|
442
438
|
]:
|
443
439
|
# This is a bit of a weird workaround, essentially in object_value context we don't always break on commas
|
444
440
|
# This is because the routine after will make sure to correct any bad guess and this solves a corner case
|
445
|
-
if next_c.isalpha():
|
441
|
+
if check_comma_in_object_value and next_c.isalpha():
|
446
442
|
check_comma_in_object_value = False
|
447
443
|
# If we are in an object context, let's check for the right delimiters
|
448
444
|
if (
|
@@ -477,7 +473,7 @@ class JSONParser:
|
|
477
473
|
"While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here since this is the last element of the object, ignoring it",
|
478
474
|
"info",
|
479
475
|
)
|
480
|
-
string_acc += char
|
476
|
+
string_acc += str(char)
|
481
477
|
self.index += 1
|
482
478
|
char = self.get_char_at()
|
483
479
|
elif next_c == rstring_delimiter:
|
@@ -507,7 +503,7 @@ class JSONParser:
|
|
507
503
|
"While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
|
508
504
|
"info",
|
509
505
|
)
|
510
|
-
string_acc += char
|
506
|
+
string_acc += str(char)
|
511
507
|
self.index += 1
|
512
508
|
char = self.get_char_at()
|
513
509
|
|
@@ -525,7 +521,8 @@ class JSONParser:
|
|
525
521
|
if self.get_char_at() not in [":", ","]:
|
526
522
|
return ""
|
527
523
|
|
528
|
-
# A fallout of the previous special case in the while loop,
|
524
|
+
# A fallout of the previous special case in the while loop,
|
525
|
+
# we need to update the index only if we had a closing quote
|
529
526
|
if char != rstring_delimiter:
|
530
527
|
self.log(
|
531
528
|
"While parsing a string, we missed the closing quote, ignoring",
|
@@ -567,6 +564,7 @@ class JSONParser:
|
|
567
564
|
# <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
|
568
565
|
starting_index = self.index
|
569
566
|
char = (self.get_char_at() or "").lower()
|
567
|
+
value = None
|
570
568
|
if char == "t":
|
571
569
|
value = ("true", True)
|
572
570
|
elif char == "f":
|
@@ -587,7 +585,7 @@ class JSONParser:
|
|
587
585
|
self.index = starting_index
|
588
586
|
return ""
|
589
587
|
|
590
|
-
def get_char_at(self, count: int = 0) -> Union[str,
|
588
|
+
def get_char_at(self, count: int = 0) -> Union[str, Literal[False]]:
|
591
589
|
# Why not use something simpler? Because try/except in python is a faster alternative to an "if" statement that is often True
|
592
590
|
try:
|
593
591
|
return self.json_str[self.index + count]
|
@@ -615,16 +613,10 @@ class JSONParser:
|
|
615
613
|
self.context.append(value)
|
616
614
|
|
617
615
|
def reset_context(self) -> None:
|
618
|
-
try:
619
|
-
self.context.pop()
|
620
|
-
except Exception:
|
621
|
-
return
|
616
|
+
self.context.pop()
|
622
617
|
|
623
618
|
def get_context(self) -> str:
|
624
|
-
try:
625
|
-
return self.context[-1]
|
626
|
-
except Exception:
|
627
|
-
return ""
|
619
|
+
return self.context[-1]
|
628
620
|
|
629
621
|
def log(self, text: str, level: str) -> None:
|
630
622
|
if level == self.logger.log_level:
|
@@ -640,6 +632,50 @@ class JSONParser:
|
|
640
632
|
)
|
641
633
|
|
642
634
|
|
635
|
+
@overload
|
636
|
+
def repair_json(
|
637
|
+
json_str: str = "",
|
638
|
+
return_objects: Optional[Literal[False]] = False,
|
639
|
+
skip_json_loads: Optional[bool] = False,
|
640
|
+
logging: Optional[Literal[False]] = False, # None is treated as False
|
641
|
+
json_fd: Optional[TextIO] = None,
|
642
|
+
ensure_ascii: Optional[bool] = True,
|
643
|
+
) -> str: ...
|
644
|
+
|
645
|
+
|
646
|
+
@overload
|
647
|
+
def repair_json(
|
648
|
+
json_str: str = "",
|
649
|
+
return_objects: Literal[True] = True,
|
650
|
+
skip_json_loads: Optional[bool] = False,
|
651
|
+
logging: Optional[Literal[False]] = False, # None is treated as False
|
652
|
+
json_fd: Optional[TextIO] = None,
|
653
|
+
ensure_ascii: Optional[bool] = True,
|
654
|
+
) -> JSONReturnType: ...
|
655
|
+
|
656
|
+
|
657
|
+
@overload
|
658
|
+
def repair_json(
|
659
|
+
json_str: str = "",
|
660
|
+
return_objects: Optional[Literal[False]] = False, # None is treated as False
|
661
|
+
skip_json_loads: Optional[bool] = False,
|
662
|
+
logging: Literal[True] = True,
|
663
|
+
json_fd: Optional[TextIO] = None,
|
664
|
+
ensure_ascii: Optional[bool] = True,
|
665
|
+
) -> Tuple[str, List[Dict[str, str]]]: ...
|
666
|
+
|
667
|
+
|
668
|
+
@overload
|
669
|
+
def repair_json(
|
670
|
+
json_str: str = "",
|
671
|
+
return_objects: Literal[True] = True,
|
672
|
+
skip_json_loads: Optional[bool] = False,
|
673
|
+
logging: Literal[True] = True,
|
674
|
+
json_fd: Optional[TextIO] = None,
|
675
|
+
ensure_ascii: Optional[bool] = True,
|
676
|
+
) -> Tuple[JSONReturnType, List[Dict[str, str]]]: ...
|
677
|
+
|
678
|
+
|
643
679
|
def repair_json(
|
644
680
|
json_str: str = "",
|
645
681
|
return_objects: Optional[bool] = False,
|
@@ -653,7 +689,7 @@ def repair_json(
|
|
653
689
|
It will return the fixed string by default.
|
654
690
|
When `return_objects=True` is passed, it will return the decoded data structure instead.
|
655
691
|
When `skip_json_loads=True` is passed, it will not call the built-in json.loads() function
|
656
|
-
When `logging=True` is passed, it will return
|
692
|
+
When `logging=True` is passed, it will return a tuple with the repaired json and a log of all repair actions
|
657
693
|
"""
|
658
694
|
parser = JSONParser(json_str, json_fd, logging)
|
659
695
|
if skip_json_loads:
|
@@ -666,12 +702,29 @@ def repair_json(
|
|
666
702
|
parsed_json = json.loads(json_str)
|
667
703
|
except json.JSONDecodeError:
|
668
704
|
parsed_json = parser.parse()
|
669
|
-
# It's useful to return the actual object instead of the json string,
|
705
|
+
# It's useful to return the actual object instead of the json string,
|
706
|
+
# it allows this lib to be a replacement of the json library
|
670
707
|
if return_objects or logging:
|
671
708
|
return parsed_json
|
672
709
|
return json.dumps(parsed_json, ensure_ascii=ensure_ascii)
|
673
710
|
|
674
711
|
|
712
|
+
@overload
|
713
|
+
def loads(
|
714
|
+
json_str: str,
|
715
|
+
skip_json_loads: Optional[bool] = False,
|
716
|
+
logging: Optional[Literal[False]] = False, # None is treated as False
|
717
|
+
) -> JSONReturnType: ...
|
718
|
+
|
719
|
+
|
720
|
+
@overload
|
721
|
+
def loads(
|
722
|
+
json_str: str,
|
723
|
+
skip_json_loads: Optional[bool] = False,
|
724
|
+
logging: Literal[True] = True,
|
725
|
+
) -> Tuple[JSONReturnType, List[Dict[str, str]]]: ...
|
726
|
+
|
727
|
+
|
675
728
|
def loads(
|
676
729
|
json_str: str,
|
677
730
|
skip_json_loads: Optional[bool] = False,
|
@@ -689,6 +742,20 @@ def loads(
|
|
689
742
|
)
|
690
743
|
|
691
744
|
|
745
|
+
@overload
|
746
|
+
def load(
|
747
|
+
fd: TextIO,
|
748
|
+
skip_json_loads: Optional[bool] = False,
|
749
|
+
logging: Optional[Literal[False]] = False,
|
750
|
+
) -> JSONReturnType: ...
|
751
|
+
|
752
|
+
|
753
|
+
@overload
|
754
|
+
def load(
|
755
|
+
fd: TextIO, skip_json_loads: Optional[bool] = False, logging: Literal[True] = True
|
756
|
+
) -> Tuple[JSONReturnType, List[Dict[str, str]]]: ...
|
757
|
+
|
758
|
+
|
692
759
|
def load(
|
693
760
|
fd: TextIO, skip_json_loads: Optional[bool] = False, logging: Optional[bool] = False
|
694
761
|
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
|
@@ -704,6 +771,22 @@ def load(
|
|
704
771
|
)
|
705
772
|
|
706
773
|
|
774
|
+
@overload
|
775
|
+
def from_file(
|
776
|
+
filename: str,
|
777
|
+
skip_json_loads: Optional[bool] = False,
|
778
|
+
logging: Optional[Literal[False]] = False,
|
779
|
+
) -> JSONReturnType: ...
|
780
|
+
|
781
|
+
|
782
|
+
@overload
|
783
|
+
def from_file(
|
784
|
+
filename: str,
|
785
|
+
skip_json_loads: Optional[bool] = False,
|
786
|
+
logging: Literal[True] = True,
|
787
|
+
) -> Tuple[JSONReturnType, List[Dict[str, str]]]: ...
|
788
|
+
|
789
|
+
|
707
790
|
def from_file(
|
708
791
|
filename: str,
|
709
792
|
skip_json_loads: Optional[bool] = False,
|
File without changes
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.27.2
|
3
|
+
Version: 0.28.1
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -27,6 +27,7 @@ License: MIT License
|
|
27
27
|
|
28
28
|
Project-URL: Homepage, https://github.com/mangiucugna/json_repair/
|
29
29
|
Project-URL: Bug Tracker, https://github.com/mangiucugna/json_repair/issues
|
30
|
+
Project-URL: Live demo, https://mangiucugna.github.io/json_repair/
|
30
31
|
Keywords: JSON,REPAIR,LLM,PARSER
|
31
32
|
Classifier: Programming Language :: Python :: 3
|
32
33
|
Classifier: License :: OSI Approved :: MIT License
|
@@ -3,9 +3,11 @@ README.md
|
|
3
3
|
pyproject.toml
|
4
4
|
src/json_repair/__init__.py
|
5
5
|
src/json_repair/json_repair.py
|
6
|
+
src/json_repair/py.typed
|
6
7
|
src/json_repair.egg-info/PKG-INFO
|
7
8
|
src/json_repair.egg-info/SOURCES.txt
|
8
9
|
src/json_repair.egg-info/dependency_links.txt
|
9
10
|
src/json_repair.egg-info/top_level.txt
|
11
|
+
tests/test_coverage.py
|
10
12
|
tests/test_json_repair.py
|
11
13
|
tests/test_performance.py
|
@@ -0,0 +1,18 @@
|
|
1
|
+
import coverage
|
2
|
+
import sys
|
3
|
+
|
4
|
+
COVERAGE_THRESHOLD = 100
|
5
|
+
|
6
|
+
cov = coverage.Coverage()
|
7
|
+
cov.start()
|
8
|
+
|
9
|
+
import pytest
|
10
|
+
retcode = pytest.main(["./tests/test_json_repair.py", "--cov-config=.coveragerc"])
|
11
|
+
|
12
|
+
cov.stop()
|
13
|
+
cov.save()
|
14
|
+
coverage_percent = cov.report(show_missing=True)
|
15
|
+
|
16
|
+
if coverage_percent < COVERAGE_THRESHOLD:
|
17
|
+
print(f"ERROR: Coverage {coverage_percent:.2f}% is below the threshold of {COVERAGE_THRESHOLD}%")
|
18
|
+
sys.exit(1) # This will prevent the commit/push
|
@@ -94,10 +94,12 @@ def test_missing_and_mixed_quotes():
|
|
94
94
|
repair_json('{"name": "John", "age": 30, "city": "New')
|
95
95
|
== '{"name": "John", "age": 30, "city": "New"}'
|
96
96
|
)
|
97
|
-
assert repair_json('[{"key": "value", COMMENT "notes": "lorem "ipsum", sic."}]') == '[{"key": "value", "notes": "lorem \\"ipsum\\", sic."}]'
|
97
|
+
assert repair_json('[{"key": "value", COMMENT "notes": "lorem "ipsum", sic." }]') == '[{"key": "value", "notes": "lorem \\"ipsum\\", sic."}]'
|
98
98
|
assert repair_json('{"key": ""value"}') == '{"key": "value"}'
|
99
99
|
assert repair_json('{"key": "value", 5: "value"}') == '{"key": "value", "5": "value"}'
|
100
100
|
assert repair_json('{"foo": "\\"bar\\""') == '{"foo": "\\"bar\\""}'
|
101
|
+
assert repair_json('{"" key":"val"') == '{" key": "val"}'
|
102
|
+
assert repair_json('{"key": value "key2" : "value2" ') == '{"key": "value", "key2": "value2"}'
|
101
103
|
|
102
104
|
def test_array_edge_cases():
|
103
105
|
assert repair_json("[1, 2, 3,") == "[1, 2, 3]"
|
@@ -106,16 +108,9 @@ def test_array_edge_cases():
|
|
106
108
|
assert repair_json("[1, 2, '...', 3]") == '[1, 2, "...", 3]'
|
107
109
|
assert repair_json("[true, false, null, ...]") == '[true, false, null]'
|
108
110
|
assert repair_json('["a" "b" "c" 1') == '["a", "b", "c", 1]'
|
109
|
-
assert (
|
110
|
-
repair_json('{"employees":["John", "Anna",')
111
|
-
== '{"employees": ["John", "Anna"]}'
|
112
|
-
)
|
113
|
-
assert (
|
114
|
-
repair_json('{"employees":["John", "Anna", "Peter')
|
115
|
-
== '{"employees": ["John", "Anna", "Peter"]}'
|
116
|
-
)
|
111
|
+
assert repair_json('{"employees":["John", "Anna",') == '{"employees": ["John", "Anna"]}'
|
112
|
+
assert repair_json('{"employees":["John", "Anna", "Peter') == '{"employees": ["John", "Anna", "Peter"]}'
|
117
113
|
assert repair_json('{"key1": {"key2": [1, 2, 3') == '{"key1": {"key2": [1, 2, 3]}}'
|
118
|
-
|
119
114
|
|
120
115
|
def test_escaping():
|
121
116
|
assert repair_json("'\"'") == '""'
|
@@ -127,6 +122,7 @@ def test_escaping():
|
|
127
122
|
|
128
123
|
def test_object_edge_cases():
|
129
124
|
assert repair_json('{ ') == '{}'
|
125
|
+
assert repair_json('{"": "value"') == '{"": "value"}'
|
130
126
|
assert repair_json('{"value_1": true, COMMENT "value_2": "data"}') == '{"value_1": true, "value_2": "data"}'
|
131
127
|
assert repair_json('{"value_1": true, SHOULD_NOT_EXIST "value_2": "data" AAAA }') == '{"value_1": true, "value_2": "data"}'
|
132
128
|
assert repair_json('{"" : true, "key2": "value2"}') == '{"": true, "key2": "value2"}'
|
@@ -138,6 +134,8 @@ def test_object_edge_cases():
|
|
138
134
|
assert repair_json('{"key": "Lorem "ipsum" s,"}') == '{"key": "Lorem \\"ipsum\\" s,"}'
|
139
135
|
assert repair_json('{"lorem": ipsum, sic, datum.",}') == '{"lorem": "ipsum, sic, datum."}'
|
140
136
|
assert repair_json('{"lorem": sic tamet. "ipsum": sic tamet, quick brown fox. "sic": ipsum}') == '{"lorem": "sic tamet.", "ipsum": "sic tamet", "sic": "ipsum"}'
|
137
|
+
assert repair_json('{"key":value, " key2":"value2" }') == '{"key": "value", " key2": "value2"}'
|
138
|
+
assert repair_json('{"key":value "key2":"value2" }') == '{"key": "value", "key2": "value2"}'
|
141
139
|
|
142
140
|
def test_number_edge_cases():
|
143
141
|
assert repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|