json-repair 0.27.2__tar.gz → 0.28.1__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.27.2
3
+ Version: 0.28.1
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -27,6 +27,7 @@ License: MIT License
27
27
 
28
28
  Project-URL: Homepage, https://github.com/mangiucugna/json_repair/
29
29
  Project-URL: Bug Tracker, https://github.com/mangiucugna/json_repair/issues
30
+ Project-URL: Live demo, https://mangiucugna.github.io/json_repair/
30
31
  Keywords: JSON,REPAIR,LLM,PARSER
31
32
  Classifier: Programming Language :: Python :: 3
32
33
  Classifier: License :: OSI Approved :: MIT License
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
3
3
  build-backend = "setuptools.build_meta"
4
4
  [project]
5
5
  name = "json_repair"
6
- version = "0.27.2"
6
+ version = "0.28.1"
7
7
  license = {file = "LICENSE"}
8
8
  authors = [
9
9
  { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
@@ -17,11 +17,15 @@ classifiers = [
17
17
  "License :: OSI Approved :: MIT License",
18
18
  "Operating System :: OS Independent",
19
19
  ]
20
-
21
20
  [project.urls]
22
21
  "Homepage" = "https://github.com/mangiucugna/json_repair/"
23
22
  "Bug Tracker" = "https://github.com/mangiucugna/json_repair/issues"
23
+ "Live demo" = "https://mangiucugna.github.io/json_repair/"
24
24
  [tool.pytest.ini_options]
25
25
  pythonpath = [
26
26
  "."
27
27
  ]
28
+ [tool.setuptools.package-data]
29
+ "pkgname" = ["py.typed"]
30
+ [tool.setuptools.packages.find]
31
+ where = ["src"]
@@ -24,7 +24,7 @@ All supported use cases are in the unit tests
24
24
 
25
25
  import os
26
26
  import json
27
- from typing import Any, Dict, List, Optional, Union, TextIO, Tuple
27
+ from typing import Any, Dict, List, Optional, Union, TextIO, Tuple, overload, Literal
28
28
 
29
29
 
30
30
  class StringFileWrapper:
@@ -51,9 +51,6 @@ class StringFileWrapper:
51
51
  self.fd.seek(current_position)
52
52
  return self.length
53
53
 
54
- def __setitem__(self) -> None:
55
- raise Exception("This is read-only!")
56
-
57
54
 
58
55
  class LoggerConfig:
59
56
  # This is a type class to simplify the declaration
@@ -180,7 +177,7 @@ class JSONParser:
180
177
  # <member> starts with a <string>
181
178
  key = ""
182
179
  while self.get_char_at():
183
- key = self.parse_string()
180
+ key = str(self.parse_string())
184
181
 
185
182
  if key != "" or (key == "" and self.get_char_at() == ":"):
186
183
  # If the string is empty but there is an object divider, we are done here
@@ -258,7 +255,7 @@ class JSONParser:
258
255
  self.reset_context()
259
256
  return arr
260
257
 
261
- def parse_string(self) -> Union[str, JSONReturnType]:
258
+ def parse_string(self) -> Union[str, bool, None]:
262
259
  # <string> is a string of valid characters enclosed in quotes
263
260
  # i.e. { name: "John" }
264
261
  # Somehow all weird cases in an invalid JSON happen to be resolved in this function, so be careful here
@@ -310,8 +307,7 @@ class JSONParser:
310
307
  if self.get_context() == "object_key" and self.get_char_at(1) == ":":
311
308
  self.index += 1
312
309
  return ""
313
-
314
- # This is a valid exception only if it's closed by a double delimiter again
310
+ # Find the next delimiter
315
311
  i = 1
316
312
  next_c = self.get_char_at(i)
317
313
  while next_c and next_c != rstring_delimiter:
@@ -386,7 +382,7 @@ class JSONParser:
386
382
  string_acc += char
387
383
  self.index += 1
388
384
  char = self.get_char_at()
389
- if len(string_acc) > 0 and string_acc[-1] == "\\":
385
+ if char and len(string_acc) > 0 and string_acc[-1] == "\\":
390
386
  # This is a special case, if people use real strings this might happen
391
387
  self.log("Found a stray escape sequence, normalizing it", "info")
392
388
  string_acc = string_acc[:-1]
@@ -442,7 +438,7 @@ class JSONParser:
442
438
  ]:
443
439
  # This is a bit of a weird workaround, essentially in object_value context we don't always break on commas
444
440
  # This is because the routine after will make sure to correct any bad guess and this solves a corner case
445
- if next_c.isalpha():
441
+ if check_comma_in_object_value and next_c.isalpha():
446
442
  check_comma_in_object_value = False
447
443
  # If we are in an object context, let's check for the right delimiters
448
444
  if (
@@ -477,7 +473,7 @@ class JSONParser:
477
473
  "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here since this is the last element of the object, ignoring it",
478
474
  "info",
479
475
  )
480
- string_acc += char
476
+ string_acc += str(char)
481
477
  self.index += 1
482
478
  char = self.get_char_at()
483
479
  elif next_c == rstring_delimiter:
@@ -507,7 +503,7 @@ class JSONParser:
507
503
  "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
508
504
  "info",
509
505
  )
510
- string_acc += char
506
+ string_acc += str(char)
511
507
  self.index += 1
512
508
  char = self.get_char_at()
513
509
 
@@ -525,7 +521,8 @@ class JSONParser:
525
521
  if self.get_char_at() not in [":", ","]:
526
522
  return ""
527
523
 
528
- # A fallout of the previous special case in the while loop, we need to update the index only if we had a closing quote
524
+ # A fallout of the previous special case in the while loop,
525
+ # we need to update the index only if we had a closing quote
529
526
  if char != rstring_delimiter:
530
527
  self.log(
531
528
  "While parsing a string, we missed the closing quote, ignoring",
@@ -567,6 +564,7 @@ class JSONParser:
567
564
  # <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
568
565
  starting_index = self.index
569
566
  char = (self.get_char_at() or "").lower()
567
+ value = None
570
568
  if char == "t":
571
569
  value = ("true", True)
572
570
  elif char == "f":
@@ -587,7 +585,7 @@ class JSONParser:
587
585
  self.index = starting_index
588
586
  return ""
589
587
 
590
- def get_char_at(self, count: int = 0) -> Union[str, bool]:
588
+ def get_char_at(self, count: int = 0) -> Union[str, Literal[False]]:
591
589
  # Why not use something simpler? Because try/except in Python is a faster alternative to an "if" statement that is often True
592
590
  try:
593
591
  return self.json_str[self.index + count]
@@ -615,16 +613,10 @@ class JSONParser:
615
613
  self.context.append(value)
616
614
 
617
615
  def reset_context(self) -> None:
618
- try:
619
- self.context.pop()
620
- except Exception:
621
- return
616
+ self.context.pop()
622
617
 
623
618
  def get_context(self) -> str:
624
- try:
625
- return self.context[-1]
626
- except Exception:
627
- return ""
619
+ return self.context[-1]
628
620
 
629
621
  def log(self, text: str, level: str) -> None:
630
622
  if level == self.logger.log_level:
@@ -640,6 +632,50 @@ class JSONParser:
640
632
  )
641
633
 
642
634
 
635
+ @overload
636
+ def repair_json(
637
+ json_str: str = "",
638
+ return_objects: Optional[Literal[False]] = False,
639
+ skip_json_loads: Optional[bool] = False,
640
+ logging: Optional[Literal[False]] = False, # None is treated as False
641
+ json_fd: Optional[TextIO] = None,
642
+ ensure_ascii: Optional[bool] = True,
643
+ ) -> str: ...
644
+
645
+
646
+ @overload
647
+ def repair_json(
648
+ json_str: str = "",
649
+ return_objects: Literal[True] = True,
650
+ skip_json_loads: Optional[bool] = False,
651
+ logging: Optional[Literal[False]] = False, # None is treated as False
652
+ json_fd: Optional[TextIO] = None,
653
+ ensure_ascii: Optional[bool] = True,
654
+ ) -> JSONReturnType: ...
655
+
656
+
657
+ @overload
658
+ def repair_json(
659
+ json_str: str = "",
660
+ return_objects: Optional[Literal[False]] = False, # None is treated as False
661
+ skip_json_loads: Optional[bool] = False,
662
+ logging: Literal[True] = True,
663
+ json_fd: Optional[TextIO] = None,
664
+ ensure_ascii: Optional[bool] = True,
665
+ ) -> Tuple[str, List[Dict[str, str]]]: ...
666
+
667
+
668
+ @overload
669
+ def repair_json(
670
+ json_str: str = "",
671
+ return_objects: Literal[True] = True,
672
+ skip_json_loads: Optional[bool] = False,
673
+ logging: Literal[True] = True,
674
+ json_fd: Optional[TextIO] = None,
675
+ ensure_ascii: Optional[bool] = True,
676
+ ) -> Tuple[JSONReturnType, List[Dict[str, str]]]: ...
677
+
678
+
643
679
  def repair_json(
644
680
  json_str: str = "",
645
681
  return_objects: Optional[bool] = False,
@@ -653,7 +689,7 @@ def repair_json(
653
689
  It will return the fixed string by default.
654
690
  When `return_objects=True` is passed, it will return the decoded data structure instead.
655
691
  When `skip_json_loads=True` is passed, it will not call the built-in json.loads() function
656
- When `logging=True` is passed, it will return an tuple with the repaired json and a log of all repair actions
692
+ When `logging=True` is passed, it will return a tuple with the repaired json and a log of all repair actions
657
693
  """
658
694
  parser = JSONParser(json_str, json_fd, logging)
659
695
  if skip_json_loads:
@@ -666,12 +702,29 @@ def repair_json(
666
702
  parsed_json = json.loads(json_str)
667
703
  except json.JSONDecodeError:
668
704
  parsed_json = parser.parse()
669
- # It's useful to return the actual object instead of the json string, it allows this lib to be a replacement of the json library
705
+ # It's useful to return the actual object instead of the json string,
706
+ # it allows this lib to be a replacement of the json library
670
707
  if return_objects or logging:
671
708
  return parsed_json
672
709
  return json.dumps(parsed_json, ensure_ascii=ensure_ascii)
673
710
 
674
711
 
712
+ @overload
713
+ def loads(
714
+ json_str: str,
715
+ skip_json_loads: Optional[bool] = False,
716
+ logging: Optional[Literal[False]] = False, # None is treated as False
717
+ ) -> JSONReturnType: ...
718
+
719
+
720
+ @overload
721
+ def loads(
722
+ json_str: str,
723
+ skip_json_loads: Optional[bool] = False,
724
+ logging: Literal[True] = True,
725
+ ) -> Tuple[JSONReturnType, List[Dict[str, str]]]: ...
726
+
727
+
675
728
  def loads(
676
729
  json_str: str,
677
730
  skip_json_loads: Optional[bool] = False,
@@ -689,6 +742,20 @@ def loads(
689
742
  )
690
743
 
691
744
 
745
+ @overload
746
+ def load(
747
+ fd: TextIO,
748
+ skip_json_loads: Optional[bool] = False,
749
+ logging: Optional[Literal[False]] = False,
750
+ ) -> JSONReturnType: ...
751
+
752
+
753
+ @overload
754
+ def load(
755
+ fd: TextIO, skip_json_loads: Optional[bool] = False, logging: Literal[True] = True
756
+ ) -> Tuple[JSONReturnType, List[Dict[str, str]]]: ...
757
+
758
+
692
759
  def load(
693
760
  fd: TextIO, skip_json_loads: Optional[bool] = False, logging: Optional[bool] = False
694
761
  ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
@@ -704,6 +771,22 @@ def load(
704
771
  )
705
772
 
706
773
 
774
+ @overload
775
+ def from_file(
776
+ filename: str,
777
+ skip_json_loads: Optional[bool] = False,
778
+ logging: Optional[Literal[False]] = False,
779
+ ) -> JSONReturnType: ...
780
+
781
+
782
+ @overload
783
+ def from_file(
784
+ filename: str,
785
+ skip_json_loads: Optional[bool] = False,
786
+ logging: Literal[True] = True,
787
+ ) -> Tuple[JSONReturnType, List[Dict[str, str]]]: ...
788
+
789
+
707
790
  def from_file(
708
791
  filename: str,
709
792
  skip_json_loads: Optional[bool] = False,
File without changes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.27.2
3
+ Version: 0.28.1
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -27,6 +27,7 @@ License: MIT License
27
27
 
28
28
  Project-URL: Homepage, https://github.com/mangiucugna/json_repair/
29
29
  Project-URL: Bug Tracker, https://github.com/mangiucugna/json_repair/issues
30
+ Project-URL: Live demo, https://mangiucugna.github.io/json_repair/
30
31
  Keywords: JSON,REPAIR,LLM,PARSER
31
32
  Classifier: Programming Language :: Python :: 3
32
33
  Classifier: License :: OSI Approved :: MIT License
@@ -3,9 +3,11 @@ README.md
3
3
  pyproject.toml
4
4
  src/json_repair/__init__.py
5
5
  src/json_repair/json_repair.py
6
+ src/json_repair/py.typed
6
7
  src/json_repair.egg-info/PKG-INFO
7
8
  src/json_repair.egg-info/SOURCES.txt
8
9
  src/json_repair.egg-info/dependency_links.txt
9
10
  src/json_repair.egg-info/top_level.txt
11
+ tests/test_coverage.py
10
12
  tests/test_json_repair.py
11
13
  tests/test_performance.py
@@ -0,0 +1,18 @@
1
+ import coverage
2
+ import sys
3
+
4
+ COVERAGE_THRESHOLD = 100
5
+
6
+ cov = coverage.Coverage()
7
+ cov.start()
8
+
9
+ import pytest
10
+ retcode = pytest.main(["./tests/test_json_repair.py", "--cov-config=.coveragerc"])
11
+
12
+ cov.stop()
13
+ cov.save()
14
+ coverage_percent = cov.report(show_missing=True)
15
+
16
+ if coverage_percent < COVERAGE_THRESHOLD:
17
+ print(f"ERROR: Coverage {coverage_percent:.2f}% is below the threshold of {COVERAGE_THRESHOLD}%")
18
+ sys.exit(1) # This will prevent the commit/push
@@ -94,10 +94,12 @@ def test_missing_and_mixed_quotes():
94
94
  repair_json('{"name": "John", "age": 30, "city": "New')
95
95
  == '{"name": "John", "age": 30, "city": "New"}'
96
96
  )
97
- assert repair_json('[{"key": "value", COMMENT "notes": "lorem "ipsum", sic."}]') == '[{"key": "value", "notes": "lorem \\"ipsum\\", sic."}]'
97
+ assert repair_json('[{"key": "value", COMMENT "notes": "lorem "ipsum", sic." }]') == '[{"key": "value", "notes": "lorem \\"ipsum\\", sic."}]'
98
98
  assert repair_json('{"key": ""value"}') == '{"key": "value"}'
99
99
  assert repair_json('{"key": "value", 5: "value"}') == '{"key": "value", "5": "value"}'
100
100
  assert repair_json('{"foo": "\\"bar\\""') == '{"foo": "\\"bar\\""}'
101
+ assert repair_json('{"" key":"val"') == '{" key": "val"}'
102
+ assert repair_json('{"key": value "key2" : "value2" ') == '{"key": "value", "key2": "value2"}'
101
103
 
102
104
  def test_array_edge_cases():
103
105
  assert repair_json("[1, 2, 3,") == "[1, 2, 3]"
@@ -106,16 +108,9 @@ def test_array_edge_cases():
106
108
  assert repair_json("[1, 2, '...', 3]") == '[1, 2, "...", 3]'
107
109
  assert repair_json("[true, false, null, ...]") == '[true, false, null]'
108
110
  assert repair_json('["a" "b" "c" 1') == '["a", "b", "c", 1]'
109
- assert (
110
- repair_json('{"employees":["John", "Anna",')
111
- == '{"employees": ["John", "Anna"]}'
112
- )
113
- assert (
114
- repair_json('{"employees":["John", "Anna", "Peter')
115
- == '{"employees": ["John", "Anna", "Peter"]}'
116
- )
111
+ assert repair_json('{"employees":["John", "Anna",') == '{"employees": ["John", "Anna"]}'
112
+ assert repair_json('{"employees":["John", "Anna", "Peter') == '{"employees": ["John", "Anna", "Peter"]}'
117
113
  assert repair_json('{"key1": {"key2": [1, 2, 3') == '{"key1": {"key2": [1, 2, 3]}}'
118
-
119
114
 
120
115
  def test_escaping():
121
116
  assert repair_json("'\"'") == '""'
@@ -127,6 +122,7 @@ def test_escaping():
127
122
 
128
123
  def test_object_edge_cases():
129
124
  assert repair_json('{ ') == '{}'
125
+ assert repair_json('{"": "value"') == '{"": "value"}'
130
126
  assert repair_json('{"value_1": true, COMMENT "value_2": "data"}') == '{"value_1": true, "value_2": "data"}'
131
127
  assert repair_json('{"value_1": true, SHOULD_NOT_EXIST "value_2": "data" AAAA }') == '{"value_1": true, "value_2": "data"}'
132
128
  assert repair_json('{"" : true, "key2": "value2"}') == '{"": true, "key2": "value2"}'
@@ -138,6 +134,8 @@ def test_object_edge_cases():
138
134
  assert repair_json('{"key": "Lorem "ipsum" s,"}') == '{"key": "Lorem \\"ipsum\\" s,"}'
139
135
  assert repair_json('{"lorem": ipsum, sic, datum.",}') == '{"lorem": "ipsum, sic, datum."}'
140
136
  assert repair_json('{"lorem": sic tamet. "ipsum": sic tamet, quick brown fox. "sic": ipsum}') == '{"lorem": "sic tamet.", "ipsum": "sic tamet", "sic": "ipsum"}'
137
+ assert repair_json('{"key":value, " key2":"value2" }') == '{"key": "value", " key2": "value2"}'
138
+ assert repair_json('{"key":value "key2":"value2" }') == '{"key": "value", "key2": "value2"}'
141
139
 
142
140
  def test_number_edge_cases():
143
141
  assert repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
File without changes
File without changes
File without changes