json-repair 0.21.0__tar.gz → 0.23.0__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.21.0
3
+ Version: 0.23.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
3
3
  build-backend = "setuptools.build_meta"
4
4
  [project]
5
5
  name = "json_repair"
6
- version = "0.21.0"
6
+ version = "0.23.0"
7
7
  license = {file = "LICENSE"}
8
8
  authors = [
9
9
  { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
@@ -1,7 +1,7 @@
1
1
  """
2
2
  This module will parse the JSON file following the BNF definition:
3
3
 
4
- <json> ::= <primitive> | <container>
4
+ <json> ::= <container>
5
5
 
6
6
  <primitive> ::= <number> | <string> | <boolean>
7
7
  ; Where:
@@ -89,15 +89,32 @@ class JSONParser:
89
89
  def parse(
90
90
  self,
91
91
  ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
92
+ json = self.parse_json()
93
+ if self.index < len(self.json_str):
94
+ json = [json]
95
+ last_index = self.index
96
+ while self.index < len(self.json_str):
97
+ j = self.parse_json()
98
+ if j != "":
99
+ json.append(j)
100
+ if self.index == last_index:
101
+ self.index += 1
102
+ last_index = self.index
103
+ if len(json) == 1:
104
+ json = json[0]
105
+ elif len(json) == 0:
106
+ json = ""
92
107
  if self.logger.log_level == "none":
93
- return self.parse_json()
108
+ return json
94
109
  else:
95
- return self.parse_json(), self.logger.log
110
+ return json, self.logger.log
96
111
 
97
112
  def parse_json(
98
113
  self,
99
114
  ) -> JSONReturnType:
100
115
  char = self.get_char_at()
116
+ # This parser will ignore any basic element (string or number) that is not inside an array or object
117
+ is_in_context = len(self.context) > 0
101
118
  # False means that we are at the end of the string provided, is the base case for recursion
102
119
  if char is False:
103
120
  return ""
@@ -120,10 +137,10 @@ class JSONParser:
120
137
  )
121
138
  return ""
122
139
  # <string> starts with a quote
123
- elif char in ['"', "'", "“"] or char.isalpha():
140
+ elif is_in_context and (char in ['"', "'", "“"] or char.isalpha()):
124
141
  return self.parse_string()
125
142
  # <number> starts with [0-9] or minus
126
- elif char.isdigit() or char == "-" or char == ".":
143
+ elif is_in_context and (char.isdigit() or char == "-" or char == "."):
127
144
  return self.parse_number()
128
145
  # If everything else fails, we just ignore and move on
129
146
  else:
@@ -304,14 +321,6 @@ class JSONParser:
304
321
  "While parsing a string, we found a literal instead of a quote",
305
322
  "info",
306
323
  )
307
- if self.get_context() == "":
308
- # A string literal in the wild isn't a valid json and not something we can fix
309
- self.log(
310
- "While parsing a string, we found a literal outside of context, ignoring it",
311
- "info",
312
- )
313
- self.index += 1
314
- return self.parse_json()
315
324
  self.log(
316
325
  "While parsing a string, we found no starting quote. Will add the quote back",
317
326
  "info",
@@ -656,3 +665,6 @@ def from_file(
656
665
  fd.close()
657
666
 
658
667
  return jsonobj
668
+
669
+
670
+ repair_json("[]{}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.21.0
3
+ Version: 0.23.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -1,9 +1,9 @@
1
1
  from src.json_repair.json_repair import from_file, repair_json, loads
2
2
 
3
3
  def test_basic_types_valid():
4
- assert repair_json("True", return_objects=True) == True
5
- assert repair_json("False", return_objects=True) == False
6
- assert repair_json("Null", return_objects=True) == None
4
+ assert repair_json("True", return_objects=True) == ""
5
+ assert repair_json("False", return_objects=True) == ""
6
+ assert repair_json("Null", return_objects=True) == ""
7
7
  assert repair_json("1", return_objects=True) == 1
8
8
  assert repair_json("[]", return_objects=True) == []
9
9
  assert repair_json("[1, 2, 3, 4]", return_objects=True) == [1, 2, 3, 4]
@@ -114,7 +114,7 @@ def test_array_edge_cases():
114
114
 
115
115
 
116
116
  def test_escaping():
117
- assert repair_json("'\"'") == '"\\\""'
117
+ assert repair_json("'\"'") == '""'
118
118
  assert repair_json("{\"key\": 'string\"\n\t\le'") == '{"key": "string\\"\\n\\tle"}'
119
119
  assert repair_json(r'{"real_content": "Some string: Some other string \t Some string <a href=\"https://domain.com\">Some link</a>"') == r'{"real_content": "Some string: Some other string \t Some string <a href=\"https://domain.com\">Some link</a>"}'
120
120
  assert repair_json('{"key_1\n": "value"}') == '{"key_1": "value"}'
@@ -164,9 +164,15 @@ def test_leading_trailing_characters():
164
164
  assert repair_json("""{ "a": "", "b": [ { "c": 1} ] \n}```""") == '{"a": "", "b": [{"c": 1}]}'
165
165
  assert repair_json("Based on the information extracted, here is the filled JSON output: ```json { 'a': 'b' } ```") == '{"a": "b"}'
166
166
  assert repair_json("""
167
+ The next 64 elements are:
167
168
  ```json
168
169
  { "key": "value" }
169
170
  ```""") == '{"key": "value"}'
171
+ def test_multiple_jsons():
172
+ assert repair_json("[]{}") == "[[], {}]"
173
+ assert repair_json("{}[]") == "[{}, []]"
174
+ assert repair_json('{"key":"value"}[1,2,3,True]') == '[{"key": "value"}, ["1,2,3", true]]'
175
+ assert repair_json('lorem ```json {"key":"value"} ``` ipsum ```json [1,2,3,True] ``` 42') == '[{"key": "value"}, ["1,2,3", true]]'
170
176
 
171
177
  def test_repair_json_with_objects():
172
178
  # Test with valid JSON strings
@@ -19,7 +19,7 @@ def test_true_true_correct(benchmark):
19
19
  mean_time = benchmark.stats.get("median")
20
20
 
21
21
  # Define your time threshold in seconds
22
- max_time = 13 / 10 ** 4 # 1.3 millisecond
22
+ max_time = 14 / 10 ** 4 # 1.4 millisecond
23
23
 
24
24
  # Assert that the average time is below the threshold
25
25
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -31,7 +31,7 @@ def test_true_true_incorrect(benchmark):
31
31
  mean_time = benchmark.stats.get("median")
32
32
 
33
33
  # Define your time threshold in seconds
34
- max_time = 13 / 10 ** 4 # 1.3 millisecond
34
+ max_time = 14 / 10 ** 4 # 1.4 millisecond
35
35
 
36
36
  # Assert that the average time is below the threshold
37
37
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -53,7 +53,7 @@ def test_true_false_incorrect(benchmark):
53
53
  mean_time = benchmark.stats.get("median")
54
54
 
55
55
  # Define your time threshold in seconds
56
- max_time = 13 / 10 ** 4 # 1.3 millisecond
56
+ max_time = 14 / 10 ** 4 # 1.4 millisecond
57
57
 
58
58
  # Assert that the average time is below the threshold
59
59
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
File without changes
File without changes
File without changes