json-repair 0.30.3__tar.gz → 0.32.0__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (20) hide show
  1. {json_repair-0.30.3/src/json_repair.egg-info → json_repair-0.32.0}/PKG-INFO +1 -1
  2. {json_repair-0.30.3 → json_repair-0.32.0}/pyproject.toml +1 -1
  3. {json_repair-0.30.3 → json_repair-0.32.0}/src/json_repair/json_parser.py +24 -1
  4. {json_repair-0.30.3 → json_repair-0.32.0/src/json_repair.egg-info}/PKG-INFO +1 -1
  5. {json_repair-0.30.3 → json_repair-0.32.0}/tests/test_json_repair.py +3 -1
  6. {json_repair-0.30.3 → json_repair-0.32.0}/tests/test_performance.py +5 -5
  7. {json_repair-0.30.3 → json_repair-0.32.0}/LICENSE +0 -0
  8. {json_repair-0.30.3 → json_repair-0.32.0}/README.md +0 -0
  9. {json_repair-0.30.3 → json_repair-0.32.0}/setup.cfg +0 -0
  10. {json_repair-0.30.3 → json_repair-0.32.0}/src/json_repair/__init__.py +0 -0
  11. {json_repair-0.30.3 → json_repair-0.32.0}/src/json_repair/__main__.py +0 -0
  12. {json_repair-0.30.3 → json_repair-0.32.0}/src/json_repair/json_context.py +0 -0
  13. {json_repair-0.30.3 → json_repair-0.32.0}/src/json_repair/json_repair.py +0 -0
  14. {json_repair-0.30.3 → json_repair-0.32.0}/src/json_repair/py.typed +0 -0
  15. {json_repair-0.30.3 → json_repair-0.32.0}/src/json_repair/string_file_wrapper.py +0 -0
  16. {json_repair-0.30.3 → json_repair-0.32.0}/src/json_repair.egg-info/SOURCES.txt +0 -0
  17. {json_repair-0.30.3 → json_repair-0.32.0}/src/json_repair.egg-info/dependency_links.txt +0 -0
  18. {json_repair-0.30.3 → json_repair-0.32.0}/src/json_repair.egg-info/entry_points.txt +0 -0
  19. {json_repair-0.30.3 → json_repair-0.32.0}/src/json_repair.egg-info/top_level.txt +0 -0
  20. {json_repair-0.30.3 → json_repair-0.32.0}/tests/test_coverage.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.30.3
3
+ Version: 0.32.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
3
3
  build-backend = "setuptools.build_meta"
4
4
  [project]
5
5
  name = "json_repair"
6
- version = "0.30.3"
6
+ version = "0.32.0"
7
7
  license = {file = "LICENSE"}
8
8
  authors = [
9
9
  { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
@@ -124,6 +124,9 @@ class JSONParser:
124
124
 
125
125
  self.skip_whitespaces_at()
126
126
 
127
+ # Save this index in case we find a duplicate key and need to roll back
128
+ rollback_index = self.index
129
+
127
130
  # <member> starts with a <string>
128
131
  key = ""
129
132
  while self.get_char_at():
@@ -132,7 +135,14 @@ class JSONParser:
132
135
  if key != "" or (key == "" and self.get_char_at() == ":"):
133
136
  # If the string is empty but there is an object divider, we are done here
134
137
  break
138
+ if ContextValues.ARRAY in self.context.context and key in obj:
139
+ self.log(
140
+ "While parsing an object we found a duplicate key, closing the object here and rolling back the index",
141
+ )
142
+ self.index = rollback_index - 1
143
+ break
135
144
 
145
+ # Skip filler whitespaces
136
146
  self.skip_whitespaces_at()
137
147
 
138
148
  # We reached the end here
@@ -322,11 +332,24 @@ class JSONParser:
322
332
  else:
323
333
  # OK but this could still be some garbage at the end of the string
324
334
  # So we need to check if we find a new lstring_delimiter afterwards
325
- # If we do, this is a missing delimiter
335
+ # If we do, maybe this is a missing delimiter
326
336
  i = self.skip_to_character(character=lstring_delimiter, idx=i)
337
+ if doubled_quotes:
338
+ i = self.skip_to_character(
339
+ character=lstring_delimiter, idx=i
340
+ )
327
341
  next_c = self.get_char_at(i)
328
342
  if not next_c:
329
343
  rstring_delimiter_missing = False
344
+ else:
345
+ # But again, this could just be something a bit stupid like "lorem, "ipsum" sic"
346
+ # Check if we find a : afterwards (skipping space)
347
+ i = self.skip_whitespaces_at(
348
+ idx=i + 1, move_main_index=False
349
+ )
350
+ next_c = self.get_char_at(i)
351
+ if next_c and next_c != ":":
352
+ rstring_delimiter_missing = False
330
353
  else:
331
354
  # There could be a case in which even the next key:value is missing delimiters
332
355
  # because it might be a systemic issue with the output
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.30.3
3
+ Version: 0.32.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -108,6 +108,7 @@ def test_missing_and_mixed_quotes():
108
108
  assert repair_json('{"key": value "key2" : "value2" ') == '{"key": "value", "key2": "value2"}'
109
109
  assert repair_json('{"key": "lorem ipsum ... "sic " tamet. ...}') == '{"key": "lorem ipsum ... \\"sic \\" tamet. ..."}'
110
110
  assert repair_json('{"key": value , }') == '{"key": "value"}'
111
+ assert repair_json('{"comment": "lorem, "ipsum" sic "tamet". To improve"}') == '{"comment": "lorem, \\"ipsum\\" sic \\"tamet\\". To improve"}'
111
112
 
112
113
  def test_array_edge_cases():
113
114
  assert repair_json("[1, 2, 3,") == "[1, 2, 3]"
@@ -135,7 +136,7 @@ def test_object_edge_cases():
135
136
  assert repair_json('{"value_1": true, COMMENT "value_2": "data"}') == '{"value_1": true, "value_2": "data"}'
136
137
  assert repair_json('{"value_1": true, SHOULD_NOT_EXIST "value_2": "data" AAAA }') == '{"value_1": true, "value_2": "data"}'
137
138
  assert repair_json('{"" : true, "key2": "value2"}') == '{"": true, "key2": "value2"}'
138
- assert repair_json('{""answer"":[{""traits"":''Female aged 60+'',""answer1"":""5""}]}') == '{"answer": [{"traits": "Female aged 60+", "answer1": "5"}]}'
139
+ assert repair_json("""{""answer"":[{""traits"":''Female aged 60+'',""answer1"":""5""}]}""") == '{"answer": [{"traits": "Female aged 60+", "answer1": "5"}]}'
139
140
  assert repair_json('{ "words": abcdef", "numbers": 12345", "words2": ghijkl" }') == '{"words": "abcdef", "numbers": 12345, "words2": "ghijkl"}'
140
141
  assert repair_json('''{"number": 1,"reason": "According...""ans": "YES"}''') == '{"number": 1, "reason": "According...", "ans": "YES"}'
141
142
  assert repair_json('''{ "a" : "{ b": {} }" }''') == '{"a": "{ b"}'
@@ -151,6 +152,7 @@ def test_object_edge_cases():
151
152
  assert repair_json('{text:words{words in brackets}m}') == '{"text": "words{words in brackets}m"}'
152
153
  assert repair_json('{"key": "value, value2"```') == '{"key": "value, value2"}'
153
154
  assert repair_json('{key:value,key2:value2}') == '{"key": "value", "key2": "value2"}'
155
+ assert repair_json('[{"lorem": {"ipsum": "sic"}, "lorem": {"ipsum": "sic"}]') == '[{"lorem": {"ipsum": "sic"}}, "lorem", {"ipsum": "sic"}]'
154
156
 
155
157
  def test_number_edge_cases():
156
158
  assert repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
@@ -19,7 +19,7 @@ def test_true_true_correct(benchmark):
19
19
  mean_time = benchmark.stats.get("median")
20
20
 
21
21
  # Define your time threshold in seconds
22
- max_time = 1.8 / 10 ** 3 # 1.8 millisecond
22
+ max_time = 1.9 / 10 ** 3 # 1.9 millisecond
23
23
 
24
24
  # Assert that the average time is below the threshold
25
25
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -31,7 +31,7 @@ def test_true_true_incorrect(benchmark):
31
31
  mean_time = benchmark.stats.get("median")
32
32
 
33
33
  # Define your time threshold in seconds
34
- max_time = 1.8 / 10 ** 3 # 1.8 millisecond
34
+ max_time = 9 / 10 ** 3 # 9 milliseconds
35
35
 
36
36
  # Assert that the average time is below the threshold
37
37
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -53,7 +53,7 @@ def test_true_false_incorrect(benchmark):
53
53
  mean_time = benchmark.stats.get("median")
54
54
 
55
55
  # Define your time threshold in seconds
56
- max_time = 1.8 / 10 ** 3 # 1.8 millisecond
56
+ max_time = 1.9 / 10 ** 3 # 1.9 millisecond
57
57
 
58
58
  # Assert that the average time is below the threshold
59
59
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -64,7 +64,7 @@ def test_false_true_correct(benchmark):
64
64
  mean_time = benchmark.stats.get("median")
65
65
 
66
66
  # Define your time threshold in seconds
67
- max_time = 1.8 / 10 ** 3 # 1.8 millisecond
67
+ max_time = 1.9 / 10 ** 3 # 1.9 millisecond
68
68
 
69
69
  # Assert that the average time is below the threshold
70
70
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -75,7 +75,7 @@ def test_false_true_incorrect(benchmark):
75
75
  mean_time = benchmark.stats.get("median")
76
76
 
77
77
  # Define your time threshold in seconds
78
- max_time = 1.8 / 10 ** 3 # 1.8 millisecond
78
+ max_time = 1.9 / 10 ** 3 # 1.9 millisecond
79
79
 
80
80
  # Assert that the average time is below the threshold
81
81
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
File without changes
File without changes
File without changes