json-repair 0.31.0__tar.gz → 0.33.0__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (20) hide show
  1. {json_repair-0.31.0/src/json_repair.egg-info → json_repair-0.33.0}/PKG-INFO +1 -1
  2. {json_repair-0.31.0 → json_repair-0.33.0}/pyproject.toml +1 -1
  3. {json_repair-0.31.0 → json_repair-0.33.0}/src/json_repair/json_parser.py +32 -2
  4. {json_repair-0.31.0 → json_repair-0.33.0/src/json_repair.egg-info}/PKG-INFO +1 -1
  5. {json_repair-0.31.0 → json_repair-0.33.0}/tests/test_json_repair.py +2 -0
  6. {json_repair-0.31.0 → json_repair-0.33.0}/tests/test_performance.py +5 -5
  7. {json_repair-0.31.0 → json_repair-0.33.0}/LICENSE +0 -0
  8. {json_repair-0.31.0 → json_repair-0.33.0}/README.md +0 -0
  9. {json_repair-0.31.0 → json_repair-0.33.0}/setup.cfg +0 -0
  10. {json_repair-0.31.0 → json_repair-0.33.0}/src/json_repair/__init__.py +0 -0
  11. {json_repair-0.31.0 → json_repair-0.33.0}/src/json_repair/__main__.py +0 -0
  12. {json_repair-0.31.0 → json_repair-0.33.0}/src/json_repair/json_context.py +0 -0
  13. {json_repair-0.31.0 → json_repair-0.33.0}/src/json_repair/json_repair.py +0 -0
  14. {json_repair-0.31.0 → json_repair-0.33.0}/src/json_repair/py.typed +0 -0
  15. {json_repair-0.31.0 → json_repair-0.33.0}/src/json_repair/string_file_wrapper.py +0 -0
  16. {json_repair-0.31.0 → json_repair-0.33.0}/src/json_repair.egg-info/SOURCES.txt +0 -0
  17. {json_repair-0.31.0 → json_repair-0.33.0}/src/json_repair.egg-info/dependency_links.txt +0 -0
  18. {json_repair-0.31.0 → json_repair-0.33.0}/src/json_repair.egg-info/entry_points.txt +0 -0
  19. {json_repair-0.31.0 → json_repair-0.33.0}/src/json_repair.egg-info/top_level.txt +0 -0
  20. {json_repair-0.31.0 → json_repair-0.33.0}/tests/test_coverage.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.31.0
3
+ Version: 0.33.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
3
3
  build-backend = "setuptools.build_meta"
4
4
  [project]
5
5
  name = "json_repair"
6
- version = "0.31.0"
6
+ version = "0.33.0"
7
7
  license = {file = "LICENSE"}
8
8
  authors = [
9
9
  { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
@@ -124,6 +124,9 @@ class JSONParser:
124
124
 
125
125
  self.skip_whitespaces_at()
126
126
 
127
+ # Save this index in case we find a duplicate key and need to roll back
128
+ rollback_index = self.index
129
+
127
130
  # <member> starts with a <string>
128
131
  key = ""
129
132
  while self.get_char_at():
@@ -132,7 +135,14 @@ class JSONParser:
132
135
  if key != "" or (key == "" and self.get_char_at() == ":"):
133
136
  # If the string is empty but there is a object divider, we are done here
134
137
  break
138
+ if ContextValues.ARRAY in self.context.context and key in obj:
139
+ self.log(
140
+ "While parsing an object we found a duplicate key, closing the object here and rolling back the index",
141
+ )
142
+ self.index = rollback_index - 1
143
+ break
135
144
 
145
+ # Skip filler whitespaces
136
146
  self.skip_whitespaces_at()
137
147
 
138
148
  # We reached the end here
@@ -498,9 +508,8 @@ class JSONParser:
498
508
  # But this might not be it! This could be just a missing comma
499
509
  # We found a delimiter and we need to check if this is a key
500
510
  # so find a rstring_delimiter and a colon after
501
- i += 1
502
511
  i = self.skip_to_character(
503
- character=rstring_delimiter, idx=i
512
+ character=rstring_delimiter, idx=i + 1
504
513
  )
505
514
  i += 1
506
515
  next_c = self.get_char_at(i)
@@ -521,6 +530,27 @@ class JSONParser:
521
530
  string_acc += str(char)
522
531
  self.index += 1
523
532
  char = self.get_char_at()
533
+ elif self.context.current == ContextValues.ARRAY:
534
+ # In array context this could be something like "lorem "ipsum" sic"
535
+ # So let's check if we find a rstring_delimiter forward otherwise end early
536
+ i = self.skip_to_character(rstring_delimiter, idx=i + 1)
537
+ next_c = self.get_char_at(i)
538
+ if next_c and next_c == rstring_delimiter:
539
+ # Ok, now if we find a comma or a closing ], which may also have an optional rstring_delimiter before it
540
+ # We can consider this a misplaced quote
541
+ i += 1
542
+ i = self.skip_whitespaces_at(
543
+ idx=i, move_main_index=False
544
+ )
545
+ next_c = self.get_char_at(i)
546
+ if next_c and next_c in [",", "]"]:
547
+ self.log(
548
+ "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
549
+ )
550
+ unmatched_delimiter = not unmatched_delimiter
551
+ string_acc += str(char)
552
+ self.index += 1
553
+ char = self.get_char_at()
524
554
 
525
555
  if (
526
556
  char
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.31.0
3
+ Version: 0.33.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -121,6 +121,7 @@ def test_array_edge_cases():
121
121
  assert repair_json('{"employees":["John", "Anna", "Peter') == '{"employees": ["John", "Anna", "Peter"]}'
122
122
  assert repair_json('{"key1": {"key2": [1, 2, 3') == '{"key1": {"key2": [1, 2, 3]}}'
123
123
  assert repair_json('{"key": ["value]}') == '{"key": ["value"]}'
124
+ assert repair_json('["lorem "ipsum" sic"]') == '["lorem \\"ipsum\\" sic"]'
124
125
 
125
126
  def test_escaping():
126
127
  assert repair_json("'\"'") == '""'
@@ -152,6 +153,7 @@ def test_object_edge_cases():
152
153
  assert repair_json('{text:words{words in brackets}m}') == '{"text": "words{words in brackets}m"}'
153
154
  assert repair_json('{"key": "value, value2"```') == '{"key": "value, value2"}'
154
155
  assert repair_json('{key:value,key2:value2}') == '{"key": "value", "key2": "value2"}'
156
+ assert repair_json('[{"lorem": {"ipsum": "sic"}, "lorem": {"ipsum": "sic"}]') == '[{"lorem": {"ipsum": "sic"}}, "lorem", {"ipsum": "sic"}]'
155
157
 
156
158
  def test_number_edge_cases():
157
159
  assert repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
@@ -19,7 +19,7 @@ def test_true_true_correct(benchmark):
19
19
  mean_time = benchmark.stats.get("median")
20
20
 
21
21
  # Define your time threshold in seconds
22
- max_time = 1.8 / 10 ** 3 # 1.8 millisecond
22
+ max_time = 1.9 / 10 ** 3 # 1.9 millisecond
23
23
 
24
24
  # Assert that the average time is below the threshold
25
25
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -31,7 +31,7 @@ def test_true_true_incorrect(benchmark):
31
31
  mean_time = benchmark.stats.get("median")
32
32
 
33
33
  # Define your time threshold in seconds
34
- max_time = 1.8 / 10 ** 3 # 1.8 millisecond
34
+ max_time = 9 / 10 ** 3 # 9 milliseconds
35
35
 
36
36
  # Assert that the average time is below the threshold
37
37
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -53,7 +53,7 @@ def test_true_false_incorrect(benchmark):
53
53
  mean_time = benchmark.stats.get("median")
54
54
 
55
55
  # Define your time threshold in seconds
56
- max_time = 1.8 / 10 ** 3 # 1.8 millisecond
56
+ max_time = 1.9 / 10 ** 3 # 1.9 millisecond
57
57
 
58
58
  # Assert that the average time is below the threshold
59
59
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -64,7 +64,7 @@ def test_false_true_correct(benchmark):
64
64
  mean_time = benchmark.stats.get("median")
65
65
 
66
66
  # Define your time threshold in seconds
67
- max_time = 1.8 / 10 ** 3 # 1.8 millisecond
67
+ max_time = 1.9 / 10 ** 3 # 1.9 millisecond
68
68
 
69
69
  # Assert that the average time is below the threshold
70
70
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -75,7 +75,7 @@ def test_false_true_incorrect(benchmark):
75
75
  mean_time = benchmark.stats.get("median")
76
76
 
77
77
  # Define your time threshold in seconds
78
- max_time = 1.8 / 10 ** 3 # 1.8 millisecond
78
+ max_time = 1.9 / 10 ** 3 # 1.9 millisecond
79
79
 
80
80
  # Assert that the average time is below the threshold
81
81
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
File without changes
File without changes
File without changes