json-repair 0.50.1__tar.gz → 0.52.0__tar.gz

This diff compares the contents of two publicly available versions of a package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (35)
  1. {json_repair-0.50.1/src/json_repair.egg-info → json_repair-0.52.0}/PKG-INFO +2 -1
  2. {json_repair-0.50.1 → json_repair-0.52.0}/README.md +1 -0
  3. {json_repair-0.50.1 → json_repair-0.52.0}/pyproject.toml +7 -2
  4. {json_repair-0.50.1 → json_repair-0.52.0}/src/json_repair/parse_object.py +29 -1
  5. {json_repair-0.50.1 → json_repair-0.52.0/src/json_repair.egg-info}/PKG-INFO +2 -1
  6. {json_repair-0.50.1 → json_repair-0.52.0}/tests/test_parse_array.py +5 -1
  7. {json_repair-0.50.1 → json_repair-0.52.0}/tests/test_parse_object.py +14 -1
  8. {json_repair-0.50.1 → json_repair-0.52.0}/tests/test_performance.py +11 -0
  9. {json_repair-0.50.1 → json_repair-0.52.0}/LICENSE +0 -0
  10. {json_repair-0.50.1 → json_repair-0.52.0}/setup.cfg +0 -0
  11. {json_repair-0.50.1 → json_repair-0.52.0}/src/json_repair/__init__.py +0 -0
  12. {json_repair-0.50.1 → json_repair-0.52.0}/src/json_repair/__main__.py +0 -0
  13. {json_repair-0.50.1 → json_repair-0.52.0}/src/json_repair/constants.py +0 -0
  14. {json_repair-0.50.1 → json_repair-0.52.0}/src/json_repair/json_context.py +0 -0
  15. {json_repair-0.50.1 → json_repair-0.52.0}/src/json_repair/json_parser.py +0 -0
  16. {json_repair-0.50.1 → json_repair-0.52.0}/src/json_repair/json_repair.py +0 -0
  17. {json_repair-0.50.1 → json_repair-0.52.0}/src/json_repair/object_comparer.py +0 -0
  18. {json_repair-0.50.1 → json_repair-0.52.0}/src/json_repair/parse_array.py +0 -0
  19. {json_repair-0.50.1 → json_repair-0.52.0}/src/json_repair/parse_boolean_or_null.py +0 -0
  20. {json_repair-0.50.1 → json_repair-0.52.0}/src/json_repair/parse_comment.py +0 -0
  21. {json_repair-0.50.1 → json_repair-0.52.0}/src/json_repair/parse_number.py +0 -0
  22. {json_repair-0.50.1 → json_repair-0.52.0}/src/json_repair/parse_string.py +0 -0
  23. {json_repair-0.50.1 → json_repair-0.52.0}/src/json_repair/py.typed +0 -0
  24. {json_repair-0.50.1 → json_repair-0.52.0}/src/json_repair/string_file_wrapper.py +0 -0
  25. {json_repair-0.50.1 → json_repair-0.52.0}/src/json_repair.egg-info/SOURCES.txt +0 -0
  26. {json_repair-0.50.1 → json_repair-0.52.0}/src/json_repair.egg-info/dependency_links.txt +0 -0
  27. {json_repair-0.50.1 → json_repair-0.52.0}/src/json_repair.egg-info/entry_points.txt +0 -0
  28. {json_repair-0.50.1 → json_repair-0.52.0}/src/json_repair.egg-info/top_level.txt +0 -0
  29. {json_repair-0.50.1 → json_repair-0.52.0}/tests/test_json_repair.py +0 -0
  30. {json_repair-0.50.1 → json_repair-0.52.0}/tests/test_parse_boolean_or_null.py +0 -0
  31. {json_repair-0.50.1 → json_repair-0.52.0}/tests/test_parse_comment.py +0 -0
  32. {json_repair-0.50.1 → json_repair-0.52.0}/tests/test_parse_number.py +0 -0
  33. {json_repair-0.50.1 → json_repair-0.52.0}/tests/test_parse_string.py +0 -0
  34. {json_repair-0.50.1 → json_repair-0.52.0}/tests/test_repair_json_cli.py +0 -0
  35. {json_repair-0.50.1 → json_repair-0.52.0}/tests/test_repair_json_from_file.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: json_repair
3
- Version: 0.50.1
3
+ Version: 0.52.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License-Expression: MIT
@@ -269,6 +269,7 @@ You will need owner access to this repository
269
269
  - Ruby: https://github.com/sashazykov/json-repair-rb
270
270
  - Rust: https://github.com/oramasearch/llm_json
271
271
  - R: https://github.com/cgxjdzz/jsonRepair
272
+ - Java: https://github.com/du00cs/json-repairj
272
273
  ---
273
274
  ## Star History
274
275
 
@@ -252,6 +252,7 @@ You will need owner access to this repository
252
252
  - Ruby: https://github.com/sashazykov/json-repair-rb
253
253
  - Rust: https://github.com/oramasearch/llm_json
254
254
  - R: https://github.com/cgxjdzz/jsonRepair
255
+ - Java: https://github.com/du00cs/json-repairj
255
256
  ---
256
257
  ## Star History
257
258
 
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
3
3
  build-backend = "setuptools.build_meta"
4
4
  [project]
5
5
  name = "json_repair"
6
- version = "0.50.1"
6
+ version = "0.52.0"
7
7
  license = "MIT"
8
8
  license-files = ["LICENSE"]
9
9
  authors = [
@@ -26,12 +26,17 @@ pythonpath = [
26
26
  "."
27
27
  ]
28
28
  [tool.coverage.run]
29
+ source = ["src"]
29
30
  omit = [
30
31
  "*/.cursor/extensions/*",
31
32
  "*/pythonFiles/lib/python/*",
32
33
  "*/site-packages/*",
34
+ "src/json_repair/__main__.py",
33
35
  ]
34
36
  [tool.coverage.report]
37
+ include = [
38
+ "src/json_repair/*",
39
+ ]
35
40
  exclude_also = [
36
41
  'def __repr__',
37
42
  'if self.debug:',
@@ -111,4 +116,4 @@ line-ending = "auto"
111
116
 
112
117
  [tool.ruff.lint.per-file-ignores]
113
118
  # Explicit re-exports are fine in __init__.py, still a code smell elsewhere.
114
- "__init__.py" = ["PLC0414"]
119
+ "__init__.py" = ["PLC0414"]
@@ -1,6 +1,6 @@
1
1
  from typing import TYPE_CHECKING
2
2
 
3
- from .constants import JSONReturnType
3
+ from .constants import STRING_DELIMITERS, JSONReturnType
4
4
  from .json_context import ContextValues
5
5
 
6
6
  if TYPE_CHECKING:
@@ -10,6 +10,7 @@ if TYPE_CHECKING:
10
10
  def parse_object(self: "JSONParser") -> dict[str, JSONReturnType]:
11
11
  # <object> ::= '{' [ <member> *(', ' <member>) ] '}' ; A sequence of 'members'
12
12
  obj: dict[str, JSONReturnType] = {}
13
+ start_index = self.index
13
14
  # Stop when you either find the closing brace or you have iterated over the entire string
14
15
  while (self.get_char_at() or "}") != "}":
15
16
  # This is what we expect to find:
@@ -112,4 +113,31 @@ def parse_object(self: "JSONParser") -> dict[str, JSONReturnType]:
112
113
  self.skip_whitespaces_at()
113
114
 
114
115
  self.index += 1
116
+
117
+ # If the object is empty but also isn't just {}
118
+ if not obj and self.index - start_index > 2:
119
+ self.log("Parsed object is empty, we will try to parse this as an array instead")
120
+ self.index = start_index
121
+ return self.parse_array()
122
+
123
+ # Check if there are more key-value pairs after the closing brace
124
+ # This handles cases like '{"key": "value"}, "key2": "value2"}'
125
+ # But only if we're not in a nested context
126
+ if not self.context.empty:
127
+ return obj
128
+
129
+ self.skip_whitespaces_at()
130
+ if (self.get_char_at() or "") != ",":
131
+ return obj
132
+ self.index += 1
133
+ self.skip_whitespaces_at()
134
+ if (self.get_char_at() or "") not in STRING_DELIMITERS:
135
+ return obj
136
+ self.log(
137
+ "Found a comma and string delimiter after object closing brace, checking for additional key-value pairs",
138
+ )
139
+ additional_obj = self.parse_object()
140
+ if isinstance(additional_obj, dict):
141
+ obj.update(additional_obj)
142
+
115
143
  return obj
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: json_repair
3
- Version: 0.50.1
3
+ Version: 0.52.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License-Expression: MIT
@@ -269,6 +269,7 @@ You will need owner access to this repository
269
269
  - Ruby: https://github.com/sashazykov/json-repair-rb
270
270
  - Rust: https://github.com/oramasearch/llm_json
271
271
  - R: https://github.com/cgxjdzz/jsonRepair
272
+ - Java: https://github.com/du00cs/json-repairj
272
273
  ---
273
274
  ## Star History
274
275
 
@@ -35,7 +35,11 @@ def test_parse_array_edge_cases():
35
35
  )
36
36
  assert repair_json('{"k"e"y": "value"}') == '{"k\\"e\\"y": "value"}'
37
37
  assert repair_json('["key":"value"}]') == '[{"key": "value"}]'
38
- assert repair_json('[{"key": "value", "key') == '[{"key": "value"}]'
38
+ assert repair_json('[{"key": "value", "key') == '[{"key": "value"}, ["key"]]'
39
+ assert repair_json("{'key1', 'key2'}") == '["key1", "key2"]'
40
+
41
+
42
+ def test_parse_array_missing_quotes():
39
43
  assert repair_json('["value1" value2", "value3"]') == '["value1", "value2", "value3"]'
40
44
  assert (
41
45
  repair_json('{"bad_one":["Lorem Ipsum", "consectetur" comment" ], "good_one":[ "elit", "sed", "tempor"]}')
@@ -22,7 +22,6 @@ def test_parse_object():
22
22
 
23
23
  def test_parse_object_edge_cases():
24
24
  assert repair_json("{foo: [}") == '{"foo": []}'
25
- assert repair_json("{ ") == "{}"
26
25
  assert repair_json('{"": "value"') == '{"": "value"}'
27
26
  assert repair_json('{"value_1": true, COMMENT "value_2": "data"}') == '{"value_1": true, "value_2": "data"}'
28
27
  assert (
@@ -83,3 +82,17 @@ def test_parse_object_edge_cases():
83
82
  == '{"key": "{\\"key\\":[\\"value\\"],\\"key2\\":\\"value2\\"}"}'
84
83
  )
85
84
  assert repair_json('{"key": , "key2": "value2"}') == '{"key": "", "key2": "value2"}'
85
+
86
+
87
+ def test_parse_object_merge_at_the_end():
88
+ assert repair_json('{"key": "value"}, "key2": "value2"}') == '{"key": "value", "key2": "value2"}'
89
+ assert repair_json('{"key": "value"}, "key2": }') == '{"key": "value", "key2": ""}'
90
+ assert repair_json('{"key": "value"}, []') == '[{"key": "value"}, []]'
91
+ assert repair_json('{"key": "value"}, ["abc"]') == '[{"key": "value"}, ["abc"]]'
92
+ assert repair_json('{"key": "value"}, {}') == '[{"key": "value"}, {}]'
93
+ assert repair_json('{"key": "value"}, "" : "value2"}') == '{"key": "value", "": "value2"}'
94
+ assert repair_json('{"key": "value"}, "key2" "value2"}') == '{"key": "value", "key2": "value2"}'
95
+ assert (
96
+ repair_json('{"key1": "value1"}, "key2": "value2", "key3": "value3"}')
97
+ == '{"key1": "value1", "key2": "value2", "key3": "value3"}'
98
+ )
@@ -1,9 +1,12 @@
1
1
  import os.path
2
2
  import pathlib
3
3
 
4
+ import pytest
5
+
4
6
  from src.json_repair import repair_json
5
7
 
6
8
  path = pathlib.Path(__file__).parent.resolve()
9
+ CI = os.getenv("CI") is not None
7
10
 
8
11
  with open(os.path.join(path, "valid.json")) as fd:
9
12
  correct_json = fd.read()
@@ -12,6 +15,7 @@ with open(os.path.join(path, "invalid.json")) as fd:
12
15
  incorrect_json = fd.read()
13
16
 
14
17
 
18
+ @pytest.mark.skipif(CI, reason="Performance tests are skipped in CI")
15
19
  def test_true_true_correct(benchmark):
16
20
  benchmark(repair_json, correct_json, return_objects=True, skip_json_loads=True)
17
21
 
@@ -25,6 +29,7 @@ def test_true_true_correct(benchmark):
25
29
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
26
30
 
27
31
 
32
+ @pytest.mark.skipif(CI, reason="Performance tests are skipped in CI")
28
33
  def test_true_true_incorrect(benchmark):
29
34
  benchmark(repair_json, incorrect_json, return_objects=True, skip_json_loads=True)
30
35
 
@@ -38,6 +43,7 @@ def test_true_true_incorrect(benchmark):
38
43
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
39
44
 
40
45
 
46
+ @pytest.mark.skipif(CI, reason="Performance tests are skipped in CI")
41
47
  def test_true_false_correct(benchmark):
42
48
  benchmark(repair_json, correct_json, return_objects=True, skip_json_loads=False)
43
49
  # Retrieve the median execution time
@@ -50,6 +56,7 @@ def test_true_false_correct(benchmark):
50
56
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
51
57
 
52
58
 
59
+ @pytest.mark.skipif(CI, reason="Performance tests are skipped in CI")
53
60
  def test_true_false_incorrect(benchmark):
54
61
  benchmark(repair_json, incorrect_json, return_objects=True, skip_json_loads=False)
55
62
  # Retrieve the median execution time
@@ -62,6 +69,7 @@ def test_true_false_incorrect(benchmark):
62
69
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
63
70
 
64
71
 
72
+ @pytest.mark.skipif(CI, reason="Performance tests are skipped in CI")
65
73
  def test_false_true_correct(benchmark):
66
74
  benchmark(repair_json, correct_json, return_objects=False, skip_json_loads=True)
67
75
  # Retrieve the median execution time
@@ -74,6 +82,7 @@ def test_false_true_correct(benchmark):
74
82
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
75
83
 
76
84
 
85
+ @pytest.mark.skipif(CI, reason="Performance tests are skipped in CI")
77
86
  def test_false_true_incorrect(benchmark):
78
87
  benchmark(repair_json, incorrect_json, return_objects=False, skip_json_loads=True)
79
88
  # Retrieve the median execution time
@@ -86,6 +95,7 @@ def test_false_true_incorrect(benchmark):
86
95
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
87
96
 
88
97
 
98
+ @pytest.mark.skipif(CI, reason="Performance tests are skipped in CI")
89
99
  def test_false_false_correct(benchmark):
90
100
  benchmark(repair_json, correct_json, return_objects=False, skip_json_loads=False)
91
101
  # Retrieve the median execution time
@@ -98,6 +108,7 @@ def test_false_false_correct(benchmark):
98
108
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
99
109
 
100
110
 
111
+ @pytest.mark.skipif(CI, reason="Performance tests are skipped in CI")
101
112
  def test_false_false_incorrect(benchmark):
102
113
  benchmark(repair_json, incorrect_json, return_objects=False, skip_json_loads=False)
103
114
  # Retrieve the median execution time
File without changes
File without changes