json-repair 0.23.1__tar.gz → 0.24.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.23.1
3
+ Version: 0.24.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -45,6 +45,11 @@ If you find this library useful, you can help me by donating toward my monthly b
45
45
 
46
46
  ---
47
47
 
48
+ # Demo
49
+ If you are unsure whether this library will fix your specific problem, or simply want your JSON validated online, you can visit the demo site on GitHub Pages: https://mangiucugna.github.io/json_repair/
50
+
51
+ ---
52
+
48
53
  # Motivation
49
54
  Some LLMs are a bit iffy when it comes to returning well-formed JSON data: sometimes they skip a parenthesis and sometimes they add some words to it, because that's what an LLM does.
50
55
  Luckily, the mistakes LLMs make are simple enough to be fixed without destroying the content.
@@ -160,6 +165,7 @@ You will need owner access to this repository
160
165
  # Repair JSON in other programming languages
161
166
  - Typescript: https://github.com/josdejong/jsonrepair
162
167
  - Go: https://github.com/RealAlexandreAI/json-repair
168
+ - Ruby: https://github.com/sashazykov/json-repair-rb
163
169
  ---
164
170
  ## Star History
165
171
 
@@ -8,6 +8,11 @@ If you find this library useful, you can help me by donating toward my monthly b
8
8
 
9
9
  ---
10
10
 
11
+ # Demo
12
+ If you are unsure whether this library will fix your specific problem, or simply want your JSON validated online, you can visit the demo site on GitHub Pages: https://mangiucugna.github.io/json_repair/
13
+
14
+ ---
15
+
11
16
  # Motivation
12
17
  Some LLMs are a bit iffy when it comes to returning well-formed JSON data: sometimes they skip a parenthesis and sometimes they add some words to it, because that's what an LLM does.
13
18
  Luckily, the mistakes LLMs make are simple enough to be fixed without destroying the content.
@@ -123,6 +128,7 @@ You will need owner access to this repository
123
128
  # Repair JSON in other programming languages
124
129
  - Typescript: https://github.com/josdejong/jsonrepair
125
130
  - Go: https://github.com/RealAlexandreAI/json-repair
131
+ - Ruby: https://github.com/sashazykov/json-repair-rb
126
132
  ---
127
133
  ## Star History
128
134
 
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
3
3
  build-backend = "setuptools.build_meta"
4
4
  [project]
5
5
  name = "json_repair"
6
- version = "0.23.1"
6
+ version = "0.24.0"
7
7
  license = {file = "LICENSE"}
8
8
  authors = [
9
9
  { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
@@ -91,6 +91,10 @@ class JSONParser:
91
91
  ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
92
92
  json = self.parse_json()
93
93
  if self.index < len(self.json_str):
94
+ self.log(
95
+ "The parser returned early, checking if there's more json elements",
96
+ "info",
97
+ )
94
98
  json = [json]
95
99
  last_index = self.index
96
100
  while self.index < len(self.json_str):
@@ -100,10 +104,13 @@ class JSONParser:
100
104
  if self.index == last_index:
101
105
  self.index += 1
102
106
  last_index = self.index
107
+ # If nothing extra was found, don't return an array
103
108
  if len(json) == 1:
109
+ self.log(
110
+ "There were no more elements, returning the element without the array",
111
+ "info",
112
+ )
104
113
  json = json[0]
105
- elif len(json) == 0:
106
- json = ""
107
114
  if self.logger.log_level == "none":
108
115
  return json
109
116
  else:
@@ -365,7 +372,24 @@ class JSONParser:
365
372
  ):
366
373
  break
367
374
  elif self.get_context() == "object_value" and char in [",", "}"]:
368
- break
375
+ rstring_delimiter_missing = True
376
+ # check if this is a case in which the closing comma is NOT missing instead
377
+ i = 1
378
+ next_c = self.get_char_at(i)
379
+ while next_c and next_c != rstring_delimiter:
380
+ i += 1
381
+ next_c = self.get_char_at(i)
382
+ if next_c:
383
+ i += 1
384
+ next_c = self.get_char_at(i)
385
+ # found a delimiter, now we need to check that is followed strictly by a comma or brace
386
+ while next_c and next_c.isspace():
387
+ i += 1
388
+ next_c = self.get_char_at(i)
389
+ if next_c and next_c in [",", "}"]:
390
+ rstring_delimiter_missing = False
391
+ if rstring_delimiter_missing:
392
+ break
369
393
  string_acc += char
370
394
  self.index += 1
371
395
  char = self.get_char_at()
@@ -496,7 +520,8 @@ class JSONParser:
496
520
  number_str = ""
497
521
  number_chars = set("0123456789-.eE/,")
498
522
  char = self.get_char_at()
499
- while char and char in number_chars:
523
+ is_array = self.get_context() == "array"
524
+ while char and char in number_chars and (char != "," or not is_array):
500
525
  number_str += char
501
526
  self.index += 1
502
527
  char = self.get_char_at()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.23.1
3
+ Version: 0.24.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -45,6 +45,11 @@ If you find this library useful, you can help me by donating toward my monthly b
45
45
 
46
46
  ---
47
47
 
48
+ # Demo
49
+ If you are unsure whether this library will fix your specific problem, or simply want your JSON validated online, you can visit the demo site on GitHub Pages: https://mangiucugna.github.io/json_repair/
50
+
51
+ ---
52
+
48
53
  # Motivation
49
54
  Some LLMs are a bit iffy when it comes to returning well-formed JSON data: sometimes they skip a parenthesis and sometimes they add some words to it, because that's what an LLM does.
50
55
  Luckily, the mistakes LLMs make are simple enough to be fixed without destroying the content.
@@ -160,6 +165,7 @@ You will need owner access to this repository
160
165
  # Repair JSON in other programming languages
161
166
  - Typescript: https://github.com/josdejong/jsonrepair
162
167
  - Go: https://github.com/RealAlexandreAI/json-repair
168
+ - Ruby: https://github.com/sashazykov/json-repair-rb
163
169
  ---
164
170
  ## Star History
165
171
 
@@ -141,6 +141,7 @@ def test_object_edge_cases():
141
141
  assert repair_json('''{ "a" : "{ b": {} }" }''') == '{"a": "{ b"}'
142
142
  assert repair_json("""{"b": "xxxxx" true}""") == '{"b": "xxxxx"}'
143
143
  assert repair_json('{"key": "Lorem "ipsum" s,"}') == '{"key": "Lorem \\"ipsum\\" s,"}'
144
+ assert repair_json('{"lorem": ipsum, sic, datum.",}') == '{"lorem": "ipsum, sic, datum."}'
144
145
 
145
146
  def test_number_edge_cases():
146
147
  assert repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
@@ -148,7 +149,7 @@ def test_number_edge_cases():
148
149
  assert repair_json('{"key": .25}') == '{"key": 0.25}'
149
150
  assert repair_json('{"here": "now", "key": 1/3, "foo": "bar"}') == '{"here": "now", "key": "1/3", "foo": "bar"}'
150
151
  assert repair_json('{"key": 12345/67890}') == '{"key": "12345/67890"}'
151
- assert repair_json('[105,12') == '["105,12"]'
152
+ assert repair_json('[105,12') == '[105, 12]'
152
153
  assert repair_json('{"key", 105,12,') == '{"key": "105,12"}'
153
154
  assert repair_json('{"key": 1/3, "foo": "bar"}') == '{"key": "1/3", "foo": "bar"}'
154
155
  assert repair_json('{"key": 10-20}') == '{"key": "10-20"}'
@@ -171,8 +172,8 @@ def test_leading_trailing_characters():
171
172
  def test_multiple_jsons():
172
173
  assert repair_json("[]{}") == "[[], {}]"
173
174
  assert repair_json("{}[]{}") == "[{}, [], {}]"
174
- assert repair_json('{"key":"value"}[1,2,3,True]') == '[{"key": "value"}, ["1,2,3", true]]'
175
- assert repair_json('lorem ```json {"key":"value"} ``` ipsum ```json [1,2,3,True] ``` 42') == '[{"key": "value"}, ["1,2,3", true]]'
175
+ assert repair_json('{"key":"value"}[1,2,3,True]') == '[{"key": "value"}, [1, 2, 3, true]]'
176
+ assert repair_json('lorem ```json {"key":"value"} ``` ipsum ```json [1,2,3,True] ``` 42') == '[{"key": "value"}, [1, 2, 3, true]]'
176
177
 
177
178
  def test_repair_json_with_objects():
178
179
  # Test with valid JSON strings
@@ -97,7 +97,7 @@ def test_false_false_incorrect(benchmark):
97
97
  mean_time = benchmark.stats.get("median")
98
98
 
99
99
  # Define your time threshold in seconds
100
- max_time = 14 / 10 ** 4 # 1.4 millisecond
100
+ max_time = 15 / 10 ** 4 # 1.5 millisecond
101
101
 
102
102
  # Assert that the average time is below the threshold
103
103
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
File without changes
File without changes