json-repair 0.23.1__tar.gz → 0.25.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.23.1
3
+ Version: 0.25.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -45,6 +45,11 @@ If you find this library useful, you can help me by donating toward my monthly b
45
45
 
46
46
  ---
47
47
 
48
+ # Demo
49
+ If you are unsure if this library will fix your specific problem, or simply want your json validated online, you can visit the demo site on GitHub pages: https://mangiucugna.github.io/json_repair/
50
+
51
+ ---
52
+
48
53
  # Motivation
49
54
  Some LLMs are a bit iffy when it comes to returning well-formed JSON data: sometimes they skip a parenthesis and sometimes they add some words to it, because that's what an LLM does.
50
55
  Luckily, the mistakes LLMs make are simple enough to be fixed without destroying the content.
@@ -160,6 +165,7 @@ You will need owner access to this repository
160
165
  # Repair JSON in other programming languages
161
166
  - Typescript: https://github.com/josdejong/jsonrepair
162
167
  - Go: https://github.com/RealAlexandreAI/json-repair
168
+ - Ruby: https://github.com/sashazykov/json-repair-rb
163
169
  ---
164
170
  ## Star History
165
171
 
@@ -8,6 +8,11 @@ If you find this library useful, you can help me by donating toward my monthly b
8
8
 
9
9
  ---
10
10
 
11
+ # Demo
12
+ If you are unsure if this library will fix your specific problem, or simply want your json validated online, you can visit the demo site on GitHub pages: https://mangiucugna.github.io/json_repair/
13
+
14
+ ---
15
+
11
16
  # Motivation
12
17
  Some LLMs are a bit iffy when it comes to returning well-formed JSON data: sometimes they skip a parenthesis and sometimes they add some words to it, because that's what an LLM does.
13
18
  Luckily, the mistakes LLMs make are simple enough to be fixed without destroying the content.
@@ -123,6 +128,7 @@ You will need owner access to this repository
123
128
  # Repair JSON in other programming languages
124
129
  - Typescript: https://github.com/josdejong/jsonrepair
125
130
  - Go: https://github.com/RealAlexandreAI/json-repair
131
+ - Ruby: https://github.com/sashazykov/json-repair-rb
126
132
  ---
127
133
  ## Star History
128
134
 
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
3
3
  build-backend = "setuptools.build_meta"
4
4
  [project]
5
5
  name = "json_repair"
6
- version = "0.23.1"
6
+ version = "0.25.0"
7
7
  license = {file = "LICENSE"}
8
8
  authors = [
9
9
  { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
@@ -91,6 +91,10 @@ class JSONParser:
91
91
  ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
92
92
  json = self.parse_json()
93
93
  if self.index < len(self.json_str):
94
+ self.log(
95
+ "The parser returned early, checking if there's more json elements",
96
+ "info",
97
+ )
94
98
  json = [json]
95
99
  last_index = self.index
96
100
  while self.index < len(self.json_str):
@@ -100,10 +104,13 @@ class JSONParser:
100
104
  if self.index == last_index:
101
105
  self.index += 1
102
106
  last_index = self.index
107
+ # If nothing extra was found, don't return an array
103
108
  if len(json) == 1:
109
+ self.log(
110
+ "There were no more elements, returning the element without the array",
111
+ "info",
112
+ )
104
113
  json = json[0]
105
- elif len(json) == 0:
106
- json = ""
107
114
  if self.logger.log_level == "none":
108
115
  return json
109
116
  else:
@@ -363,9 +370,34 @@ class JSONParser:
363
370
  if self.get_context() == "object_key" and (
364
371
  char == ":" or char.isspace()
365
372
  ):
373
+ self.log(
374
+ "While parsing a string missing the left delimiter in object key context, we found a :, stopping here",
375
+ "info",
376
+ )
366
377
  break
367
378
  elif self.get_context() == "object_value" and char in [",", "}"]:
368
- break
379
+ rstring_delimiter_missing = True
380
+ # check whether this is instead a case in which the closing quote (right string delimiter) is NOT missing
381
+ i = 1
382
+ next_c = self.get_char_at(i)
383
+ while next_c and next_c != rstring_delimiter:
384
+ i += 1
385
+ next_c = self.get_char_at(i)
386
+ if next_c:
387
+ i += 1
388
+ next_c = self.get_char_at(i)
389
+ # found a delimiter, now we need to check that it is followed (ignoring whitespace) strictly by a comma or closing brace
390
+ while next_c and next_c.isspace():
391
+ i += 1
392
+ next_c = self.get_char_at(i)
393
+ if next_c and next_c in [",", "}"]:
394
+ rstring_delimiter_missing = False
395
+ if rstring_delimiter_missing:
396
+ self.log(
397
+ "While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn't determine that a right delimiter was present. Stopping here",
398
+ "info",
399
+ )
400
+ break
369
401
  string_acc += char
370
402
  self.index += 1
371
403
  char = self.get_char_at()
@@ -386,6 +418,33 @@ class JSONParser:
386
418
  "While parsing a string, we found a doubled quote, ignoring it",
387
419
  "info",
388
420
  )
421
+ elif missing_quotes and self.get_context() == "object_value":
422
+ # In case of a missing starting quote I need to check if the delimiter is the end of the string or the beginning of the next key
423
+ i = 1
424
+ next_c = self.get_char_at(i)
425
+ while next_c and next_c not in [
426
+ rstring_delimiter,
427
+ lstring_delimiter,
428
+ ]:
429
+ i += 1
430
+ next_c = self.get_char_at(i)
431
+ if next_c:
432
+ # We found a quote, now let's make sure there's a ":" following
433
+ i += 1
434
+ next_c = self.get_char_at(i)
435
+ # skip any whitespace after the quote, then check that the next character is a colon
436
+ while next_c and next_c.isspace():
437
+ i += 1
438
+ next_c = self.get_char_at(i)
439
+ if next_c and next_c == ":":
440
+ # Reset the cursor
441
+ self.index -= 1
442
+ char = self.get_char_at()
443
+ self.log(
444
+ "In a string with missing quotes and object value context, I found a delimeter but it turns out it was the beginning on the next key. Stopping here.",
445
+ "info",
446
+ )
447
+ break
389
448
  else:
390
449
  # Check if eventually there is a rstring delimiter, otherwise we bail
391
450
  i = 1
@@ -496,7 +555,8 @@ class JSONParser:
496
555
  number_str = ""
497
556
  number_chars = set("0123456789-.eE/,")
498
557
  char = self.get_char_at()
499
- while char and char in number_chars:
558
+ is_array = self.get_context() == "array"
559
+ while char and char in number_chars and (char != "," or not is_array):
500
560
  number_str += char
501
561
  self.index += 1
502
562
  char = self.get_char_at()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.23.1
3
+ Version: 0.25.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -45,6 +45,11 @@ If you find this library useful, you can help me by donating toward my monthly b
45
45
 
46
46
  ---
47
47
 
48
+ # Demo
49
+ If you are unsure if this library will fix your specific problem, or simply want your json validated online, you can visit the demo site on GitHub pages: https://mangiucugna.github.io/json_repair/
50
+
51
+ ---
52
+
48
53
  # Motivation
49
54
  Some LLMs are a bit iffy when it comes to returning well-formed JSON data: sometimes they skip a parenthesis and sometimes they add some words to it, because that's what an LLM does.
50
55
  Luckily, the mistakes LLMs make are simple enough to be fixed without destroying the content.
@@ -160,6 +165,7 @@ You will need owner access to this repository
160
165
  # Repair JSON in other programming languages
161
166
  - Typescript: https://github.com/josdejong/jsonrepair
162
167
  - Go: https://github.com/RealAlexandreAI/json-repair
168
+ - Ruby: https://github.com/sashazykov/json-repair-rb
163
169
  ---
164
170
  ## Star History
165
171
 
@@ -141,6 +141,8 @@ def test_object_edge_cases():
141
141
  assert repair_json('''{ "a" : "{ b": {} }" }''') == '{"a": "{ b"}'
142
142
  assert repair_json("""{"b": "xxxxx" true}""") == '{"b": "xxxxx"}'
143
143
  assert repair_json('{"key": "Lorem "ipsum" s,"}') == '{"key": "Lorem \\"ipsum\\" s,"}'
144
+ assert repair_json('{"lorem": ipsum, sic, datum.",}') == '{"lorem": "ipsum, sic, datum."}'
145
+ assert repair_json('{"lorem": sic tamet. "ipsum": sic tamet, quick brown fox. "sic": ipsum}') == '{"lorem": "sic tamet.", "ipsum": "sic tamet", "sic": "ipsum"}'
144
146
 
145
147
  def test_number_edge_cases():
146
148
  assert repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
@@ -148,7 +150,7 @@ def test_number_edge_cases():
148
150
  assert repair_json('{"key": .25}') == '{"key": 0.25}'
149
151
  assert repair_json('{"here": "now", "key": 1/3, "foo": "bar"}') == '{"here": "now", "key": "1/3", "foo": "bar"}'
150
152
  assert repair_json('{"key": 12345/67890}') == '{"key": "12345/67890"}'
151
- assert repair_json('[105,12') == '["105,12"]'
153
+ assert repair_json('[105,12') == '[105, 12]'
152
154
  assert repair_json('{"key", 105,12,') == '{"key": "105,12"}'
153
155
  assert repair_json('{"key": 1/3, "foo": "bar"}') == '{"key": "1/3", "foo": "bar"}'
154
156
  assert repair_json('{"key": 10-20}') == '{"key": "10-20"}'
@@ -171,8 +173,8 @@ def test_leading_trailing_characters():
171
173
  def test_multiple_jsons():
172
174
  assert repair_json("[]{}") == "[[], {}]"
173
175
  assert repair_json("{}[]{}") == "[{}, [], {}]"
174
- assert repair_json('{"key":"value"}[1,2,3,True]') == '[{"key": "value"}, ["1,2,3", true]]'
175
- assert repair_json('lorem ```json {"key":"value"} ``` ipsum ```json [1,2,3,True] ``` 42') == '[{"key": "value"}, ["1,2,3", true]]'
176
+ assert repair_json('{"key":"value"}[1,2,3,True]') == '[{"key": "value"}, [1, 2, 3, true]]'
177
+ assert repair_json('lorem ```json {"key":"value"} ``` ipsum ```json [1,2,3,True] ``` 42') == '[{"key": "value"}, [1, 2, 3, true]]'
176
178
 
177
179
  def test_repair_json_with_objects():
178
180
  # Test with valid JSON strings
@@ -242,7 +244,7 @@ def test_repair_json_from_file():
242
244
  # Write content to the temporary file
243
245
  with os.fdopen(temp_fd, 'w') as tmp:
244
246
  tmp.write("{key:value}")
245
- assert(from_file(temp_path, logging=True)) == ({'key': 'value'}, [{'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'text': 'While parsing a string, we found no starting quote. Will add the quote back', 'context': '{key:value}'}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}, {'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'text': 'While parsing a string, we found no starting quote. Will add the quote back', 'context': '{key:value}'}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}])
247
+ assert(from_file(temp_path, logging=True)) == ({'key': 'value'}, [{'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'text': 'While parsing a string, we found no starting quote. Will add the quote back', 'context': '{key:value}'}, {'context': '{key:value}', 'text': 'While parsing a string missing the left delimiter in object key context, we found a :, stopping here',}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}, {'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'text': 'While parsing a string, we found no starting quote. Will add the quote back', 'context': '{key:value}'}, {'context': '{key:value}', 'text': 'While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn\'t determine that a right delimiter was present. Stopping here'}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}])
246
248
  finally:
247
249
  # Clean up - delete the temporary file
248
250
  os.remove(temp_path)
@@ -97,7 +97,7 @@ def test_false_false_incorrect(benchmark):
97
97
  mean_time = benchmark.stats.get("median")
98
98
 
99
99
  # Define your time threshold in seconds
100
- max_time = 14 / 10 ** 4 # 1.4 millisecond
100
+ max_time = 15 / 10 ** 4 # 1.5 millisecond
101
101
 
102
102
  # Assert that the median time is below the threshold
103
103
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
File without changes
File without changes