json-repair 0.8.1__tar.gz → 0.10.0__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.8.1
3
+ Version: 0.10.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
3
3
  build-backend = "setuptools.build_meta"
4
4
  [project]
5
5
  name = "json_repair"
6
- version = "0.8.1"
6
+ version = "0.10.0"
7
7
  license = {file = "LICENSE"}
8
8
  authors = [
9
9
  { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
@@ -61,7 +61,9 @@ class JSONParser:
61
61
  elif char == '"':
62
62
  return self.parse_string()
63
63
  elif char == "'":
64
- return self.parse_string(use_single_quotes=True)
64
+ return self.parse_string(string_quotes="'")
65
+ elif char == "“":
66
+ return self.parse_string(string_quotes=["“", "”"])
65
67
  # <number> starts with [0-9] or minus
66
68
  elif char.isdigit() or char == "-":
67
69
  return self.parse_number()
@@ -102,9 +104,7 @@ class JSONParser:
102
104
  # <member> starts with a <string>
103
105
  key = ""
104
106
  while key == "" and self.get_char_at():
105
- key = self.parse_string(
106
- use_single_quotes=(self.json_str[self.index] == "'")
107
- )
107
+ key = self.parse_json()
108
108
 
109
109
  # This can happen sometimes like { "": "value" }
110
110
  if key == "" and self.get_char_at() == ":":
@@ -112,7 +112,7 @@ class JSONParser:
112
112
  break
113
113
 
114
114
  # We reached the end here
115
- if key == "}":
115
+ if (self.get_char_at() or "}") == "}":
116
116
  continue
117
117
 
118
118
  # An extreme case of missing ":" after a key
@@ -170,19 +170,22 @@ class JSONParser:
170
170
  self.index += 1
171
171
  return arr
172
172
 
173
- def parse_string(self, use_single_quotes=False) -> str:
173
+ def parse_string(self, string_quotes=False) -> str:
174
174
  # <string> is a string of valid characters enclosed in quotes
175
175
  # i.e. { name: "John" }
176
176
  # Somehow all weird cases in an invalid JSON happen to be resolved in this function, so be careful here
177
177
 
178
178
  # Flag to manage corner cases related to missing starting quote
179
179
  fixed_quotes = False
180
- string_terminator = '"'
181
- if use_single_quotes:
182
- string_terminator = "'"
180
+ lstring_delimiter = rstring_delimiter = '"'
181
+ if isinstance(string_quotes, list):
182
+ lstring_delimiter = string_quotes[0]
183
+ rstring_delimiter = string_quotes[1]
184
+ elif isinstance(string_quotes, str):
185
+ lstring_delimiter = rstring_delimiter = string_quotes
183
186
  char = self.get_char_at()
184
- if char != string_terminator:
185
- self.insert_char_at(string_terminator)
187
+ if char != lstring_delimiter:
188
+ self.insert_char_at(lstring_delimiter)
186
189
  fixed_quotes = True
187
190
  else:
188
191
  self.index += 1
@@ -198,7 +201,7 @@ class JSONParser:
198
201
  # * If we are fixing missing quotes in an object, when it finds the special terminators
199
202
  char = self.get_char_at()
200
203
  fix_broken_markdown_link = False
201
- while char and char != string_terminator:
204
+ while char and char != rstring_delimiter:
202
205
  if fixed_quotes:
203
206
  if self.context == "object_key" and (char == ":" or char.isspace()):
204
207
  break
@@ -208,7 +211,7 @@ class JSONParser:
208
211
  char = self.get_char_at()
209
212
  # ChatGPT sometimes forget to quote links in markdown like: { "content": "[LINK]("https://google.com")" }
210
213
  if (
211
- char == string_terminator
214
+ (char == lstring_delimiter or char == rstring_delimiter)
212
215
  # Next character is not a comma
213
216
  and self.get_char_at(1) != ","
214
217
  and (
@@ -228,8 +231,8 @@ class JSONParser:
228
231
  end = self.index
229
232
 
230
233
  # A fallout of the previous special case in the while loop, we need to update the index only if we had a closing quote
231
- if char != string_terminator:
232
- self.insert_char_at(string_terminator)
234
+ if char != rstring_delimiter:
235
+ self.insert_char_at(rstring_delimiter)
233
236
  else:
234
237
  self.index += 1
235
238
 
@@ -245,13 +248,16 @@ class JSONParser:
245
248
  self.index += 1
246
249
  char = self.get_char_at()
247
250
  if number_str:
248
- if "." in number_str or "e" in number_str or "E" in number_str:
249
- return float(number_str)
250
- elif number_str == "-":
251
- # If there is a stray "-" this will throw an exception, throw away this character
252
- return self.parse_json()
253
- else:
254
- return int(number_str)
251
+ try:
252
+ if "." in number_str or "e" in number_str or "E" in number_str:
253
+ return float(number_str)
254
+ elif number_str == "-":
255
+ # If there is a stray "-" this will throw an exception, throw away this character
256
+ return self.parse_json()
257
+ else:
258
+ return int(number_str)
259
+ except ValueError:
260
+ return number_str
255
261
  else:
256
262
  # This is a string then
257
263
  return self.parse_string()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.8.1
3
+ Version: 0.10.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -50,12 +50,13 @@ def test_repair_json():
50
50
  assert repair_json("[[1\n\n]") == "[[1]]"
51
51
  assert repair_json("{") == "{}"
52
52
  assert repair_json("}") == '""'
53
- assert repair_json('{"') == '{"": ""}'
53
+ assert repair_json('{"') == '{}'
54
54
  assert repair_json('["') == '[]'
55
55
  assert repair_json("'\"'") == '"\\\""'
56
56
  assert repair_json("'string\"") == '"string\\\""'
57
57
  assert repair_json('{foo: [}') == '{"foo": []}'
58
58
  assert repair_json('{"key": "value:value"}') == '{"key": "value:value"}'
59
+ assert repair_json('{“slanted_delimiter”: "value"}') == '{"slanted_delimiter": "value"}'
59
60
  assert (
60
61
  repair_json('{"name": "John", "age": 30, "city": "New')
61
62
  == '{"name": "John", "age": 30, "city": "New"}'
@@ -191,6 +192,10 @@ def test_repair_json_corner_cases_generate_by_gpt():
191
192
  # Test with null values
192
193
  assert repair_json('{"key": null}') == '{"key": null}'
193
194
 
195
+ # Test with numeric-like values
196
+ assert repair_json('{"key": 10-20}') == '{"key": "10-20"}'
197
+ assert repair_json('{"key": 1.1.1}') == '{"key": "1.1.1"}'
198
+
194
199
 
195
200
  def test_repair_json_corner_cases_generate_by_gpt_with_objects():
196
201
  # Test with nested JSON
@@ -221,6 +226,10 @@ def test_repair_json_corner_cases_generate_by_gpt_with_objects():
221
226
  # Test with null values
222
227
  assert repair_json('{"key": null}', True) == {"key": None}
223
228
 
229
+ # Test with numeric-like values
230
+ assert repair_json('{"key": 10-20}', True) == {"key": "10-20"}
231
+ assert repair_json('{"key": 1.1.1}', True) == {"key": "1.1.1"}
232
+
224
233
  def test_repair_json_skip_json_loads():
225
234
  assert repair_json('{"key": true, "key2": false, "key3": null}', skip_json_loads=True) == '{"key": true, "key2": false, "key3": null}'
226
235
  assert repair_json('{"key": true, "key2": false, "key3": null}', return_objects=True, skip_json_loads=True) == {"key": True, "key2": False, "key3": None}
@@ -624,7 +624,7 @@ def test_false_false_correct(benchmark):
624
624
  mean_time = benchmark.stats.get("median")
625
625
 
626
626
  # Define your time threshold in seconds
627
- max_time = 54 / 10 ** 6 # 54 microsecond
627
+ max_time = 55 / 10 ** 6 # 55 microsecond
628
628
 
629
629
  # Assert that the average time is below the threshold
630
630
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
File without changes
File without changes
File without changes