json-repair 0.6.1__tar.gz → 0.6.2__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.6.1
3
+ Version: 0.6.2
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
3
3
  build-backend = "setuptools.build_meta"
4
4
  [project]
5
5
  name = "json_repair"
6
- version = "0.6.1"
6
+ version = "0.6.2"
7
7
  license = {file = "LICENSE"}
8
8
  authors = [
9
9
  { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
@@ -207,17 +207,18 @@ class JSONParser:
207
207
  if fixed_quotes:
208
208
  if self.context == "object_key" and (char == ":" or char.isspace()):
209
209
  break
210
- elif self.context == "object_value" and (char == "," or char == "}"):
210
+ elif self.context == "object_value" and char in [",", "}"]:
211
211
  break
212
212
  self.index += 1
213
213
  char = self.get_char_at()
214
214
  # ChatGPT sometimes forgets to quote links in markdown like: { "content": "[LINK]("https://google.com")" }
215
215
  if (
216
216
  char == string_terminator
217
- and self.get_next_char() != ","
217
+ # Next character is not a comma
218
+ and self.get_char_at(1) != ","
218
219
  and (
219
220
  fix_broken_markdown_link
220
- or (self.get_prev_char(2) + self.get_prev_char()) == "]("
221
+ or (self.get_char_at(-2) == "]" and self.get_char_at(-1)) == "("
221
222
  )
222
223
  ):
223
224
  fix_broken_markdown_link = not fix_broken_markdown_link
@@ -239,7 +240,7 @@ class JSONParser:
239
240
 
240
241
  return self.json_str[start:end]
241
242
 
242
- def parse_number(self) -> Union[float, int]:
243
+ def parse_number(self) -> Union[float, int, str]:
243
244
  # <number> is a valid real number expressed in one of a number of given formats
244
245
  number_str = ""
245
246
  number_chars = set("0123456789-.eE")
@@ -257,7 +258,7 @@ class JSONParser:
257
258
  # This is a string then
258
259
  return self.parse_string()
259
260
 
260
- def parse_boolean_or_null(self) -> Union[bool, None]:
261
+ def parse_boolean_or_null(self) -> Union[bool, str, None]:
261
262
  # <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
262
263
  boolean_map = {"true": (True, 4), "false": (False, 5), "null": (None, 4)}
263
264
  for key, (value, length) in boolean_map.items():
@@ -272,21 +273,7 @@ class JSONParser:
272
273
  self.json_str = self.json_str[: self.index] + char + self.json_str[self.index :]
273
274
  self.index += 1
274
275
 
275
- def get_char_at(self) -> Union[str, bool]:
276
- # Why not use something simpler? Because we might be out of bounds and doing this check all the time is annoying
277
- try:
278
- return self.json_str[self.index]
279
- except IndexError:
280
- return False
281
-
282
- def get_prev_char(self, count=1):
283
- # Why not use something simpler? Because we might be out of bounds and doing this check all the time is annoying
284
- try:
285
- return self.json_str[self.index - count]
286
- except IndexError:
287
- return False
288
-
289
- def get_next_char(self, count=1):
276
+ def get_char_at(self, count: int = 0) -> Union[str, bool]:
290
277
  # Why not use something simpler? Because we might be out of bounds and doing this check all the time is annoying
291
278
  try:
292
279
  return self.json_str[self.index + count]
@@ -320,8 +307,7 @@ def repair_json(
320
307
  It will return the fixed string by default.
321
308
  When `return_objects=True` is passed, it will return the decoded data structure instead.
322
309
  """
323
- json_str = re.sub(r"^\s+", "", json_str)
324
- json_str = re.sub(r"\s+$", "", json_str)
310
+ json_str = json_str.strip()
325
311
  json_str = re.sub(r"/\*.*?\*/", "", json_str)
326
312
  parser = JSONParser(json_str)
327
313
  if skip_json_loads:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.6.1
3
+ Version: 0.6.2
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -7,8 +7,9 @@ def test_repair_json():
7
7
  assert repair_json("{}") == "{}"
8
8
  assert repair_json("\"") == '""'
9
9
  assert repair_json("\n") == '""'
10
- assert repair_json('{"key": true, "key2": false, "key3": null}') == '{"key": true, "key2": false, "key3": null}'
11
- assert repair_json('{"key": TRUE, "key2": FALSE, "key3": Null}') == '{"key": true, "key2": false, "key3": null}'
10
+ assert repair_json(" /* COMMENT */ ") == '""'
11
+ assert repair_json(' {"key": true, "key2": false, "key3": null}') == '{"key": true, "key2": false, "key3": null}'
12
+ assert repair_json('{"key": TRUE, "key2": FALSE, "key3": Null} ') == '{"key": true, "key2": false, "key3": null}'
12
13
  assert repair_json("{'key': 'string', 'key2': false, \"key3\": null, \"key4\": unquoted}") == '{"key": "string", "key2": false, "key3": null, "key4": "unquoted"}'
13
14
  assert (
14
15
  repair_json('{"name": "John", "age": 30, "city": "New York"}')
@@ -282,7 +282,7 @@ def test_true_true(benchmark):
282
282
  mean_time = benchmark.stats.get("median")
283
283
 
284
284
  # Define your time threshold in seconds (microseconds in this case)
285
- max_time = 1.1 / 10 ** 6 # 1.1 microsecond
285
+ max_time = 0.7 / 10 ** 6 # 0.7 microsecond
286
286
 
287
287
  # Assert that the average time is below the threshold
288
288
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -293,7 +293,7 @@ def test_true_false(benchmark):
293
293
  mean_time = benchmark.stats.get("median")
294
294
 
295
295
  # Define your time threshold in seconds (microseconds in this case)
296
- max_time = 180 * (1 / 10 ** 6) # 180 microsecond
296
+ max_time = 24 * (1 / 10 ** 6) # 24 microsecond
297
297
 
298
298
  # Assert that the average time is below the threshold
299
299
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -304,7 +304,7 @@ def test_false_true(benchmark):
304
304
  mean_time = benchmark.stats.get("median")
305
305
 
306
306
  # Define your time threshold in seconds (ms in this case)
307
- max_time = 1 / 10 ** 3 # 1 millisecond
307
+ max_time = 900 / 10 ** 4 # 0.09 second (90 milliseconds) as written; 0.9 millisecond would be 900 / 10 ** 6
308
308
 
309
309
  # Assert that the average time is below the threshold
310
310
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -315,7 +315,7 @@ def test_false_false(benchmark):
315
315
  mean_time = benchmark.stats.get("median")
316
316
 
317
317
  # Define your time threshold in seconds (microseconds in this case)
318
- max_time = 210 * (1 / 10 ** 6) # 210 microsecond
318
+ max_time = 55 / 10 ** 6 # 55 microseconds
319
319
 
320
320
  # Assert that the average time is below the threshold
321
321
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
File without changes
File without changes
File without changes