json-repair 0.29.3__tar.gz → 0.29.4__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (21) hide show
  1. {json_repair-0.29.3/src/json_repair.egg-info → json_repair-0.29.4}/PKG-INFO +1 -1
  2. {json_repair-0.29.3 → json_repair-0.29.4}/pyproject.toml +1 -1
  3. json_repair-0.29.4/src/json_repair/json_context.py +45 -0
  4. {json_repair-0.29.3 → json_repair-0.29.4}/src/json_repair/json_parser.py +29 -43
  5. {json_repair-0.29.3 → json_repair-0.29.4/src/json_repair.egg-info}/PKG-INFO +1 -1
  6. {json_repair-0.29.3 → json_repair-0.29.4}/tests/test_performance.py +6 -6
  7. json_repair-0.29.3/src/json_repair/json_context.py +0 -69
  8. {json_repair-0.29.3 → json_repair-0.29.4}/LICENSE +0 -0
  9. {json_repair-0.29.3 → json_repair-0.29.4}/README.md +0 -0
  10. {json_repair-0.29.3 → json_repair-0.29.4}/setup.cfg +0 -0
  11. {json_repair-0.29.3 → json_repair-0.29.4}/src/json_repair/__init__.py +0 -0
  12. {json_repair-0.29.3 → json_repair-0.29.4}/src/json_repair/__main__.py +0 -0
  13. {json_repair-0.29.3 → json_repair-0.29.4}/src/json_repair/json_repair.py +0 -0
  14. {json_repair-0.29.3 → json_repair-0.29.4}/src/json_repair/py.typed +0 -0
  15. {json_repair-0.29.3 → json_repair-0.29.4}/src/json_repair/string_file_wrapper.py +0 -0
  16. {json_repair-0.29.3 → json_repair-0.29.4}/src/json_repair.egg-info/SOURCES.txt +0 -0
  17. {json_repair-0.29.3 → json_repair-0.29.4}/src/json_repair.egg-info/dependency_links.txt +0 -0
  18. {json_repair-0.29.3 → json_repair-0.29.4}/src/json_repair.egg-info/entry_points.txt +0 -0
  19. {json_repair-0.29.3 → json_repair-0.29.4}/src/json_repair.egg-info/top_level.txt +0 -0
  20. {json_repair-0.29.3 → json_repair-0.29.4}/tests/test_coverage.py +0 -0
  21. {json_repair-0.29.3 → json_repair-0.29.4}/tests/test_json_repair.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.29.3
3
+ Version: 0.29.4
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
3
3
  build-backend = "setuptools.build_meta"
4
4
  [project]
5
5
  name = "json_repair"
6
- version = "0.29.3"
6
+ version = "0.29.4"
7
7
  license = {file = "LICENSE"}
8
8
  authors = [
9
9
  { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
@@ -0,0 +1,45 @@
1
+ from enum import Enum, auto
2
+ from typing import List, Optional
3
+
4
+
5
+ class ContextValues(Enum):
6
+ OBJECT_KEY = auto()
7
+ OBJECT_VALUE = auto()
8
+ ARRAY = auto()
9
+
10
+
11
+ class JsonContext:
12
+ def __init__(self) -> None:
13
+ self.context: List[ContextValues] = []
14
+ self.current: Optional[ContextValues] = None
15
+ self.empty: bool = True
16
+
17
+ def set(self, value: ContextValues) -> None:
18
+ """
19
+ Set a new context value.
20
+
21
+ Args:
22
+ value (ContextValues): The context value to be added.
23
+
24
+ Returns:
25
+ None
26
+ """
27
+ # If a value is provided update the context variable and save in stack
28
+ if value:
29
+ self.context.append(value)
30
+ self.current = value
31
+ self.empty = False
32
+
33
+ def reset(self) -> None:
34
+ """
35
+ Remove the most recent context value.
36
+
37
+ Returns:
38
+ None
39
+ """
40
+ try:
41
+ self.context.pop()
42
+ self.current = self.context[-1]
43
+ except IndexError:
44
+ self.current = None
45
+ self.empty = True
@@ -34,7 +34,8 @@ class JSONParser:
34
34
  self.logger: List[Dict[str, str]] = []
35
35
  self.log = self._log
36
36
  else:
37
- self.log = self.noop
37
+ # No-op
38
+ self.log = lambda *args, **kwargs: None
38
39
 
39
40
  def parse(
40
41
  self,
@@ -88,12 +89,10 @@ class JSONParser:
88
89
  )
89
90
  return ""
90
91
  # <string> starts with a quote
91
- elif not self.context.is_empty() and (
92
- char in ['"', "'", "“"] or char.isalpha()
93
- ):
92
+ elif not self.context.empty and (char in ['"', "'", "“"] or char.isalpha()):
94
93
  return self.parse_string()
95
94
  # <number> starts with [0-9] or minus
96
- elif not self.context.is_empty() and (
95
+ elif not self.context.empty and (
97
96
  char.isdigit() or char == "-" or char == "."
98
97
  ):
99
98
  return self.parse_number()
@@ -234,8 +233,9 @@ class JSONParser:
234
233
  elif char.isalnum():
235
234
  # This could be a <boolean> and not a string. Because (T)rue or (F)alse or (N)ull are valid
236
235
  # But remember, object keys are only of type string
237
- if char.lower() in ["t", "f", "n"] and not self.context.is_current(
238
- ContextValues.OBJECT_KEY
236
+ if (
237
+ char.lower() in ["t", "f", "n"]
238
+ and self.context.current != ContextValues.OBJECT_KEY
239
239
  ):
240
240
  value = self.parse_boolean_or_null()
241
241
  if value != "":
@@ -255,15 +255,13 @@ class JSONParser:
255
255
  if self.get_char_at() == lstring_delimiter:
256
256
  # If it's an empty key, this was easy
257
257
  if (
258
- self.context.is_current(ContextValues.OBJECT_KEY)
258
+ self.context.current == ContextValues.OBJECT_KEY
259
259
  and self.get_char_at(1) == ":"
260
260
  ):
261
261
  self.index += 1
262
262
  return ""
263
263
  # Find the next delimiter
264
- i = self.skip_to_character(
265
- character=rstring_delimiter, idx=1, move_main_index=False
266
- )
264
+ i = self.skip_to_character(character=rstring_delimiter, idx=1)
267
265
  next_c = self.get_char_at(i)
268
266
  # Now check that the next character is also a delimiter to ensure that we have "".....""
269
267
  # In that case we ignore this rstring delimiter
@@ -296,22 +294,20 @@ class JSONParser:
296
294
  while char and char != rstring_delimiter:
297
295
  if (
298
296
  missing_quotes
299
- and self.context.is_current(ContextValues.OBJECT_KEY)
297
+ and self.context.current == ContextValues.OBJECT_KEY
300
298
  and (char == ":" or char.isspace())
301
299
  ):
302
300
  self.log(
303
301
  "While parsing a string missing the left delimiter in object key context, we found a :, stopping here",
304
302
  )
305
303
  break
306
- if self.context.is_current(ContextValues.OBJECT_VALUE) and char in [
304
+ if self.context.current == ContextValues.OBJECT_VALUE and char in [
307
305
  ",",
308
306
  "}",
309
307
  ]:
310
308
  rstring_delimiter_missing = True
311
309
  # check if this is a case in which the closing comma is NOT missing instead
312
- i = self.skip_to_character(
313
- character=rstring_delimiter, idx=1, move_main_index=False
314
- )
310
+ i = self.skip_to_character(character=rstring_delimiter, idx=1)
315
311
  next_c = self.get_char_at(i)
316
312
  if next_c:
317
313
  i += 1
@@ -345,8 +341,9 @@ class JSONParser:
345
341
  "While parsing a string, we found a doubled quote, ignoring it"
346
342
  )
347
343
  self.index += 1
348
- elif missing_quotes and self.context.is_current(
349
- ContextValues.OBJECT_VALUE
344
+ elif (
345
+ missing_quotes
346
+ and self.context.current == ContextValues.OBJECT_VALUE
350
347
  ):
351
348
  # In case of missing starting quote I need to check if the delimeter is the end or the beginning of a key
352
349
  i = 1
@@ -387,20 +384,20 @@ class JSONParser:
387
384
  # If we are in an object context, let's check for the right delimiters
388
385
  if (
389
386
  (
390
- self.context.is_any(ContextValues.OBJECT_KEY)
387
+ ContextValues.OBJECT_KEY in self.context.context
391
388
  and next_c in [":", "}"]
392
389
  )
393
390
  or (
394
- self.context.is_any(ContextValues.OBJECT_VALUE)
391
+ ContextValues.OBJECT_VALUE in self.context.context
395
392
  and next_c == "}"
396
393
  )
397
394
  or (
398
- self.context.is_any(ContextValues.ARRAY)
395
+ ContextValues.ARRAY in self.context.context
399
396
  and next_c in ["]", ","]
400
397
  )
401
398
  or (
402
399
  check_comma_in_object_value
403
- and self.context.is_current(ContextValues.OBJECT_VALUE)
400
+ and self.context.current == ContextValues.OBJECT_VALUE
404
401
  and next_c == ","
405
402
  )
406
403
  ):
@@ -408,13 +405,12 @@ class JSONParser:
408
405
  i += 1
409
406
  next_c = self.get_char_at(i)
410
407
  # If we stopped for a comma in object_value context, let's check if find a "} at the end of the string
411
- if next_c == "," and self.context.is_current(
412
- ContextValues.OBJECT_VALUE
408
+ if (
409
+ next_c == ","
410
+ and self.context.current == ContextValues.OBJECT_VALUE
413
411
  ):
414
412
  i += 1
415
- i = self.skip_to_character(
416
- character=rstring_delimiter, idx=i, move_main_index=False
417
- )
413
+ i = self.skip_to_character(character=rstring_delimiter, idx=i)
418
414
  next_c = self.get_char_at(i)
419
415
  # Ok now I found a delimiter, let's skip whitespaces and see if next we find a }
420
416
  i += 1
@@ -429,15 +425,13 @@ class JSONParser:
429
425
  self.index += 1
430
426
  char = self.get_char_at()
431
427
  elif next_c == rstring_delimiter:
432
- if self.context.is_current(ContextValues.OBJECT_VALUE):
428
+ if self.context.current == ContextValues.OBJECT_VALUE:
433
429
  # But this might not be it! This could be just a missing comma
434
430
  # We found a delimiter and we need to check if this is a key
435
431
  # so find a rstring_delimiter and a colon after
436
432
  i += 1
437
433
  i = self.skip_to_character(
438
- character=rstring_delimiter,
439
- idx=i,
440
- move_main_index=False,
434
+ character=rstring_delimiter, idx=i
441
435
  )
442
436
  i += 1
443
437
  next_c = self.get_char_at(i)
@@ -462,7 +456,7 @@ class JSONParser:
462
456
  if (
463
457
  char
464
458
  and missing_quotes
465
- and self.context.is_current(ContextValues.OBJECT_KEY)
459
+ and self.context.current == ContextValues.OBJECT_KEY
466
460
  and char.isspace()
467
461
  ):
468
462
  self.log(
@@ -488,7 +482,7 @@ class JSONParser:
488
482
  number_str = ""
489
483
  number_chars = set("0123456789-.eE/,")
490
484
  char = self.get_char_at()
491
- is_array = self.context.is_current(ContextValues.ARRAY)
485
+ is_array = self.context.current == ContextValues.ARRAY
492
486
  while char and char in number_chars and (char != "," or not is_array):
493
487
  number_str += char
494
488
  self.index += 1
@@ -561,9 +555,7 @@ class JSONParser:
561
555
  return idx
562
556
  return idx
563
557
 
564
- def skip_to_character(
565
- self, character: str, idx: int = 0, move_main_index=True
566
- ) -> int:
558
+ def skip_to_character(self, character: str, idx: int = 0) -> int:
567
559
  """
568
560
  This function quickly iterates to find a character, syntactic sugar to make the code more concise
569
561
  """
@@ -572,10 +564,7 @@ class JSONParser:
572
564
  except IndexError:
573
565
  return idx
574
566
  while char != character:
575
- if move_main_index: # pragma: no cover
576
- self.index += 1
577
- else:
578
- idx += 1
567
+ idx += 1
579
568
  try:
580
569
  char = self.json_str[self.index + idx]
581
570
  except IndexError:
@@ -593,6 +582,3 @@ class JSONParser:
593
582
  "context": context,
594
583
  }
595
584
  )
596
-
597
- def noop(*args: Any, **kwargs: Any) -> None:
598
- pass
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.29.3
3
+ Version: 0.29.4
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -19,7 +19,7 @@ def test_true_true_correct(benchmark):
19
19
  mean_time = benchmark.stats.get("median")
20
20
 
21
21
  # Define your time threshold in seconds
22
- max_time = 2 / 10 ** 3 # 2 millisecond
22
+ max_time = 1.8 / 10 ** 3 # 1.8 millisecond
23
23
 
24
24
  # Assert that the average time is below the threshold
25
25
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -31,7 +31,7 @@ def test_true_true_incorrect(benchmark):
31
31
  mean_time = benchmark.stats.get("median")
32
32
 
33
33
  # Define your time threshold in seconds
34
- max_time = 2 / 10 ** 3 # 2 millisecond
34
+ max_time = 1.8 / 10 ** 3 # 1.8 millisecond
35
35
 
36
36
  # Assert that the average time is below the threshold
37
37
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -53,7 +53,7 @@ def test_true_false_incorrect(benchmark):
53
53
  mean_time = benchmark.stats.get("median")
54
54
 
55
55
  # Define your time threshold in seconds
56
- max_time = 2 / 10 ** 3 # 2 millisecond
56
+ max_time = 1.8 / 10 ** 3 # 1.8 millisecond
57
57
 
58
58
  # Assert that the average time is below the threshold
59
59
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -64,7 +64,7 @@ def test_false_true_correct(benchmark):
64
64
  mean_time = benchmark.stats.get("median")
65
65
 
66
66
  # Define your time threshold in seconds
67
- max_time = 2 / 10 ** 3 # 2 millisecond
67
+ max_time = 1.8 / 10 ** 3 # 1.8 millisecond
68
68
 
69
69
  # Assert that the average time is below the threshold
70
70
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -75,7 +75,7 @@ def test_false_true_incorrect(benchmark):
75
75
  mean_time = benchmark.stats.get("median")
76
76
 
77
77
  # Define your time threshold in seconds
78
- max_time = 2 / 10 ** 3 # 2 millisecond
78
+ max_time = 1.8 / 10 ** 3 # 1.8 millisecond
79
79
 
80
80
  # Assert that the average time is below the threshold
81
81
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -97,7 +97,7 @@ def test_false_false_incorrect(benchmark):
97
97
  mean_time = benchmark.stats.get("median")
98
98
 
99
99
  # Define your time threshold in seconds
100
- max_time = 2 / 10 ** 3 # 2 millisecond
100
+ max_time = 1.8 / 10 ** 3 # 1.8 millisecond
101
101
 
102
102
  # Assert that the average time is below the threshold
103
103
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
@@ -1,69 +0,0 @@
1
- from enum import Enum, auto
2
- from typing import List
3
-
4
-
5
- class ContextValues(Enum):
6
- OBJECT_KEY = auto()
7
- OBJECT_VALUE = auto()
8
- ARRAY = auto()
9
-
10
-
11
- class JsonContext:
12
- def __init__(self) -> None:
13
- self.context: List[ContextValues] = []
14
-
15
- def set(self, value: ContextValues) -> None:
16
- """
17
- Set a new context value.
18
-
19
- Args:
20
- value (ContextValues): The context value to be added.
21
-
22
- Returns:
23
- None
24
- """
25
- # If a value is provided update the context variable and save in stack
26
- if value:
27
- self.context.append(value)
28
-
29
- def reset(self) -> None:
30
- """
31
- Remove the most recent context value.
32
-
33
- Returns:
34
- None
35
- """
36
- self.context.pop()
37
-
38
- def is_current(self, context: ContextValues) -> bool:
39
- """
40
- Check if the given context is the current (most recent) context.
41
-
42
- Args:
43
- context (ContextValues): The context value to check.
44
-
45
- Returns:
46
- bool: True if the given context is the same as the most recent context in the stack, False otherwise.
47
- """
48
- return self.context[-1] == context
49
-
50
- def is_any(self, context: ContextValues) -> bool:
51
- """
52
- Check if the given context exists anywhere in the context stack.
53
-
54
- Args:
55
- context (ContextValues): The context value to check.
56
-
57
- Returns:
58
- bool: True if the given context exists in the stack, False otherwise.
59
- """
60
- return context in self.context
61
-
62
- def is_empty(self) -> bool:
63
- """
64
- Check if the context stack is empty.
65
-
66
- Returns:
67
- bool: True if the context stack is empty, False otherwise.
68
- """
69
- return len(self.context) == 0
File without changes
File without changes
File without changes