acryl-datahub 1.0.0rc4__py3-none-any.whl → 1.0.0rc5__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to a supported public registry. The information is provided for informational purposes only.

Potentially problematic release.


This version of acryl-datahub might be problematic; see the registry's advisory page for more details.

@@ -8,11 +8,11 @@ END_KEYWORD = "END"
8
8
 
9
9
  CONTROL_FLOW_KEYWORDS = [
10
10
  "GO",
11
- r"BEGIN\w+TRY",
12
- r"BEGIN\w+CATCH",
11
+ r"BEGIN\s+TRY",
12
+ r"BEGIN\s+CATCH",
13
13
  "BEGIN",
14
- r"END\w+TRY",
15
- r"END\w+CATCH",
14
+ r"END\s+TRY",
15
+ r"END\s+CATCH",
16
16
  # This isn't strictly correct, but we assume that IF | (condition) | (block) should all be split up
17
17
  # This mainly ensures that IF statements don't get tacked onto the previous statement incorrectly
18
18
  "IF",
@@ -73,25 +73,31 @@ class _StatementSplitter:
73
73
  # what a given END is closing.
74
74
  self.current_case_statements = 0
75
75
 
76
- def _is_keyword_at_position(self, pos: int, keyword: str) -> bool:
76
+ def _is_keyword_at_position(self, pos: int, keyword: str) -> Tuple[bool, str]:
77
77
  """
78
78
  Check if a keyword exists at the given position using regex word boundaries.
79
79
  """
80
80
  sql = self.sql
81
81
 
82
- if pos + len(keyword) > len(sql):
83
- return False
82
+ keyword_length = len(keyword.replace(r"\s+", " "))
83
+
84
+ if pos + keyword_length > len(sql):
85
+ return False, ""
84
86
 
85
87
  # If we're not at a word boundary, we can't generate a keyword.
86
88
  if pos > 0 and not (
87
89
  bool(re.match(r"\w\W", sql[pos - 1 : pos + 1]))
88
90
  or bool(re.match(r"\W\w", sql[pos - 1 : pos + 1]))
89
91
  ):
90
- return False
92
+ return False, ""
91
93
 
92
- pattern = rf"^{re.escape(keyword)}\b"
94
+ pattern = rf"^{keyword}\b"
93
95
  match = re.match(pattern, sql[pos:], re.IGNORECASE)
94
- return bool(match)
96
+ is_match = bool(match)
97
+ actual_match = (
98
+ sql[pos:][match.start() : match.end()] if match is not None else ""
99
+ )
100
+ return is_match, actual_match
95
101
 
96
102
  def _look_ahead_for_keywords(self, keywords: List[str]) -> Tuple[bool, str, int]:
97
103
  """
@@ -99,7 +105,8 @@ class _StatementSplitter:
99
105
  """
100
106
 
101
107
  for keyword in keywords:
102
- if self._is_keyword_at_position(self.i, keyword):
108
+ is_match, keyword = self._is_keyword_at_position(self.i, keyword)
109
+ if is_match:
103
110
  return True, keyword, len(keyword)
104
111
  return False, "", 0
105
112
 
@@ -118,7 +125,7 @@ class _StatementSplitter:
118
125
 
119
126
  def process(self) -> Iterator[str]:
120
127
  if not self.sql or not self.sql.strip():
121
- return
128
+ yield from ()
122
129
 
123
130
  prev_real_char = "\0" # the most recent non-whitespace, non-comment character
124
131
  while self.i < len(self.sql):
@@ -181,7 +188,7 @@ class _StatementSplitter:
181
188
  def _process_normal(self, most_recent_real_char: str) -> Iterator[str]:
182
189
  c = self.sql[self.i]
183
190
 
184
- if self._is_keyword_at_position(self.i, CASE_KEYWORD):
191
+ if self._is_keyword_at_position(self.i, CASE_KEYWORD)[0]:
185
192
  self.current_case_statements += 1
186
193
 
187
194
  is_control_keyword, keyword, keyword_len = self._look_ahead_for_keywords(
@@ -10,13 +10,11 @@ import tempfile
10
10
  import threading
11
11
  from dataclasses import dataclass, field
12
12
  from datetime import datetime
13
- from enum import Enum
14
13
  from types import TracebackType
15
14
  from typing import (
16
15
  Any,
17
16
  Callable,
18
17
  Dict,
19
- Final,
20
18
  Generic,
21
19
  Iterator,
22
20
  List,
@@ -31,6 +29,7 @@ from typing import (
31
29
  )
32
30
 
33
31
  from datahub.ingestion.api.closeable import Closeable
32
+ from datahub.utilities.sentinels import Unset, unset
34
33
 
35
34
  logger: logging.Logger = logging.getLogger(__name__)
36
35
 
@@ -59,16 +58,6 @@ SqliteValue = Union[int, float, str, bytes, datetime, None]
59
58
  _VT = TypeVar("_VT")
60
59
 
61
60
 
62
- class Unset(Enum):
63
- token = 0
64
-
65
-
66
- # It's pretty annoying to create a true sentinel that works with typing.
67
- # https://peps.python.org/pep-0484/#support-for-singleton-types-in-unions
68
- # Can't wait for https://peps.python.org/pep-0661/
69
- _unset: Final = Unset.token
70
-
71
-
72
61
  class ConnectionWrapper:
73
62
  """
74
63
  Wraps a SQlite connection, allowing connection reuse across multiple FileBacked* objects.
@@ -372,7 +361,7 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
372
361
  self,
373
362
  /,
374
363
  key: str,
375
- default: Union[_VT, Unset] = _unset,
364
+ default: Union[_VT, Unset] = unset,
376
365
  ) -> _VT:
377
366
  # If key is in the dictionary, this is similar to __getitem__ + mark_dirty.
378
367
  # If key is not in the dictionary, this is similar to __setitem__.
@@ -383,7 +372,7 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
383
372
  self.mark_dirty(key)
384
373
  return value
385
374
  except KeyError:
386
- if default is _unset:
375
+ if default is unset:
387
376
  raise
388
377
 
389
378
  self[key] = default
@@ -0,0 +1,22 @@
1
+ from enum import Enum
2
+ from typing import Final
3
+
4
+ # It's pretty annoying to create a true sentinel that works with typing.
5
+ # This approach using enums is inspired by:
6
+ # https://peps.python.org/pep-0484/#support-for-singleton-types-in-unions
7
+ #
8
+ # Can't wait for https://peps.python.org/pep-0661/
9
+
10
+
11
+ class Unset(Enum):
12
+ token = 0
13
+
14
+
15
+ unset: Final = Unset.token
16
+
17
+
18
+ class Auto(Enum):
19
+ token = 0
20
+
21
+
22
+ auto: Final = Auto.token