acryl-datahub 0.15.0.6rc1__py3-none-any.whl → 0.15.0.6rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc3.dist-info}/METADATA +2505 -2505
- {acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc3.dist-info}/RECORD +30 -19
- datahub/_version.py +1 -1
- datahub/cli/iceberg_cli.py +30 -6
- datahub/errors.py +35 -0
- datahub/ingestion/source/dbt/dbt_common.py +5 -0
- datahub/ingestion/source/dbt/dbt_core.py +11 -9
- datahub/ingestion/source/dynamodb/dynamodb.py +5 -0
- datahub/ingestion/source/looker/looker_common.py +3 -2
- datahub/ingestion/source/mongodb.py +17 -16
- datahub/ingestion/source/s3/source.py +14 -5
- datahub/ingestion/source/snowflake/snowflake_schema.py +10 -0
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +11 -14
- datahub/ingestion/source/sql/sql_common.py +10 -6
- datahub/ingestion/source/sql/teradata.py +12 -0
- datahub/sdk/__init__.py +33 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_attribution.py +48 -0
- datahub/sdk/_entity.py +89 -0
- datahub/sdk/_shared.py +345 -0
- datahub/sdk/container.py +193 -0
- datahub/sdk/dataset.py +584 -0
- datahub/sdk/entity_client.py +115 -0
- datahub/sdk/main_client.py +56 -0
- datahub/sdk/resolver_client.py +101 -0
- datahub/sql_parsing/split_statements.py +209 -122
- {acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc3.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc3.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc3.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.6rc1.dist-info → acryl_datahub-0.15.0.6rc3.dist-info}/top_level.txt +0 -0
datahub/sdk/main_client.py (new file)

@@ -0,0 +1,56 @@
+from __future__ import annotations
+
+from typing import Optional, overload
+
+from datahub.errors import SdkUsageError
+from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
+from datahub.ingestion.graph.config import DatahubClientConfig
+from datahub.sdk.entity_client import EntityClient
+from datahub.sdk.resolver_client import ResolverClient
+
+
+class DataHubClient:
+    @overload
+    def __init__(self, *, server: str, token: Optional[str] = None): ...
+    @overload
+    def __init__(self, *, config: DatahubClientConfig): ...
+    @overload
+    def __init__(self, *, graph: DataHubGraph): ...
+    def __init__(
+        self,
+        *,
+        server: Optional[str] = None,
+        token: Optional[str] = None,
+        graph: Optional[DataHubGraph] = None,
+        config: Optional[DatahubClientConfig] = None,
+    ):
+        if server is not None:
+            if config is not None:
+                raise SdkUsageError("Cannot specify both server and config")
+            if graph is not None:
+                raise SdkUsageError("Cannot specify both server and graph")
+            graph = DataHubGraph(config=DatahubClientConfig(server=server, token=token))
+        elif config is not None:
+            if graph is not None:
+                raise SdkUsageError("Cannot specify both config and graph")
+            graph = DataHubGraph(config=config)
+        elif graph is None:
+            raise SdkUsageError("Must specify either server, config, or graph")
+
+        self._graph = graph
+
+    @classmethod
+    def from_env(cls) -> "DataHubClient":
+        # Inspired by the DockerClient.from_env() method.
+        # TODO: This one also reads from ~/.datahubenv, so the "from_env" name might be a bit confusing.
+        # That file is part of the "environment", but is not a traditional "env variable".
+        graph = get_default_graph()
+        return cls(graph=graph)
+
+    @property
+    def entities(self) -> EntityClient:
+        return EntityClient(self)
+
+    @property
+    def resolve(self) -> ResolverClient:
+        return ResolverClient(self)
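The overloads above give three mutually exclusive ways to construct a client. A minimal usage sketch (the server URL and token are placeholders, not values from this release):

from datahub.sdk.main_client import DataHubClient

# Point the client at a server directly (placeholder URL and token).
client = DataHubClient(server="http://localhost:8080", token="<access-token>")

# Or reuse the ambient configuration, e.g. from ~/.datahubenv.
# client = DataHubClient.from_env()

# The two properties hand back the helper clients added in this release.
entity_client = client.entities
resolver_client = client.resolve

Passing more than one of server, config, and graph raises SdkUsageError, as does passing none of them.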
datahub/sdk/resolver_client.py (new file)

@@ -0,0 +1,101 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Optional, overload
+
+from datahub.errors import ItemNotFoundError, MultipleItemsFoundError, SdkUsageError
+from datahub.ingestion.graph.client import DataHubGraph
+from datahub.metadata.urns import (
+    CorpUserUrn,
+    DomainUrn,
+    GlossaryTermUrn,
+)
+
+if TYPE_CHECKING:
+    from datahub.sdk.main_client import DataHubClient
+
+
+class ResolverClient:
+    def __init__(self, client: DataHubClient):
+        self._client = client
+
+    # TODO: add caching to this method
+
+    @property
+    def _graph(self) -> DataHubGraph:
+        return self._client._graph
+
+    def domain(self, *, name: str) -> DomainUrn:
+        urn_str = self._graph.get_domain_urn_by_name(name)
+        if urn_str is None:
+            raise ItemNotFoundError(f"Domain with name {name} not found")
+        return DomainUrn.from_string(urn_str)
+
+    @overload
+    def user(self, *, name: str) -> CorpUserUrn: ...
+    @overload
+    def user(self, *, email: str) -> CorpUserUrn: ...
+    def user(
+        self, *, name: Optional[str] = None, email: Optional[str] = None
+    ) -> CorpUserUrn:
+        filter_explanation: str
+        filters = []
+        if name is not None:
+            if email is not None:
+                raise SdkUsageError("Cannot specify both name and email for auto_user")
+            # TODO: do we filter on displayName or fullName?
+            filter_explanation = f"with name {name}"
+            filters.append(
+                {
+                    "field": "fullName",
+                    "values": [name],
+                    "condition": "EQUAL",
+                }
+            )
+        elif email is not None:
+            filter_explanation = f"with email {email}"
+            filters.append(
+                {
+                    "field": "email",
+                    "values": [email],
+                    "condition": "EQUAL",
+                }
+            )
+        else:
+            raise SdkUsageError("Must specify either name or email for auto_user")
+
+        users = list(
+            self._graph.get_urns_by_filter(
+                entity_types=[CorpUserUrn.ENTITY_TYPE],
+                extraFilters=filters,
+            )
+        )
+        if len(users) == 0:
+            # TODO: In auto methods, should we just create the user/domain/etc if it doesn't exist?
+            raise ItemNotFoundError(f"User {filter_explanation} not found")
+        elif len(users) > 1:
+            raise MultipleItemsFoundError(
+                f"Multiple users found {filter_explanation}: {users}"
+            )
+        else:
+            return CorpUserUrn.from_string(users[0])
+
+    def term(self, *, name: str) -> GlossaryTermUrn:
+        # TODO: Add some limits on the graph fetch
+        terms = list(
+            self._graph.get_urns_by_filter(
+                entity_types=[GlossaryTermUrn.ENTITY_TYPE],
+                extraFilters=[
+                    {
+                        "field": "id",
+                        "values": [name],
+                        "condition": "EQUAL",
+                    }
+                ],
+            )
+        )
+        if len(terms) == 0:
+            raise ItemNotFoundError(f"Term with name {name} not found")
+        elif len(terms) > 1:
+            raise SdkUsageError(f"Multiple terms found with name {name}: {terms}")
+        else:
+            return GlossaryTermUrn.from_string(terms[0])
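A short sketch of how these resolver helpers are meant to be called (the domain, term, and email values below are made-up examples): each method returns a typed URN, raising ItemNotFoundError when nothing matches and MultipleItemsFoundError or SdkUsageError when the match is ambiguous.

from datahub.errors import ItemNotFoundError
from datahub.sdk.main_client import DataHubClient

client = DataHubClient(server="http://localhost:8080")  # placeholder server

domain_urn = client.resolve.domain(name="Marketing")  # -> DomainUrn
term_urn = client.resolve.term(name="Sensitive")      # -> GlossaryTermUrn
try:
    user_urn = client.resolve.user(email="jdoe@example.com")  # -> CorpUserUrn
except ItemNotFoundError:
    user_urn = None  # no CorpUser with that email

Note that user() must be called with exactly one of name or email, mirroring the overloads above.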
datahub/sql_parsing/split_statements.py (+209 -122)

@@ -1,6 +1,10 @@
 import re
 from enum import Enum
-from typing import
+from typing import Iterator, List, Tuple
+
+SELECT_KEYWORD = "SELECT"
+CASE_KEYWORD = "CASE"
+END_KEYWORD = "END"

 CONTROL_FLOW_KEYWORDS = [
     "GO",
@@ -9,18 +13,36 @@ CONTROL_FLOW_KEYWORDS = [
     "BEGIN",
     r"END\w+TRY",
     r"END\w+CATCH",
-
+    # This isn't strictly correct, but we assume that IF | (condition) | (block) should all be split up
+    # This mainly ensures that IF statements don't get tacked onto the previous statement incorrectly
+    "IF",
+    # For things like CASE, END does not mean the end of a statement.
+    # We have special handling for this.
+    END_KEYWORD,
+    # "ELSE",  # else is also valid in CASE, so we can't use it here.
 ]

 # There's an exception to this rule, which is when the statement
-# is
-
+# is preceded by a CTE. For those, we have to check if the character
+# before this is a ")".
+NEW_STATEMENT_KEYWORDS = [
     # SELECT is used inside queries as well, so we can't include it here.
+    "CREATE",
     "INSERT",
     "UPDATE",
     "DELETE",
     "MERGE",
 ]
+STRICT_NEW_STATEMENT_KEYWORDS = [
+    # For these keywords, a SELECT following it does indicate a new statement.
+    "DROP",
+    "TRUNCATE",
+]
+
+
+class _AlreadyIncremented(Exception):
+    # Using exceptions for control flow isn't great - but the code is clearer so it's fine.
+    pass


 class ParserState(Enum):
@@ -30,134 +52,199 @@ class ParserState(Enum):
     MULTILINE_COMMENT = 4


-
-
-
-    """
-    if pos + len(keyword) > len(sql):
-        return False
+class _StatementSplitter:
+    def __init__(self, sql: str):
+        self.sql = sql

-
-
-
-
-    ):
-        return False
+        # Main parser state.
+        self.i = 0
+        self.state = ParserState.NORMAL
+        self.current_statement: List[str] = []

-
-    match = re.match(pattern, sql[pos:], re.IGNORECASE)
-    return bool(match)
+        # Additional parser state.

+        # If we see a SELECT, should we start a new statement?
+        # If we previously saw a drop/truncate/etc, a SELECT does mean a new statement.
+        # But if we're in a select/create/etc, a select could just be a subquery.
+        self.does_select_mean_new_statement = False

-
-
-
-
-    Look ahead for SQL keywords at the current position.
-    """
+        # The END keyword terminates CASE and BEGIN blocks.
+        # We need to match the CASE statements with END blocks to determine
+        # what a given END is closing.
+        self.current_case_statements = 0

-
-
-
-
+    def _is_keyword_at_position(self, pos: int, keyword: str) -> bool:
+        """
+        Check if a keyword exists at the given position using regex word boundaries.
+        """
+        sql = self.sql

+        if pos + len(keyword) > len(sql):
+            return False

-
-
-
-
-
-
+        # If we're not at a word boundary, we can't generate a keyword.
+        if pos > 0 and not (
+            bool(re.match(r"\w\W", sql[pos - 1 : pos + 1]))
+            or bool(re.match(r"\W\w", sql[pos - 1 : pos + 1]))
+        ):
+            return False
+
+        pattern = rf"^{re.escape(keyword)}\b"
+        match = re.match(pattern, sql[pos:], re.IGNORECASE)
+        return bool(match)

-
-
-
+    def _look_ahead_for_keywords(self, keywords: List[str]) -> Tuple[bool, str, int]:
+        """
+        Look ahead for SQL keywords at the current position.
+        """

-
-
+        for keyword in keywords:
+            if self._is_keyword_at_position(self.i, keyword):
+                return True, keyword, len(keyword)
+        return False, "", 0
+
+    def _yield_if_complete(self) -> Iterator[str]:
+        statement = "".join(self.current_statement).strip()
         if statement:
+            # Subtle - to avoid losing full whitespace, they get merged into the next statement.
             yield statement
-        current_statement.clear()
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    i += 1
-            else:
-                most_recent_real_char = prev_real_char
-                if not c.isspace():
+        self.current_statement.clear()
+
+        # Reset current_statement-specific state.
+        self.does_select_mean_new_statement = False
+        if self.current_case_statements != 0:
+            breakpoint()
+        self.current_case_statements = 0
+
+    def process(self) -> Iterator[str]:
+        if not self.sql or not self.sql.strip():
+            return
+
+        prev_real_char = "\0"  # the most recent non-whitespace, non-comment character
+        while self.i < len(self.sql):
+            c = self.sql[self.i]
+            next_char = self.sql[self.i + 1] if self.i < len(self.sql) - 1 else "\0"
+
+            if self.state == ParserState.NORMAL:
+                if c == "'":
+                    self.state = ParserState.STRING
+                    self.current_statement.append(c)
                     prev_real_char = c
-
-
-
-
-
-
-
-
-
-                    i +=
-                    continue
-
-                (
-                    is_force_new_statement_keyword,
-                    keyword,
-                    keyword_len,
-                ) = _look_ahead_for_keywords(
-                    sql, i, keywords=FORCE_NEW_STATEMENT_KEYWORDS
-                )
-                if (
-                    is_force_new_statement_keyword and most_recent_real_char != ")"
-                ):  # usually we'd have a close paren that closes a CTE
-                    # Force termination of current statement
-                    yield from yield_if_complete()
-
-                    current_statement.append(keyword)
-                    i += keyword_len
-                    continue
-
-            elif c == ";":
-                yield from yield_if_complete()
+                elif c == "-" and next_char == "-":
+                    self.state = ParserState.COMMENT
+                    self.current_statement.append(c)
+                    self.current_statement.append(next_char)
+                    self.i += 1
+                elif c == "/" and next_char == "*":
+                    self.state = ParserState.MULTILINE_COMMENT
+                    self.current_statement.append(c)
+                    self.current_statement.append(next_char)
+                    self.i += 1
                 else:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                    most_recent_real_char = prev_real_char
+                    if not c.isspace():
+                        prev_real_char = c
+
+                    try:
+                        yield from self._process_normal(
+                            most_recent_real_char=most_recent_real_char
+                        )
+                    except _AlreadyIncremented:
+                        # Skip the normal i += 1 step.
+                        continue
+
+            elif self.state == ParserState.STRING:
+                self.current_statement.append(c)
+                if c == "'" and next_char == "'":
+                    self.current_statement.append(next_char)
+                    self.i += 1
+                elif c == "'":
+                    self.state = ParserState.NORMAL
+
+            elif self.state == ParserState.COMMENT:
+                self.current_statement.append(c)
+                if c == "\n":
+                    self.state = ParserState.NORMAL
+
+            elif self.state == ParserState.MULTILINE_COMMENT:
+                self.current_statement.append(c)
+                if c == "*" and next_char == "/":
+                    self.current_statement.append(next_char)
+                    self.i += 1
+                    self.state = ParserState.NORMAL
+
+            self.i += 1
+
+        # Handle the last statement
+        yield from self._yield_if_complete()
+
+    def _process_normal(self, most_recent_real_char: str) -> Iterator[str]:
+        c = self.sql[self.i]
+
+        if self._is_keyword_at_position(self.i, CASE_KEYWORD):
+            self.current_case_statements += 1
+
+        is_control_keyword, keyword, keyword_len = self._look_ahead_for_keywords(
+            keywords=CONTROL_FLOW_KEYWORDS
+        )
+        if (
+            is_control_keyword
+            and keyword == END_KEYWORD
+            and self.current_case_statements > 0
+        ):
+            # If we're closing a CASE statement with END, we can just decrement the counter and continue.
+            self.current_case_statements -= 1
+        elif is_control_keyword:
+            # Yield current statement if any
+            yield from self._yield_if_complete()
+            # Yield keyword as its own statement
+            yield keyword
+            self.i += keyword_len
+            self.does_select_mean_new_statement = True
+            raise _AlreadyIncremented()
+
+        (
+            is_strict_new_statement_keyword,
+            keyword,
+            keyword_len,
+        ) = self._look_ahead_for_keywords(keywords=STRICT_NEW_STATEMENT_KEYWORDS)
+        if is_strict_new_statement_keyword:
+            yield from self._yield_if_complete()
+            self.current_statement.append(keyword)
+            self.i += keyword_len
+            self.does_select_mean_new_statement = True
+            raise _AlreadyIncremented()
+
+        (
+            is_force_new_statement_keyword,
+            keyword,
+            keyword_len,
+        ) = self._look_ahead_for_keywords(
+            keywords=(
+                NEW_STATEMENT_KEYWORDS
+                + ([SELECT_KEYWORD] if self.does_select_mean_new_statement else [])
+            ),
+        )
+        if (
+            is_force_new_statement_keyword and most_recent_real_char != ")"
+        ):  # usually we'd have a close paren that closes a CTE
+            # Force termination of current statement
+            yield from self._yield_if_complete()
+
+            self.current_statement.append(keyword)
+            self.i += keyword_len
+            raise _AlreadyIncremented()
+
+        if c == ";":
+            yield from self._yield_if_complete()
+        else:
+            self.current_statement.append(c)
+
+
+def split_statements(sql: str) -> Iterator[str]:
+    """
+    Split T-SQL code into individual statements, handling various SQL constructs.
+    """
+
+    splitter = _StatementSplitter(sql)
+    yield from splitter.process()
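A small illustration of the new splitting behavior (the T-SQL snippet is invented for demonstration): per the keyword lists above, CASE ... END stays inside a single statement, while a SELECT that follows a DROP or TRUNCATE starts a new one.

from datahub.sql_parsing.split_statements import split_statements

sql = """\
CREATE TABLE #tmp (id INT);
SELECT CASE WHEN id > 0 THEN 'pos' ELSE 'neg' END FROM #tmp
DROP TABLE #tmp
SELECT 1
"""

# Each END that closes a CASE only decrements the counter instead of
# terminating the statement, so the CASE expression stays intact.
for statement in split_statements(sql):
    print(repr(statement))

With this input we'd expect four statements back: the CREATE, the SELECT with its embedded CASE ... END, the DROP, and the trailing SELECT 1. Note the CASE/END counting stays balanced here, so the leftover breakpoint() in _yield_if_complete is never hit.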