castor-extractor 0.16.11__py3-none-any.whl → 0.16.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- CHANGELOG.md +16 -0
- castor_extractor/utils/client/api.py +8 -3
- castor_extractor/utils/retry.py +3 -1
- castor_extractor/visualization/tableau_revamp/client/client.py +5 -2
- castor_extractor/visualization/tableau_revamp/client/gql_queries.py +10 -1
- castor_extractor/warehouse/abstract/__init__.py +2 -0
- castor_extractor/warehouse/abstract/asset.py +13 -0
- castor_extractor/warehouse/databricks/client.py +239 -3
- castor_extractor/warehouse/databricks/client_test.py +61 -1
- castor_extractor/warehouse/databricks/extract.py +36 -0
- castor_extractor/warehouse/databricks/format.py +13 -0
- castor_extractor/warehouse/databricks/test_constants.py +79 -0
- castor_extractor/warehouse/databricks/types.py +6 -1
- castor_extractor/warehouse/snowflake/extract.py +2 -0
- castor_extractor/warehouse/snowflake/queries/function.sql +10 -0
- {castor_extractor-0.16.11.dist-info → castor_extractor-0.16.15.dist-info}/METADATA +1 -1
- {castor_extractor-0.16.11.dist-info → castor_extractor-0.16.15.dist-info}/RECORD +20 -18
- {castor_extractor-0.16.11.dist-info → castor_extractor-0.16.15.dist-info}/LICENCE +0 -0
- {castor_extractor-0.16.11.dist-info → castor_extractor-0.16.15.dist-info}/WHEEL +0 -0
- {castor_extractor-0.16.11.dist-info → castor_extractor-0.16.15.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED
@@ -1,5 +1,21 @@
 # Changelog
 
+## 0.16.15 - 2024-06-07
+
+* Tableau: extract database_name for CustomSQLTables
+
+## 0.16.14 - 2024-06-06
+
+* Snowflake: Extract SQL user defined function
+
+## 0.16.13 - 2024-06-05
+
+* Tableau: extract database_name for tables
+
+## 0.16.12 - 2024-06-04
+
+* Databricks: Extract lineage
+
 ## 0.16.11 - 2024-06-03
 
 * Tableau: add extra fields to optimise storage
castor_extractor/utils/client/api.py
CHANGED
@@ -5,7 +5,7 @@ import requests
 
 logger = logging.getLogger(__name__)
 
-
+DEFAULT_TIMEOUT_S = 30
 
 # https://requests.readthedocs.io/en/latest/api/#requests.request
 HttpMethod = Literal["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"]
@@ -20,7 +20,7 @@ class APIClient:
     def __init__(self, host: str, token: Optional[str] = None):
         self._host = host
         self._token = token or ""
-        self._timeout =
+        self._timeout = DEFAULT_TIMEOUT_S
 
     @staticmethod
     def build_url(host: str, path: str):
@@ -44,7 +44,12 @@ class APIClient:
     ) -> Any:
         logger.debug(f"Calling {method} on {url}")
         result = requests.request(
-            method,
+            method,
+            url,
+            headers=self._headers(),
+            params=params,
+            json=data,
+            timeout=self._timeout,
         )
         result.raise_for_status()
 
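Worth noting: `requests` has no default timeout, so before this change a stalled endpoint could hang an extraction run indefinitely. A minimal sketch of the patched call pattern (the URL is a placeholder, not a real endpoint):

import requests

DEFAULT_TIMEOUT_S = 30

# With timeout=..., a stalled server raises requests.exceptions.Timeout
# after 30 seconds instead of blocking forever; raise_for_status still
# surfaces HTTP errors as exceptions.
response = requests.request(
    "GET",
    "https://example.com/api/health",  # placeholder URL
    timeout=DEFAULT_TIMEOUT_S,
)
response.raise_for_status()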
castor_extractor/utils/retry.py
CHANGED
@@ -68,7 +68,8 @@ class Retry(BaseModel):
         self._retry_attempts += 1
         wait_ms = self.base() + self.jitter()
         wait_s = float(wait_ms) / MS_IN_SEC
-
+        msg = f"Attempting a new call in {wait_s} seconds, {self._retry_attempts} attempt(s) / {self.max_retries} max retries"
+        logger.warning(msg)
         time.sleep(wait_s)
         return True
 
@@ -93,6 +94,7 @@ def retry(
         try:
             return None, callable(*args, **kwargs)
         except exceptions_ as err:
+            logger.warning(f"Exception within {callable.__name__}")
            return err, None
 
     def _func(*args, **kwargs) -> Any:
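The two new log lines instrument an existing backoff loop. A self-contained sketch of that pattern, assuming `Retry` waits base + jitter milliseconds between attempts (the helper below is illustrative, not the package's API):

import logging
import random
import time

logger = logging.getLogger(__name__)
MS_IN_SEC = 1000


def call_with_retry(func, max_retries=3, base_ms=1000, exceptions=(Exception,)):
    """Retry func() with base + jitter backoff, logging every attempt."""
    for attempt in range(1, max_retries + 1):
        try:
            return func()
        except exceptions:
            logger.warning(f"Exception within {func.__name__}")
            wait_ms = base_ms + random.randint(0, base_ms)  # assumed jitter
            wait_s = float(wait_ms) / MS_IN_SEC
            logger.warning(
                f"Attempting a new call in {wait_s} seconds, "
                f"{attempt} attempt(s) / {max_retries} max retries"
            )
            time.sleep(wait_s)
    return func()  # last attempt: let the exception propagate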
castor_extractor/visualization/tableau_revamp/client/client.py
CHANGED
@@ -31,8 +31,11 @@ _TSC_ASSETS = (
 # increase the value when extraction is too slow
 # decrease the value when timeouts arise
 _CUSTOM_PAGE_SIZE: Dict[TableauRevampAsset, int] = {
-    #
-
+    # for some clients, extraction of columns tend to hit the node limit
+    # https://community.tableau.com/s/question/0D54T00000YuK60SAF/metadata-query-nodelimitexceeded-error
+    # the workaround is to reduce pagination
+    TableauRevampAsset.COLUMN: 50,
+    # fields are light but volumes are bigger
     TableauRevampAsset.FIELD: 1000,
     TableauRevampAsset.TABLE: 50,
 }
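A minimal sketch of how a per-asset page size like this is typically consulted; the fallback value and helper name are assumptions, not the package's API. Smaller pages keep each Metadata API response under the node limit, at the cost of more requests:

from enum import Enum
from typing import Dict


class TableauRevampAsset(Enum):  # reduced stand-in for the package's enum
    COLUMN = "column"
    FIELD = "field"
    TABLE = "table"
    WORKBOOK = "workbook"


_CUSTOM_PAGE_SIZE: Dict[TableauRevampAsset, int] = {
    TableauRevampAsset.COLUMN: 50,  # avoid NODE_LIMIT_EXCEEDED on heavy columns
    TableauRevampAsset.FIELD: 1000,
    TableauRevampAsset.TABLE: 50,
}

_DEFAULT_PAGE_SIZE = 100  # assumed fallback


def page_size(asset: TableauRevampAsset) -> int:
    """Smaller pages for node-heavy assets, bigger pages for light ones."""
    return _CUSTOM_PAGE_SIZE.get(asset, _DEFAULT_PAGE_SIZE)


assert page_size(TableauRevampAsset.COLUMN) == 50
assert page_size(TableauRevampAsset.WORKBOOK) == 100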
castor_extractor/visualization/tableau_revamp/client/gql_queries.py
CHANGED
@@ -63,12 +63,21 @@ downstreamWorkbooks { id }
 id
 name
 ... on DatabaseTable {
-  connectionType
   fullName
   schema
+  database {
+    connectionType
+    id
+    name
+  }
 }
 ... on CustomSQLTable {
   query
+  database {
+    connectionType
+    id
+    name
+  }
 }
 """
 
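The effect of the change: `connectionType` moves from the table level into a nested `database` object, and CustomSQLTables now carry the same block, which is where the changelog's `database_name` comes from. A hypothetical response fragment (shape only, values invented for illustration):

# hypothetical Metadata API response fragment for the query above
table = {
    "id": "abc-123",
    "name": "account_adjustments",
    "fullName": "[bronze].[account_adjustments]",
    "schema": "bronze",
    "database": {
        "connectionType": "snowflake",
        "id": "db-456",
        "name": "broward_prd",
    },
}
database_name = table["database"]["name"]  # now available for CustomSQLTables too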
castor_extractor/warehouse/abstract/asset.py
CHANGED
@@ -7,6 +7,8 @@ from ...types import ExternalAsset, classproperty
 class WarehouseAsset(ExternalAsset):
     """Assets that can be extracted from warehouses"""
 
+    ADDITIONAL_COLUMN_LINEAGE = "additional_column_lineage"
+    ADDITIONAL_TABLE_LINEAGE = "additional_table_lineage"
     COLUMN = "column"
     COLUMN_LINEAGE = "column_lineage"  # specific to snowflake
     DATABASE = "database"
@@ -19,12 +21,15 @@ class WarehouseAsset(ExternalAsset):
     ROLE = "role"
     SCHEMA = "schema"
     TABLE = "table"
+    FUNCTION = "function"
     USER = "user"
     VIEW_DDL = "view_ddl"
 
     @classproperty
     def optional(cls) -> Set["WarehouseAsset"]:
         return {
+            WarehouseAsset.ADDITIONAL_COLUMN_LINEAGE,
+            WarehouseAsset.ADDITIONAL_TABLE_LINEAGE,
             WarehouseAsset.EXTERNAL_COLUMN_LINEAGE,
             WarehouseAsset.EXTERNAL_TABLE_LINEAGE,
         }
@@ -33,8 +38,10 @@ class WarehouseAsset(ExternalAsset):
 class WarehouseAssetGroup(Enum):
     """Groups of assets that can be extracted together"""
 
+    ADDITIONAL_LINEAGE = "additional_lineage"
     CATALOG = "catalog"
     EXTERNAL_LINEAGE = "external_lineage"
+    FUNCTION = "function"
     QUERY = "query"
     ROLE = "role"
     SNOWFLAKE_LINEAGE = "snowflake_lineage"
@@ -53,6 +60,7 @@ CATALOG_ASSETS = (
 )
 
 # shared by technologies supporting queries
+FUNCTIONS_ASSETS = (WarehouseAsset.FUNCTION,)
 QUERIES_ASSETS = (WarehouseAsset.QUERY,)
 VIEWS_ASSETS = (WarehouseAsset.VIEW_DDL,)
 
@@ -61,6 +69,11 @@ EXTERNAL_LINEAGE_ASSETS = (
     WarehouseAsset.EXTERNAL_TABLE_LINEAGE,
 )
 
+ADDITIONAL_LINEAGE_ASSETS = (
+    WarehouseAsset.ADDITIONAL_COLUMN_LINEAGE,
+    WarehouseAsset.ADDITIONAL_TABLE_LINEAGE,
+)
+
 NON_EXTRACTABLE_ASSETS = {WarehouseAssetGroup.EXTERNAL_LINEAGE}
 
 
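The two new lineage assets are registered in `optional`, which, judging by the classproperty, means a warehouse that cannot provide them should not fail the run. A reduced stand-in sketch of the pattern (not the package's actual classes):

from enum import Enum
from typing import Set


class WarehouseAsset(Enum):  # reduced stand-in
    ADDITIONAL_COLUMN_LINEAGE = "additional_column_lineage"
    ADDITIONAL_TABLE_LINEAGE = "additional_table_lineage"
    TABLE = "table"

    @classmethod
    def optional(cls) -> Set["WarehouseAsset"]:
        return {cls.ADDITIONAL_COLUMN_LINEAGE, cls.ADDITIONAL_TABLE_LINEAGE}


# assumption: a missing optional asset is skipped instead of aborting extraction
assert WarehouseAsset.ADDITIONAL_TABLE_LINEAGE in WarehouseAsset.optional()
assert WarehouseAsset.TABLE not in WarehouseAsset.optional()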
castor_extractor/warehouse/databricks/client.py
CHANGED
@@ -1,18 +1,38 @@
 import logging
+from concurrent.futures import ThreadPoolExecutor
 from datetime import date
 from functools import partial
-from typing import Any, Dict, List, Optional, Set
+from typing import Any, Dict, List, Optional, Set, Tuple, cast
 
-
+import requests
+
+from ...utils import (
+    SafeMode,
+    at_midnight,
+    date_after,
+    mapping_from_rows,
+    retry,
+    safe_mode,
+)
 from ...utils.client.api import APIClient
 from ...utils.pager import PagerOnToken
 from ..abstract.time_filter import TimeFilter
 from .credentials import DatabricksCredentials
 from .format import DatabricksFormatter
-from .types import TablesColumns
+from .types import Link, Ostr, OTimestampedLink, TablesColumns, TimestampedLink
 
 logger = logging.getLogger(__name__)
 
+_MAX_NUMBER_OF_LINEAGE_ERRORS = 1000
+_MAX_THREADS = 10
+_RETRY_ATTEMPTS = 3
+_RETRY_BASE_MS = 1000
+_RETRY_EXCEPTIONS = [
+    requests.exceptions.ConnectTimeout,
+]
+
+safe_params = SafeMode((BaseException,), _MAX_NUMBER_OF_LINEAGE_ERRORS)
+
 
 def _day_to_epoch_ms(day: date) -> int:
     return int(at_midnight(day).timestamp() * 1000)
@@ -22,6 +42,30 @@ def _day_hour_to_epoch_ms(day: date, hour: int) -> int:
     return int(at_midnight(day).timestamp() * 1000) + (hour * 3600 * 1000)
 
 
+class LineageLinks:
+    """
+    helper class that handles lineage deduplication and filtering
+    """
+
+    def __init__(self):
+        self.lineage: Dict[Link, Ostr] = dict()
+
+    def add(self, timestamped_link: TimestampedLink) -> None:
+        """
+        keep the most recent lineage link, adding to `self.lineage`
+        """
+        parent, child, timestamp = timestamped_link
+        link = (parent, child)
+        if not self.lineage.get(link):
+            self.lineage[link] = timestamp
+        else:
+            if not timestamp:
+                return
+            # keep most recent link; cast for mypy
+            recent = max(cast(str, self.lineage[link]), cast(str, timestamp))
+            self.lineage[link] = recent
+
+
 class DatabricksClient(APIClient):
     """Databricks Client"""
 
@@ -123,6 +167,198 @@ class DatabricksClient(APIClient):
         columns.extend(c_to_add)
         return tables, columns
 
+    @staticmethod
+    def _to_table_path(table: dict) -> Ostr:
+        if table.get("name"):
+            return f"{table['catalog_name']}.{table['schema_name']}.{table['name']}"
+        return None
+
+    @staticmethod
+    def _to_column_path(column: dict) -> Ostr:
+        if column.get("name"):
+            return f"{column['catalog_name']}.{column['schema_name']}.{column['table_name']}.{column['name']}"
+        return None
+
+    def _link(
+        self, path_from: Ostr, path_to: Ostr, timestamp: Ostr
+    ) -> OTimestampedLink:
+        """exclude missing path and self-lineage"""
+        if (not path_from) or (not path_to):
+            return None
+        is_self_lineage = path_from.lower() == path_to.lower()
+        if is_self_lineage:
+            return None
+        return (path_from, path_to, timestamp)
+
+    def _single_table_lineage_links(
+        self, table_path: str, single_table_lineage: dict
+    ) -> List[TimestampedLink]:
+        """
+        process databricks lineage API response for a given table
+        returns a list of (parent, child, timestamp)
+
+        Note: in `upstreams` or `downstreams` we only care about `tableInfo`,
+        we could also have `notebookInfos` or `fileInfo`
+        """
+        links: List[OTimestampedLink] = []
+        # add parent:
+        for link in single_table_lineage.get("upstreams", []):
+            parent = link.get("tableInfo", {})
+            parent_path = self._to_table_path(parent)
+            timestamp: Ostr = parent.get("lineage_timestamp")
+            links.append(self._link(parent_path, table_path, timestamp))
+
+        # add children:
+        for link in single_table_lineage.get("downstreams", []):
+            child = link.get("tableInfo", {})
+            child_path = self._to_table_path(child)
+            timestamp = child.get("lineage_timestamp")
+            links.append(self._link(table_path, child_path, timestamp))
+
+        return list(filter(None, links))
+
+    @safe_mode(safe_params, lambda: [])
+    @retry(
+        exceptions=_RETRY_EXCEPTIONS,
+        max_retries=_RETRY_ATTEMPTS,
+        base_ms=_RETRY_BASE_MS,
+    )
+    def get_single_table_lineage(
+        self, table_path: str
+    ) -> List[TimestampedLink]:
+        """
+        Helper function used in get_lineage_links.
+        Call data lineage API and return the content of the result
+        eg table_path: broward_prd.bronze.account_adjustments
+        FYI: Maximum rate of 50 requests per SECOND
+        """
+        path = "api/2.0/lineage-tracking/table-lineage"
+        payload = {"table_name": table_path, "include_entity_lineage": True}
+        content = self.get(path=path, payload=payload)
+        return self._single_table_lineage_links(table_path, content)
+
+    def _deduplicate_lineage(self, lineages: List[TimestampedLink]) -> dict:
+        deduplicated_lineage = LineageLinks()
+        for timestamped_link in lineages:
+            deduplicated_lineage.add(timestamped_link)
+        return deduplicated_lineage.lineage
+
+    def table_lineage(self, tables: List[dict]) -> List[dict]:
+        """
+        Wrapper function that retrieves all table lineage
+        """
+        # retrieve table lineage
+        with ThreadPoolExecutor(max_workers=_MAX_THREADS) as executor:
+            table_paths = [
+                ".".join([table["schema_id"], table["table_name"]])
+                for table in tables
+            ]
+            results = executor.map(self.get_single_table_lineage, table_paths)
+        lineages = [link for links in results for link in links]
+        deduplicated = self._deduplicate_lineage(lineages)
+        return self.formatter.format_lineage(deduplicated)
+
+    @staticmethod
+    def _paths_for_column_lineage(
+        tables: List[dict], columns: List[dict], table_lineage: List[dict]
+    ) -> List[Tuple[str, str]]:
+        """
+        helper providing a list of candidate columns to look lineage for:
+        we only look for column lineage where there is table lineage
+        """
+        # mapping between table id and its path db.schema.table
+        # table["schema_id"] follows the pattern `db.schema`
+        mapping = {
+            table["id"]: ".".join([table["schema_id"], table["table_name"]])
+            for table in tables
+        }
+
+        tables_with_lineage: Set[str] = set()
+        for t in table_lineage:
+            tables_with_lineage.add(t["parent_path"])
+            tables_with_lineage.add(t["child_path"])
+
+        paths_to_return: List[Tuple[str, str]] = []
+        for column in columns:
+            table_path = mapping[column["table_id"]]
+            if table_path not in tables_with_lineage:
+                continue
+            column_ = (table_path, column["column_name"])
+            paths_to_return.append(column_)
+
+        return paths_to_return
+
+    def _single_column_lineage_links(
+        self, column_path: str, single_column_lineage: dict
+    ) -> List[TimestampedLink]:
+        """
+        process databricks lineage API response for a given table
+        returns a list of (parent, child, timestamp)
+
+        Note: in `upstreams` or `downstreams` we only care about `tableInfo`,
+        we could also have `notebookInfos` or `fileInfo`
+        """
+        links: List[OTimestampedLink] = []
+        # add parent:
+        for link in single_column_lineage.get("upstream_cols", []):
+            parent_path = self._to_column_path(link)
+            timestamp: Ostr = link.get("lineage_timestamp")
+            links.append(self._link(parent_path, column_path, timestamp))
+
+        # add children:
+        for link in single_column_lineage.get("downstream_cols", []):
+            child_path = self._to_column_path(link)
+            timestamp = link.get("lineage_timestamp")
+            links.append(self._link(column_path, child_path, timestamp))
+
+        return list(filter(None, links))
+
+    @safe_mode(safe_params, lambda: [])
+    @retry(
+        exceptions=_RETRY_EXCEPTIONS,
+        max_retries=_RETRY_ATTEMPTS,
+        base_ms=_RETRY_BASE_MS,
+    )
+    def get_single_column_lineage(
+        self,
+        names: Tuple[str, str],
+    ) -> List[TimestampedLink]:
+        """
+        Helper function used in get_lineage_links.
+        Call data lineage API and return the content of the result
+
+        eg table_path: broward_prd.bronze.account_adjustments
+        FYI: Maximum rate of 10 requests per SECOND
+        """
+        table_path, column_name = names
+        api_path = "api/2.0/lineage-tracking/column-lineage"
+        payload = {
+            "table_name": table_path,
+            "column_name": column_name,
+            "include_entity_lineage": True,
+        }
+        content = self.get(path=api_path, payload=payload)
+        column_path = f"{table_path}.{column_name}"
+        return self._single_column_lineage_links(column_path, content)
+
+    def column_lineage(
+        self, tables: List[dict], columns: List[dict], table_lineage: List[dict]
+    ) -> List[dict]:
+        """
+        Wrapper function that retrieves all column lineage
+        we only try to retrieve column lineage if we found table lineage
+        """
+        candidate_paths = self._paths_for_column_lineage(
+            tables, columns, table_lineage
+        )
+        lineages: List[TimestampedLink] = [
+            link
+            for paths in candidate_paths
+            for link in self.get_single_column_lineage(paths)
+        ]
+        deduplicated = self._deduplicate_lineage(lineages)
+        return self.formatter.format_lineage(deduplicated)
+
     @staticmethod
     def _time_filter(time_filter: Optional[TimeFilter]) -> dict:
         """time filter to retrieve Databricks' queries"""
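A usage sketch of the deduplication behaviour, mirroring the unit test below: `LineageLinks.add` keeps one entry per (parent, child) pair, and because the timestamps are lexicographically sortable strings, `max` picks the most recent one.

# illustration of LineageLinks.add (the class is defined in the diff above)
links = LineageLinks()

links.add(("dev.silver.pre_analytics", "dev.silver.analytics", None))
links.add(
    ("dev.silver.pre_analytics", "dev.silver.analytics", "2024-04-19 20:20:20.0")
)
links.add(  # older timestamp: ignored, the most recent one wins
    ("dev.silver.pre_analytics", "dev.silver.analytics", "2024-04-18 20:20:20.0")
)

assert links.lineage == {
    ("dev.silver.pre_analytics", "dev.silver.analytics"): "2024-04-19 20:20:20.0"
}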
castor_extractor/warehouse/databricks/client_test.py
CHANGED
@@ -1,9 +1,16 @@
 from datetime import date
+from unittest.mock import Mock, patch
 
 from freezegun import freeze_time
 
 from ..abstract.time_filter import TimeFilter
-from .client import DatabricksClient, _day_hour_to_epoch_ms
+from .client import DatabricksClient, LineageLinks, _day_hour_to_epoch_ms
+from .test_constants import (
+    CLOSER_DATE,
+    MOCK_TABLES_FOR_TABLE_LINEAGE,
+    OLDER_DATE,
+    TABLE_LINEAGE_SIDE_EFFECT,
+)
 
 
 def test__day_hour_to_epoch_ms():
@@ -97,3 +104,56 @@ def test_DatabricksClient__match_table_with_user():
     table_without_owner = {"id": 1, "owner_email": None}
     actual = client._match_table_with_user(table_without_owner, user_mapping)
     assert actual == table_without_owner
+
+
+@patch(
+    "source.packages.extractor.castor_extractor.warehouse.databricks.client.DatabricksClient.get",
+    side_effect=TABLE_LINEAGE_SIDE_EFFECT,
+)
+def test_DatabricksClient_table_lineage(mock_get):
+    client = DatabricksClient(Mock())
+
+    lineage = client.table_lineage(MOCK_TABLES_FOR_TABLE_LINEAGE)
+    assert len(lineage) == 2
+
+    expected_link_1 = {
+        "parent_path": "dev.silver.pre_analytics",
+        "child_path": "dev.silver.analytics",
+        "timestamp": OLDER_DATE,
+    }
+    expected_link_2 = {
+        "parent_path": "dev.bronze.analytics",
+        "child_path": "dev.silver.analytics",
+        "timestamp": CLOSER_DATE,
+    }
+    assert expected_link_1 in lineage
+    assert expected_link_2 in lineage
+
+
+def test_LineageLinks_add():
+    links = LineageLinks()
+    timestamped_link = ("parent", "child", None)
+    expected_key = ("parent", "child")
+
+    links.add(timestamped_link)
+
+    assert expected_key in links.lineage
+    assert links.lineage[expected_key] is None
+
+    # we replace None by an actual timestamp
+    timestamped_link = ("parent", "child", OLDER_DATE)
+    links.add(timestamped_link)
+    assert expected_key in links.lineage
+    assert links.lineage[expected_key] == OLDER_DATE
+
+    # we update with the more recent timestamp
+    timestamped_link = ("parent", "child", CLOSER_DATE)
+    links.add(timestamped_link)
+    assert expected_key in links.lineage
+    assert links.lineage[expected_key] == CLOSER_DATE
+
+    # we keep the more recent timestamp
+    timestamped_link = ("parent", "child", OLDER_DATE)
+    links.add(timestamped_link)
+    assert expected_key in links.lineage
+    assert links.lineage[expected_key] == CLOSER_DATE
castor_extractor/warehouse/databricks/extract.py
CHANGED
@@ -3,6 +3,7 @@ from typing import Dict, Optional
 
 from ...utils import AbstractStorage, LocalStorage, write_summary
 from ..abstract import (
+    ADDITIONAL_LINEAGE_ASSETS,
     CATALOG_ASSETS,
     EXTERNAL_LINEAGE_ASSETS,
     QUERIES_ASSETS,
@@ -17,6 +18,7 @@ from .client import DatabricksClient
 from .credentials import to_credentials
 
 DATABRICKS_ASSETS: SupportedAssets = {
+    WarehouseAssetGroup.ADDITIONAL_LINEAGE: ADDITIONAL_LINEAGE_ASSETS,
     WarehouseAssetGroup.CATALOG: CATALOG_ASSETS,
     WarehouseAssetGroup.QUERY: QUERIES_ASSETS,
     WarehouseAssetGroup.ROLE: (WarehouseAsset.USER,),
@@ -94,6 +96,39 @@ class DatabricksExtractionProcessor:
         logger.info(f"Extracted {len(columns)} columns to {location}")
         return catalog_locations
 
+    def extract_lineage(self) -> Paths:
+        if self._should_not_reextract(WarehouseAssetGroup.ADDITIONAL_LINEAGE):
+            return self._existing_group_paths(
+                WarehouseAssetGroup.ADDITIONAL_LINEAGE
+            )
+        lineage_locations: Dict[str, str] = dict()
+
+        # extract catalog
+        databases = self._client.databases()
+        schemas = self._client.schemas(databases)
+        users = self._client.users()
+        tables, columns = self._client.tables_and_columns(schemas, users)
+        logger.info("Extracted pre-requisite catalog. Next comes lineage")
+
+        # extract table lineage
+        table_lineage = self._client.table_lineage(tables)
+        table_lineage_key = WarehouseAsset.ADDITIONAL_TABLE_LINEAGE.value
+        location = self._storage.put(table_lineage_key, table_lineage)
+        lineage_locations[table_lineage_key] = location
+        msg = f"Extracted {len(table_lineage)} table lineage to {location}"
+        logger.info(msg)
+
+        # extract column lineage
+        column_lineage = self._client.column_lineage(
+            tables, columns, table_lineage
+        )
+        column_lineage_key = WarehouseAsset.ADDITIONAL_COLUMN_LINEAGE.value
+        location = self._storage.put(column_lineage_key, column_lineage)
+        lineage_locations[column_lineage_key] = location
+        msg = f"Extracted {len(column_lineage)} column lineage to {location}"
+        logger.info(msg)
+        return lineage_locations
+
     def extract_query(self, time_filter: OTimeFilter = None) -> Paths:
         """extract yesterday's queries and return their location"""
         if self._should_not_reextract(WarehouseAssetGroup.QUERY):
@@ -149,6 +184,7 @@ def extract_all(**kwargs) -> None:
     )
 
     extractor.extract_catalog()
+    extractor.extract_lineage()
     extractor.extract_query()
     extractor.extract_role()
     extractor.extract_view_ddl()
castor_extractor/warehouse/databricks/format.py
CHANGED
@@ -95,6 +95,19 @@ class DatabricksFormatter:
 
         return tables, columns
 
+    @staticmethod
+    def format_lineage(timestamps: dict) -> List[dict]:
+        lineage: List[dict] = []
+        for link, timestamp in timestamps.items():
+            parent_path, child_path = link
+            link_ = {
+                "parent_path": parent_path,
+                "child_path": child_path,
+                "timestamp": timestamp,
+            }
+            lineage.append(link_)
+        return lineage
+
     @staticmethod
     def format_query(raw_queries: List[dict]) -> List[dict]:
         queries = []
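In short, `format_lineage` flattens the deduplicated `{(parent, child): timestamp}` mapping into a list of row dicts. A standalone sketch copying the diff's logic:

from typing import List


def format_lineage(timestamps: dict) -> List[dict]:  # copy of the diff's logic
    lineage: List[dict] = []
    for (parent_path, child_path), timestamp in timestamps.items():
        lineage.append(
            {
                "parent_path": parent_path,
                "child_path": child_path,
                "timestamp": timestamp,
            }
        )
    return lineage


rows = format_lineage(
    {("dev.silver.pre_analytics", "dev.silver.analytics"): "2024-04-18 20:20:20.0"}
)
assert rows == [
    {
        "parent_path": "dev.silver.pre_analytics",
        "child_path": "dev.silver.analytics",
        "timestamp": "2024-04-18 20:20:20.0",
    }
]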
castor_extractor/warehouse/databricks/test_constants.py
ADDED
@@ -0,0 +1,79 @@
+OLDER_DATE = "2024-04-18 20:20:20.0"
+CLOSER_DATE = "2024-04-19 20:20:20.0"
+
+MOCK_TABLES_FOR_TABLE_LINEAGE = [
+    {
+        "id": "f51ba2ca-8cc3-4de6-8f8b-730359e8f40f",
+        "schema_id": "dev.silver",
+        "table_name": "analytics",
+    },
+    {
+        "id": "4e140bdc-a67c-4b68-8a07-c684657d8b44",
+        "schema_id": "dev.silver",
+        "table_name": "pre_analytics",
+    },
+    {
+        "id": "7d403198-55ea-4a40-9995-6ee2f4c79dfa",
+        "schema_id": "dev.bronze",
+        "table_name": "analytics",
+    },
+]
+
+_RAW_LINEAGE_DEV_SILVER_ANALYTICS = {
+    "upstreams": [
+        {  # there could be other keys: jobInfos, notebookInfos, queryInfos
+            "tableInfo": {
+                "name": "pre_analytics",
+                "catalog_name": "dev",
+                "schema_name": "silver",
+                "table_type": "PERSISTED_VIEW",  # not used
+                "lineage_timestamp": OLDER_DATE,
+            }
+        },
+        {
+            "tableInfo": {
+                "name": "analytics",
+                "catalog_name": "dev",
+                "schema_name": "bronze",
+                "table_type": "PERSISTED_VIEW",  # not used
+                "lineage_timestamp": CLOSER_DATE,
+            }
+        },
+    ],
+    "downstreams": [],
+}
+_RAW_LINEAGE_DEV_SILVER_PRE_ANALYTICS = {
+    "upstreams": [],
+    "downstreams": [
+        {
+            "tableInfo": {
+                "name": "analytics",
+                "catalog_name": "dev",
+                "schema_name": "silver",
+                "table_type": "PERSISTED_VIEW",  # not used
+                "lineage_timestamp": OLDER_DATE,
+            }
+        },
+    ],
+}
+_RAW_LINEAGE_DEV_BRONZE_ANALYTICS = {
+    "upstreams": [],
+    "downstreams": [
+        {
+            "tableInfo": {
+                "name": "analytics",
+                "catalog_name": "dev",
+                "schema_name": "silver",
+                "table_type": "PERSISTED_VIEW",  # not used
+                "lineage_timestamp": OLDER_DATE,
+            }
+        },
+    ],
+}
+
+# should be in the same order as MOCK_TABLES_FOR_TABLE_LINEAGE
+TABLE_LINEAGE_SIDE_EFFECT: tuple = (
+    _RAW_LINEAGE_DEV_SILVER_ANALYTICS,
+    _RAW_LINEAGE_DEV_SILVER_PRE_ANALYTICS,
+    _RAW_LINEAGE_DEV_BRONZE_ANALYTICS,
+)
castor_extractor/warehouse/snowflake/extract.py
CHANGED
@@ -4,6 +4,7 @@ from ...utils import LocalStorage, from_env, write_summary
 from ..abstract import (
     CATALOG_ASSETS,
     EXTERNAL_LINEAGE_ASSETS,
+    FUNCTIONS_ASSETS,
     QUERIES_ASSETS,
     VIEWS_ASSETS,
     SQLExtractionProcessor,
@@ -20,6 +21,7 @@ logger = logging.getLogger(__name__)
 
 SNOWFLAKE_ASSETS: SupportedAssets = {
     WarehouseAssetGroup.CATALOG: CATALOG_ASSETS,
+    WarehouseAssetGroup.FUNCTION: FUNCTIONS_ASSETS,
     WarehouseAssetGroup.QUERY: QUERIES_ASSETS,
     WarehouseAssetGroup.VIEW_DDL: VIEWS_ASSETS,
     WarehouseAssetGroup.ROLE: (
castor_extractor/warehouse/snowflake/queries/function.sql
ADDED
@@ -0,0 +1,10 @@
+SELECT
+    f.function_name AS name,
+    CONCAT(f.function_catalog, '.', f.function_schema, '.', f.function_name) AS path,
+    f.argument_signature AS signature,
+    f.function_definition AS definition
+FROM snowflake.account_usage.functions f
+WHERE TRUE
+    AND f.function_catalog NOT IN ('SNOWFLAKE', 'UTIL_DB')
+    AND f.function_language = 'SQL'
+    AND deleted IS NULL
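The query reads `snowflake.account_usage.functions`, keeping only live SQL-language UDFs outside the system databases. A hypothetical result row, shaped as the extractor would see it (values invented for illustration):

# hypothetical row returned by function.sql (keys follow the SELECT aliases)
row = {
    "NAME": "DAYS_BETWEEN",
    "PATH": "ANALYTICS.REPORTING.DAYS_BETWEEN",
    "SIGNATURE": "(START_DAY DATE, END_DAY DATE)",
    "DEFINITION": "DATEDIFF('day', start_day, end_day)",
}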
{castor_extractor-0.16.11.dist-info → castor_extractor-0.16.15.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-CHANGELOG.md,sha256=
+CHANGELOG.md,sha256=QYFobUPMbdi6cidq_yU-oMbXWoAr1BjTE6thfdZ9tA4,10866
 Dockerfile,sha256=HcX5z8OpeSvkScQsN-Y7CNMUig_UB6vTMDl7uqzuLGE,303
 LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
 README.md,sha256=uF6PXm9ocPITlKVSh9afTakHmpLx3TvawLf-CbMP3wM,3578
@@ -47,7 +47,7 @@ castor_extractor/uploader/utils.py,sha256=NCe0tkB28BVhqzOaDhDjaSfODjjcPWB17X6chn
 castor_extractor/utils/__init__.py,sha256=bmzAOc-PKsVreMJtF7DGpPQeHrVqxWel_BblRftt6Ag,1186
 castor_extractor/utils/client/__init__.py,sha256=CRE-xJKm6fVV9dB8ljzB5YoOxX4I1sCD1KSgqs3Y8_Y,161
 castor_extractor/utils/client/abstract.py,sha256=aA5Qcb9TwWDSMq8WpXbGkOB20hehwX2VTpqQAwV76wk,2048
-castor_extractor/utils/client/api.py,sha256=
+castor_extractor/utils/client/api.py,sha256=z1o4fteWx1HxNTqCYihl9sGkIgSQTbd8lW_B9Y2wyeQ,1742
 castor_extractor/utils/client/api_test.py,sha256=NSMdXg1FLc37erqHp2FZsIsogWVv6lFSs7rDXHikr-E,542
 castor_extractor/utils/client/postgres.py,sha256=n6ulaT222WWPY0_6qAZ0MHF0m91HtI9mMqL71nyygo0,866
 castor_extractor/utils/client/query.py,sha256=O6D5EjD1KmBlwa786Uw4D4kzxx97_HH50xIIeSWt0B8,205
@@ -80,7 +80,7 @@ castor_extractor/utils/pager/pager_on_id_test.py,sha256=CfAXhXaAmCXnm0oflj8_82An
 castor_extractor/utils/pager/pager_on_token.py,sha256=G442SKl4BXJFMPbYIIgCk5M8wl7V3jMg3K1WUUkl0I0,1579
 castor_extractor/utils/pager/pager_on_token_test.py,sha256=w2GCUGKR3cD5lfmtFAsNvExtzxkYdBR0pusBrGKFQ08,2548
 castor_extractor/utils/pager/pager_test.py,sha256=QPBVShSXhkiYZUfnAMs43xnys6CD8pAhL3Jhj-Ov2Xc,1705
-castor_extractor/utils/retry.py,sha256=
+castor_extractor/utils/retry.py,sha256=OsUS3qysHCkgWge8BgBwyuvoWcJ6pR_RQmQDcHlors4,3410
 castor_extractor/utils/retry_test.py,sha256=nsMttlmyKygVcffX3Hay8U2S1BspkGPiCmzIXPpLKyk,2230
 castor_extractor/utils/safe.py,sha256=jpfIimwdBSVUvU2DPFrhqpKC_DSYwxQqd08MlIkSODY,1967
 castor_extractor/utils/safe_test.py,sha256=IHN1Z761tYMFslYC-2HAfkXmFPh4LYSqNLs4QZwykjk,2160
@@ -244,16 +244,16 @@ castor_extractor/visualization/tableau/usage.py,sha256=LlFwlbEr-EnYUJjKZha99CRCR
 castor_extractor/visualization/tableau_revamp/__init__.py,sha256=a3DGjQhaz17gBqW-E84TAgupKbqLC40y5Ajo1yn-ot4,156
 castor_extractor/visualization/tableau_revamp/assets.py,sha256=owlwaI2E4UKk1YhkaHgaAXx6gu3Op6EqZ7bjp0tHI6s,351
 castor_extractor/visualization/tableau_revamp/client/__init__.py,sha256=wmS9uLtUiqNYVloi0-DgD8d2qzu3RVZEAtWiaDp6G_M,90
-castor_extractor/visualization/tableau_revamp/client/client.py,sha256=
+castor_extractor/visualization/tableau_revamp/client/client.py,sha256=RSoHDfz79ma0YJRGpiCihnwLGmoxLzphYrxRVyvByHI,9742
 castor_extractor/visualization/tableau_revamp/client/credentials.py,sha256=fHG32egq6ll2U4BNazalMof_plzfCMQjrN9WOs6kezk,3014
 castor_extractor/visualization/tableau_revamp/client/errors.py,sha256=dTe1shqmWmAXpDpCz-E24m8dGYjt6rvIGV9qQb4jnvI,150
-castor_extractor/visualization/tableau_revamp/client/gql_queries.py,sha256
+castor_extractor/visualization/tableau_revamp/client/gql_queries.py,sha256=-V3ToD5Gi7nmfVB2OxTOZw8dcOiF7_ciSWjjW2UdvvI,2270
 castor_extractor/visualization/tableau_revamp/client/tsc_fields.py,sha256=WsDliPCo-XsQ7wN-j0gpW9bdxCHvgH-aePywiltzfbU,688
 castor_extractor/visualization/tableau_revamp/constants.py,sha256=PcdudAogQhi3e-knalhgliMKjy5ahN0em_-7XSLrnxM,87
 castor_extractor/visualization/tableau_revamp/extract.py,sha256=2SLUxp5okM4AcEJJ61ZgcC2ikfZZl9MH17CEXMXmgl0,1450
 castor_extractor/warehouse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-castor_extractor/warehouse/abstract/__init__.py,sha256=
-castor_extractor/warehouse/abstract/asset.py,sha256=
+castor_extractor/warehouse/abstract/__init__.py,sha256=Fdfa026tgOo64MvzVRLHM_F2G-JmcehrF0mh3dHgb7s,419
+castor_extractor/warehouse/abstract/asset.py,sha256=9nHL4WKUU_vRgj7u3sUdIzgI4rRpdS7YrfwNku4Gz9Q,2652
 castor_extractor/warehouse/abstract/asset_test.py,sha256=_kd4ybNlWSAdSdEgJKC-jhJTa1nMRa9i8RO3YbqKLM4,758
 castor_extractor/warehouse/abstract/extract.py,sha256=fVBhdE-yMI_g6RBYZcr7q-ZVW7jK7WVkO_GO_KfkRqg,2908
 castor_extractor/warehouse/abstract/query.py,sha256=GAgeISCmAdrkTKzFGO79hQDf6SA6EFrrlW43w-LiXKo,2632
@@ -277,13 +277,14 @@ castor_extractor/warehouse/bigquery/queries/view_ddl.sql,sha256=obCm-IN9V8_YSZTw
 castor_extractor/warehouse/bigquery/query.py,sha256=hrFfjd5jW2oQnZ6ozlkn-gDe6sCIzu5zSX19T9W6fIk,4162
 castor_extractor/warehouse/bigquery/types.py,sha256=LZVWSmE57lOemNbB5hBRyYmDk9bFAU4nbRaJWALl6N8,140
 castor_extractor/warehouse/databricks/__init__.py,sha256=bTvDxjGQGM2J3hOnVhfNmFP1y8DK0tySiD_EXe5_xWE,200
-castor_extractor/warehouse/databricks/client.py,sha256=
-castor_extractor/warehouse/databricks/client_test.py,sha256=
+castor_extractor/warehouse/databricks/client.py,sha256=oHR_htE25p5tiAAFZKbF48efo7tqIENW4dAGA7yEqHg,16895
+castor_extractor/warehouse/databricks/client_test.py,sha256=KNp4Hi_CC6GwiW2QDJQQwqALfUebuT9D_qL6FuP_8tY,5246
 castor_extractor/warehouse/databricks/credentials.py,sha256=PpGv5_GP320UQjV_gvaxSpOw58AmqSznmjGhGfe6bdU,655
-castor_extractor/warehouse/databricks/extract.py,sha256
-castor_extractor/warehouse/databricks/format.py,sha256=
+castor_extractor/warehouse/databricks/extract.py,sha256=VX-3uo5dZucenrg-wnPur3CxOgpC5H7Ds92TO7OTAjc,7379
+castor_extractor/warehouse/databricks/format.py,sha256=2bRy2fa45NW3uk030rmyba4n2Em-NnyZPBurUslEbcw,5522
 castor_extractor/warehouse/databricks/format_test.py,sha256=iPmdJof43fBYL1Sa_fBrCWDQHCHgm7IWCZag1kWkj9E,1970
-castor_extractor/warehouse/databricks/
+castor_extractor/warehouse/databricks/test_constants.py,sha256=Hm96yq_ltVAKv7WYhYz637r4Cuj-1cCdyOuxMEe3J-Q,2246
+castor_extractor/warehouse/databricks/types.py,sha256=hD6gC8oiT3QSWEvbtgUOGK_lLzzz36sEauB3lS_wxlE,218
 castor_extractor/warehouse/mysql/__init__.py,sha256=2KFDogo9GNbApHqw3Vm5t_uNmIRjdp76nmP_WQQMfQY,116
 castor_extractor/warehouse/mysql/client.py,sha256=IwoJvbmE5VZkMCP9yHf6ta3_AQPEuBPrZZ3meefbcJs,974
 castor_extractor/warehouse/mysql/client_test.py,sha256=wRTv-3c5chy_HKj-buasNiYOOCIfynYqbabM4Hxdh5E,1052
@@ -334,11 +335,12 @@ castor_extractor/warehouse/snowflake/client.py,sha256=XT0QLVNff_586SDuMe40iu8FCw
 castor_extractor/warehouse/snowflake/client_test.py,sha256=ihWtOOAQfh8pu5JTr_EWfqefKOVIaJXznACURzaU1Qs,1432
 castor_extractor/warehouse/snowflake/credentials.py,sha256=wbUdbx9jVSHzg2kNDhMFuDstbVTyZOcGAwnSeGeFIqs,875
 castor_extractor/warehouse/snowflake/credentials_test.py,sha256=Lkc-DHXOvr50KrqAW4nt_x0IA0Mu_CsBVu6ATnzQB6I,673
-castor_extractor/warehouse/snowflake/extract.py,sha256=
+castor_extractor/warehouse/snowflake/extract.py,sha256=fcze0VBe9OOAFSr25T9L6CY506Vm_xDEvvy8NWuLW1s,2956
 castor_extractor/warehouse/snowflake/queries/.sqlfluff,sha256=vttrwcr64JVIuvc7WIg9C54cbOkjg_VjXNR7YnTGOPE,31
 castor_extractor/warehouse/snowflake/queries/column.sql,sha256=pAW2UNnut0a483OY2rjOXCdCtQg0g254g61Bt51CIB4,1803
 castor_extractor/warehouse/snowflake/queries/column_lineage.sql,sha256=YKBiZ6zySSNcXLDXwm31EjGIIkkkZc0-S6hI1SRM80o,1179
 castor_extractor/warehouse/snowflake/queries/database.sql,sha256=ifZXoKUXtsrGOxml6AcNhA4yybIyatH5va7bcp-lgCU,483
+castor_extractor/warehouse/snowflake/queries/function.sql,sha256=8LRh0ybhd-RldJ8UZspWUm3yv52evq11O2uqIO4KqeQ,372
 castor_extractor/warehouse/snowflake/queries/grant_to_role.sql,sha256=O7AJ1LzoXGDFmiVvQ8EMJ5x8FSAnaxRPdmRyAlEmkUM,272
 castor_extractor/warehouse/snowflake/queries/grant_to_user.sql,sha256=7AalVajU5vRRpIiys1igSwmDXirbwpMTvJr2ihSz2NE,143
 castor_extractor/warehouse/snowflake/queries/query.sql,sha256=-OYcWUvdPBkpOfezkZaW7hrOdDz3JyoqjNdRm_88Rsk,1779
@@ -368,8 +370,8 @@ castor_extractor/warehouse/synapse/queries/schema.sql,sha256=aX9xNrBD_ydwl-znGSF
 castor_extractor/warehouse/synapse/queries/table.sql,sha256=mCE8bR1Vb7j7SwZW2gafcXidQ2fo1HwxcybA8wP2Kfs,1049
 castor_extractor/warehouse/synapse/queries/user.sql,sha256=sTb_SS7Zj3AXW1SggKPLNMCd0qoTpL7XI_BJRMaEpBg,67
 castor_extractor/warehouse/synapse/queries/view_ddl.sql,sha256=3EVbp5_yTgdByHFIPLHmnoOnqqLE77SrjAwFDvu4e54,249
-castor_extractor-0.16.
-castor_extractor-0.16.
-castor_extractor-0.16.
-castor_extractor-0.16.
-castor_extractor-0.16.
+castor_extractor-0.16.15.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
+castor_extractor-0.16.15.dist-info/METADATA,sha256=CsdtS6LQFjsgi0A7tj0sMwtkQVYBye4Savn2DFGBHso,6583
+castor_extractor-0.16.15.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+castor_extractor-0.16.15.dist-info/entry_points.txt,sha256=SbyPk58Gh-FRztfCNnUZQ6w7SatzNJFZ6GIJLNsy7tI,1427
+castor_extractor-0.16.15.dist-info/RECORD,,
{castor_extractor-0.16.11.dist-info → castor_extractor-0.16.15.dist-info}/LICENCE
File without changes

{castor_extractor-0.16.11.dist-info → castor_extractor-0.16.15.dist-info}/WHEEL
File without changes

{castor_extractor-0.16.11.dist-info → castor_extractor-0.16.15.dist-info}/entry_points.txt
File without changes