castor-extractor 0.16.9__py3-none-any.whl → 0.16.15__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

This version of castor-extractor was flagged as a potentially problematic release.

Files changed (24)
  1. CHANGELOG.md +24 -0
  2. castor_extractor/utils/client/api.py +8 -3
  3. castor_extractor/utils/retry.py +3 -1
  4. castor_extractor/visualization/tableau_revamp/client/client.py +8 -2
  5. castor_extractor/visualization/tableau_revamp/client/gql_queries.py +15 -2
  6. castor_extractor/warehouse/abstract/__init__.py +2 -0
  7. castor_extractor/warehouse/abstract/asset.py +13 -0
  8. castor_extractor/warehouse/databricks/client.py +239 -3
  9. castor_extractor/warehouse/databricks/client_test.py +61 -1
  10. castor_extractor/warehouse/databricks/extract.py +36 -0
  11. castor_extractor/warehouse/databricks/format.py +13 -0
  12. castor_extractor/warehouse/databricks/test_constants.py +79 -0
  13. castor_extractor/warehouse/databricks/types.py +6 -1
  14. castor_extractor/warehouse/salesforce/client.py +8 -6
  15. castor_extractor/warehouse/salesforce/extract.py +2 -2
  16. castor_extractor/warehouse/salesforce/format.py +34 -7
  17. castor_extractor/warehouse/salesforce/format_test.py +49 -1
  18. castor_extractor/warehouse/snowflake/extract.py +2 -0
  19. castor_extractor/warehouse/snowflake/queries/function.sql +10 -0
  20. {castor_extractor-0.16.9.dist-info → castor_extractor-0.16.15.dist-info}/METADATA +1 -1
  21. {castor_extractor-0.16.9.dist-info → castor_extractor-0.16.15.dist-info}/RECORD +24 -22
  22. {castor_extractor-0.16.9.dist-info → castor_extractor-0.16.15.dist-info}/LICENCE +0 -0
  23. {castor_extractor-0.16.9.dist-info → castor_extractor-0.16.15.dist-info}/WHEEL +0 -0
  24. {castor_extractor-0.16.9.dist-info → castor_extractor-0.16.15.dist-info}/entry_points.txt +0 -0
CHANGELOG.md CHANGED
@@ -1,5 +1,29 @@
 # Changelog
 
+## 0.16.15 - 2024-06-07
+
+* Tableau: extract database_name for CustomSQLTables
+
+## 0.16.14 - 2024-06-06
+
+* Snowflake: Extract SQL user defined function
+
+## 0.16.13 - 2024-06-05
+
+* Tableau: extract database_name for tables
+
+## 0.16.12 - 2024-06-04
+
+* Databricks: Extract lineage
+
+## 0.16.11 - 2024-06-03
+
+* Tableau: add extra fields to optimise storage
+
+## 0.16.10 - 2024-05-30
+
+* Salesforce: extract sobjects Label as table name
+
 ## 0.16.9 - 2024-05-28
 
 * Tableau: extract only fields that are necessary
castor_extractor/utils/client/api.py CHANGED
@@ -5,7 +5,7 @@ import requests
 
 logger = logging.getLogger(__name__)
 
-DEFAULT_TIMEOUT_MS = 30_000
+DEFAULT_TIMEOUT_S = 30
 
 # https://requests.readthedocs.io/en/latest/api/#requests.request
 HttpMethod = Literal["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"]
@@ -20,7 +20,7 @@ class APIClient:
     def __init__(self, host: str, token: Optional[str] = None):
         self._host = host
         self._token = token or ""
-        self._timeout = DEFAULT_TIMEOUT_MS
+        self._timeout = DEFAULT_TIMEOUT_S
 
     @staticmethod
     def build_url(host: str, path: str):
@@ -44,7 +44,12 @@ class APIClient:
     ) -> Any:
         logger.debug(f"Calling {method} on {url}")
         result = requests.request(
-            method, url, headers=self._headers(), params=params, json=data
+            method,
+            url,
+            headers=self._headers(),
+            params=params,
+            json=data,
+            timeout=self._timeout,
         )
         result.raise_for_status()
 
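Worth noting on this change: `requests` interprets `timeout` in seconds and applies no timeout at all unless one is passed, so the old `DEFAULT_TIMEOUT_MS` value was stored but never enforced (the old call site passed no `timeout` argument). A minimal sketch of the fixed behaviour, not the package's code:

```python
import requests

# `timeout` is in seconds; without it, a request can block indefinitely.
# A value of 30_000 would have meant 30,000 seconds, hence the rename.
def fetch_json(url: str, timeout_s: int = 30) -> dict:
    response = requests.get(url, timeout=timeout_s)  # raises Timeout after 30s
    response.raise_for_status()
    return response.json()
```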
castor_extractor/utils/retry.py CHANGED
@@ -68,7 +68,8 @@ class Retry(BaseModel):
         self._retry_attempts += 1
         wait_ms = self.base() + self.jitter()
         wait_s = float(wait_ms) / MS_IN_SEC
-        logger.warning(f"Attempting a new call in {wait_s} seconds")
+        msg = f"Attempting a new call in {wait_s} seconds, {self._retry_attempts} attempt(s) / {self.max_retries} max retries"
+        logger.warning(msg)
         time.sleep(wait_s)
         return True
 
@@ -93,6 +94,7 @@ def retry(
         try:
             return None, callable(*args, **kwargs)
         except exceptions_ as err:
+            logger.warning(f"Exception within {callable.__name__}")
            return err, None
 
    def _func(*args, **kwargs) -> Any:
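The `Retry` helper sleeps `base() + jitter()` milliseconds between attempts, and the reworked message now also reports the attempt count. A rough sketch of that backoff pattern, assuming an exponential base and uniform jitter (the exact formulas are not part of this diff):

```python
import random
import time

MS_IN_SEC = 1000

def wait_before_retry(attempt: int, base_ms: int, max_retries: int) -> None:
    # assumed formulas: exponential base plus uniform jitter, in milliseconds
    wait_ms = base_ms * 2**attempt + random.uniform(0, base_ms)
    wait_s = float(wait_ms) / MS_IN_SEC
    print(
        f"Attempting a new call in {wait_s} seconds,"
        f" {attempt} attempt(s) / {max_retries} max retries"
    )
    time.sleep(wait_s)
```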
castor_extractor/visualization/tableau_revamp/client/client.py CHANGED
@@ -28,10 +28,16 @@ _TSC_ASSETS = (
     TableauRevampAsset.USAGE,
 )
 
-# speed up extraction: fields and columns are smaller but volumes are bigger
+# increase the value when extraction is too slow
+# decrease the value when timeouts arise
 _CUSTOM_PAGE_SIZE: Dict[TableauRevampAsset, int] = {
+    # for some clients, extraction of columns tend to hit the node limit
+    # https://community.tableau.com/s/question/0D54T00000YuK60SAF/metadata-query-nodelimitexceeded-error
+    # the workaround is to reduce pagination
+    TableauRevampAsset.COLUMN: 50,
+    # fields are light but volumes are bigger
     TableauRevampAsset.FIELD: 1000,
-    TableauRevampAsset.COLUMN: 1000,
+    TableauRevampAsset.TABLE: 50,
 }
 
 
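The trade-off behind these page sizes: Tableau's Metadata API estimates how many nodes a paginated query will touch and rejects it above a limit (20,000 by default, per the NodeLimitExceeded thread linked in the comment). A back-of-the-envelope sketch with illustrative fan-out numbers, not measurements:

```python
NODE_LIMIT = 20_000  # Tableau Metadata API default (assumption from public docs)

def estimated_nodes(page_size: int, fanout_per_item: int) -> int:
    # each item drags along its nested ids (downstream fields, workbooks, ...)
    return page_size * fanout_per_item

# a column fanning out to ~100 downstream nodes breaks the limit at 1000/page
assert estimated_nodes(1000, 100) > NODE_LIMIT
# the reduced page size of 50 keeps the same query comfortably under it
assert estimated_nodes(50, 100) < NODE_LIMIT
```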
castor_extractor/visualization/tableau_revamp/client/gql_queries.py CHANGED
@@ -18,7 +18,11 @@ QUERY_TEMPLATE = """
 
 _COLUMNS_QUERY = """
 downstreamDashboards { id }
-downstreamFields { id }
+downstreamFields {
+  id
+  __typename
+  datasource { id }
+}
 downstreamWorkbooks { id }
 id
 name
@@ -59,12 +63,21 @@ downstreamWorkbooks { id }
 id
 name
 ... on DatabaseTable {
-  connectionType
   fullName
   schema
+  database {
+    connectionType
+    id
+    name
+  }
 }
 ... on CustomSQLTable {
   query
+  database {
+    connectionType
+    id
+    name
+  }
 }
 """
 
castor_extractor/warehouse/abstract/__init__.py CHANGED
@@ -1,6 +1,8 @@
 from .asset import (
+    ADDITIONAL_LINEAGE_ASSETS,
     CATALOG_ASSETS,
     EXTERNAL_LINEAGE_ASSETS,
+    FUNCTIONS_ASSETS,
     QUERIES_ASSETS,
     VIEWS_ASSETS,
     SupportedAssets,
castor_extractor/warehouse/abstract/asset.py CHANGED
@@ -7,6 +7,8 @@ from ...types import ExternalAsset, classproperty
 class WarehouseAsset(ExternalAsset):
     """Assets that can be extracted from warehouses"""
 
+    ADDITIONAL_COLUMN_LINEAGE = "additional_column_lineage"
+    ADDITIONAL_TABLE_LINEAGE = "additional_table_lineage"
     COLUMN = "column"
     COLUMN_LINEAGE = "column_lineage"  # specific to snowflake
     DATABASE = "database"
@@ -19,12 +21,15 @@ class WarehouseAsset(ExternalAsset):
     ROLE = "role"
     SCHEMA = "schema"
     TABLE = "table"
+    FUNCTION = "function"
     USER = "user"
     VIEW_DDL = "view_ddl"
 
     @classproperty
     def optional(cls) -> Set["WarehouseAsset"]:
         return {
+            WarehouseAsset.ADDITIONAL_COLUMN_LINEAGE,
+            WarehouseAsset.ADDITIONAL_TABLE_LINEAGE,
             WarehouseAsset.EXTERNAL_COLUMN_LINEAGE,
             WarehouseAsset.EXTERNAL_TABLE_LINEAGE,
         }
@@ -33,8 +38,10 @@ class WarehouseAsset(ExternalAsset):
 class WarehouseAssetGroup(Enum):
     """Groups of assets that can be extracted together"""
 
+    ADDITIONAL_LINEAGE = "additional_lineage"
     CATALOG = "catalog"
     EXTERNAL_LINEAGE = "external_lineage"
+    FUNCTION = "function"
     QUERY = "query"
     ROLE = "role"
     SNOWFLAKE_LINEAGE = "snowflake_lineage"
@@ -53,6 +60,7 @@ CATALOG_ASSETS = (
 )
 
 # shared by technologies supporting queries
+FUNCTIONS_ASSETS = (WarehouseAsset.FUNCTION,)
 QUERIES_ASSETS = (WarehouseAsset.QUERY,)
 VIEWS_ASSETS = (WarehouseAsset.VIEW_DDL,)
 
@@ -61,6 +69,11 @@ EXTERNAL_LINEAGE_ASSETS = (
     WarehouseAsset.EXTERNAL_TABLE_LINEAGE,
 )
 
+ADDITIONAL_LINEAGE_ASSETS = (
+    WarehouseAsset.ADDITIONAL_COLUMN_LINEAGE,
+    WarehouseAsset.ADDITIONAL_TABLE_LINEAGE,
+)
+
 NON_EXTRACTABLE_ASSETS = {WarehouseAssetGroup.EXTERNAL_LINEAGE}
 
 
castor_extractor/warehouse/databricks/client.py CHANGED
@@ -1,18 +1,38 @@
 import logging
+from concurrent.futures import ThreadPoolExecutor
 from datetime import date
 from functools import partial
-from typing import Any, Dict, List, Optional, Set
+from typing import Any, Dict, List, Optional, Set, Tuple, cast
 
-from ...utils import at_midnight, date_after, mapping_from_rows
+import requests
+
+from ...utils import (
+    SafeMode,
+    at_midnight,
+    date_after,
+    mapping_from_rows,
+    retry,
+    safe_mode,
+)
 from ...utils.client.api import APIClient
 from ...utils.pager import PagerOnToken
 from ..abstract.time_filter import TimeFilter
 from .credentials import DatabricksCredentials
 from .format import DatabricksFormatter
-from .types import TablesColumns
+from .types import Link, Ostr, OTimestampedLink, TablesColumns, TimestampedLink
 
 logger = logging.getLogger(__name__)
 
+_MAX_NUMBER_OF_LINEAGE_ERRORS = 1000
+_MAX_THREADS = 10
+_RETRY_ATTEMPTS = 3
+_RETRY_BASE_MS = 1000
+_RETRY_EXCEPTIONS = [
+    requests.exceptions.ConnectTimeout,
+]
+
+safe_params = SafeMode((BaseException,), _MAX_NUMBER_OF_LINEAGE_ERRORS)
+
 
 def _day_to_epoch_ms(day: date) -> int:
     return int(at_midnight(day).timestamp() * 1000)
@@ -22,6 +42,30 @@ def _day_hour_to_epoch_ms(day: date, hour: int) -> int:
     return int(at_midnight(day).timestamp() * 1000) + (hour * 3600 * 1000)
 
 
+class LineageLinks:
+    """
+    helper class that handles lineage deduplication and filtering
+    """
+
+    def __init__(self):
+        self.lineage: Dict[Link, Ostr] = dict()
+
+    def add(self, timestamped_link: TimestampedLink) -> None:
+        """
+        keep the most recent lineage link, adding to `self.lineage`
+        """
+        parent, child, timestamp = timestamped_link
+        link = (parent, child)
+        if not self.lineage.get(link):
+            self.lineage[link] = timestamp
+        else:
+            if not timestamp:
+                return
+            # keep most recent link; cast for mypy
+            recent = max(cast(str, self.lineage[link]), cast(str, timestamp))
+            self.lineage[link] = recent
+
+
 class DatabricksClient(APIClient):
     """Databricks Client"""
 
@@ -123,6 +167,198 @@ class DatabricksClient(APIClient):
             columns.extend(c_to_add)
         return tables, columns
 
+    @staticmethod
+    def _to_table_path(table: dict) -> Ostr:
+        if table.get("name"):
+            return f"{table['catalog_name']}.{table['schema_name']}.{table['name']}"
+        return None
+
+    @staticmethod
+    def _to_column_path(column: dict) -> Ostr:
+        if column.get("name"):
+            return f"{column['catalog_name']}.{column['schema_name']}.{column['table_name']}.{column['name']}"
+        return None
+
+    def _link(
+        self, path_from: Ostr, path_to: Ostr, timestamp: Ostr
+    ) -> OTimestampedLink:
+        """exclude missing path and self-lineage"""
+        if (not path_from) or (not path_to):
+            return None
+        is_self_lineage = path_from.lower() == path_to.lower()
+        if is_self_lineage:
+            return None
+        return (path_from, path_to, timestamp)
+
+    def _single_table_lineage_links(
+        self, table_path: str, single_table_lineage: dict
+    ) -> List[TimestampedLink]:
+        """
+        process databricks lineage API response for a given table
+        returns a list of (parent, child, timestamp)
+
+        Note: in `upstreams` or `downstreams` we only care about `tableInfo`,
+        we could also have `notebookInfos` or `fileInfo`
+        """
+        links: List[OTimestampedLink] = []
+        # add parent:
+        for link in single_table_lineage.get("upstreams", []):
+            parent = link.get("tableInfo", {})
+            parent_path = self._to_table_path(parent)
+            timestamp: Ostr = parent.get("lineage_timestamp")
+            links.append(self._link(parent_path, table_path, timestamp))
+
+        # add children:
+        for link in single_table_lineage.get("downstreams", []):
+            child = link.get("tableInfo", {})
+            child_path = self._to_table_path(child)
+            timestamp = child.get("lineage_timestamp")
+            links.append(self._link(table_path, child_path, timestamp))
+
+        return list(filter(None, links))
+
+    @safe_mode(safe_params, lambda: [])
+    @retry(
+        exceptions=_RETRY_EXCEPTIONS,
+        max_retries=_RETRY_ATTEMPTS,
+        base_ms=_RETRY_BASE_MS,
+    )
+    def get_single_table_lineage(
+        self, table_path: str
+    ) -> List[TimestampedLink]:
+        """
+        Helper function used in get_lineage_links.
+        Call data lineage API and return the content of the result
+        eg table_path: broward_prd.bronze.account_adjustments
+        FYI: Maximum rate of 50 requests per SECOND
+        """
+        path = "api/2.0/lineage-tracking/table-lineage"
+        payload = {"table_name": table_path, "include_entity_lineage": True}
+        content = self.get(path=path, payload=payload)
+        return self._single_table_lineage_links(table_path, content)
+
+    def _deduplicate_lineage(self, lineages: List[TimestampedLink]) -> dict:
+        deduplicated_lineage = LineageLinks()
+        for timestamped_link in lineages:
+            deduplicated_lineage.add(timestamped_link)
+        return deduplicated_lineage.lineage
+
+    def table_lineage(self, tables: List[dict]) -> List[dict]:
+        """
+        Wrapper function that retrieves all table lineage
+        """
+        # retrieve table lineage
+        with ThreadPoolExecutor(max_workers=_MAX_THREADS) as executor:
+            table_paths = [
+                ".".join([table["schema_id"], table["table_name"]])
+                for table in tables
+            ]
+            results = executor.map(self.get_single_table_lineage, table_paths)
+        lineages = [link for links in results for link in links]
+        deduplicated = self._deduplicate_lineage(lineages)
+        return self.formatter.format_lineage(deduplicated)
+
+    @staticmethod
+    def _paths_for_column_lineage(
+        tables: List[dict], columns: List[dict], table_lineage: List[dict]
+    ) -> List[Tuple[str, str]]:
+        """
+        helper providing a list of candidate columns to look lineage for:
+        we only look for column lineage where there is table lineage
+        """
+        # mapping between table id and its path db.schema.table
+        # table["schema_id"] follows the pattern `db.schema`
+        mapping = {
+            table["id"]: ".".join([table["schema_id"], table["table_name"]])
+            for table in tables
+        }
+
+        tables_with_lineage: Set[str] = set()
+        for t in table_lineage:
+            tables_with_lineage.add(t["parent_path"])
+            tables_with_lineage.add(t["child_path"])
+
+        paths_to_return: List[Tuple[str, str]] = []
+        for column in columns:
+            table_path = mapping[column["table_id"]]
+            if table_path not in tables_with_lineage:
+                continue
+            column_ = (table_path, column["column_name"])
+            paths_to_return.append(column_)
+
+        return paths_to_return
+
+    def _single_column_lineage_links(
+        self, column_path: str, single_column_lineage: dict
+    ) -> List[TimestampedLink]:
+        """
+        process databricks lineage API response for a given table
+        returns a list of (parent, child, timestamp)
+
+        Note: in `upstreams` or `downstreams` we only care about `tableInfo`,
+        we could also have `notebookInfos` or `fileInfo`
+        """
+        links: List[OTimestampedLink] = []
+        # add parent:
+        for link in single_column_lineage.get("upstream_cols", []):
+            parent_path = self._to_column_path(link)
+            timestamp: Ostr = link.get("lineage_timestamp")
+            links.append(self._link(parent_path, column_path, timestamp))
+
+        # add children:
+        for link in single_column_lineage.get("downstream_cols", []):
+            child_path = self._to_column_path(link)
+            timestamp = link.get("lineage_timestamp")
+            links.append(self._link(column_path, child_path, timestamp))
+
+        return list(filter(None, links))
+
+    @safe_mode(safe_params, lambda: [])
+    @retry(
+        exceptions=_RETRY_EXCEPTIONS,
+        max_retries=_RETRY_ATTEMPTS,
+        base_ms=_RETRY_BASE_MS,
+    )
+    def get_single_column_lineage(
+        self,
+        names: Tuple[str, str],
+    ) -> List[TimestampedLink]:
+        """
+        Helper function used in get_lineage_links.
+        Call data lineage API and return the content of the result
+
+        eg table_path: broward_prd.bronze.account_adjustments
+        FYI: Maximum rate of 10 requests per SECOND
+        """
+        table_path, column_name = names
+        api_path = "api/2.0/lineage-tracking/column-lineage"
+        payload = {
+            "table_name": table_path,
+            "column_name": column_name,
+            "include_entity_lineage": True,
+        }
+        content = self.get(path=api_path, payload=payload)
+        column_path = f"{table_path}.{column_name}"
+        return self._single_column_lineage_links(column_path, content)
+
+    def column_lineage(
+        self, tables: List[dict], columns: List[dict], table_lineage: List[dict]
+    ) -> List[dict]:
+        """
+        Wrapper function that retrieves all column lineage
+        we only try to retrieve column lineage if we found table lineage
+        """
+        candidate_paths = self._paths_for_column_lineage(
+            tables, columns, table_lineage
+        )
+        lineages: List[TimestampedLink] = [
+            link
+            for paths in candidate_paths
+            for link in self.get_single_column_lineage(paths)
+        ]
+        deduplicated = self._deduplicate_lineage(lineages)
+        return self.formatter.format_lineage(deduplicated)
+
     @staticmethod
     def _time_filter(time_filter: Optional[TimeFilter]) -> dict:
         """time filter to retrieve Databricks' queries"""
castor_extractor/warehouse/databricks/client_test.py CHANGED
@@ -1,9 +1,16 @@
 from datetime import date
+from unittest.mock import Mock, patch
 
 from freezegun import freeze_time
 
 from ..abstract.time_filter import TimeFilter
-from .client import DatabricksClient, _day_hour_to_epoch_ms
+from .client import DatabricksClient, LineageLinks, _day_hour_to_epoch_ms
+from .test_constants import (
+    CLOSER_DATE,
+    MOCK_TABLES_FOR_TABLE_LINEAGE,
+    OLDER_DATE,
+    TABLE_LINEAGE_SIDE_EFFECT,
+)
 
 
 def test__day_hour_to_epoch_ms():
@@ -97,3 +104,56 @@ def test_DatabricksClient__match_table_with_user():
     table_without_owner = {"id": 1, "owner_email": None}
     actual = client._match_table_with_user(table_without_owner, user_mapping)
     assert actual == table_without_owner
+
+
+@patch(
+    "source.packages.extractor.castor_extractor.warehouse.databricks.client.DatabricksClient.get",
+    side_effect=TABLE_LINEAGE_SIDE_EFFECT,
+)
+def test_DatabricksClient_table_lineage(mock_get):
+    client = DatabricksClient(Mock())
+
+    lineage = client.table_lineage(MOCK_TABLES_FOR_TABLE_LINEAGE)
+    assert len(lineage) == 2
+
+    expected_link_1 = {
+        "parent_path": "dev.silver.pre_analytics",
+        "child_path": "dev.silver.analytics",
+        "timestamp": OLDER_DATE,
+    }
+    expected_link_2 = {
+        "parent_path": "dev.bronze.analytics",
+        "child_path": "dev.silver.analytics",
+        "timestamp": CLOSER_DATE,
+    }
+    assert expected_link_1 in lineage
+    assert expected_link_2 in lineage
+
+
+def test_LineageLinks_add():
+    links = LineageLinks()
+    timestamped_link = ("parent", "child", None)
+    expected_key = ("parent", "child")
+
+    links.add(timestamped_link)
+
+    assert expected_key in links.lineage
+    assert links.lineage[expected_key] is None
+
+    # we replace None by an actual timestamp
+    timestamped_link = ("parent", "child", OLDER_DATE)
+    links.add(timestamped_link)
+    assert expected_key in links.lineage
+    assert links.lineage[expected_key] == OLDER_DATE
+
+    # we update with the more recent timestamp
+    timestamped_link = ("parent", "child", CLOSER_DATE)
+    links.add(timestamped_link)
+    assert expected_key in links.lineage
+    assert links.lineage[expected_key] == CLOSER_DATE
+
+    # we keep the more recent timestamp
+    timestamped_link = ("parent", "child", OLDER_DATE)
+    links.add(timestamped_link)
+    assert expected_key in links.lineage
+    assert links.lineage[expected_key] == CLOSER_DATE
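Stepping back from the `DatabricksClient` changes and their tests: each table needs its own call to the lineage-tracking endpoint, so the client fans the calls out over a thread pool (`_MAX_THREADS = 10`, which also stays within the per-second rate limits quoted in the docstrings), then flattens the results and deduplicates them through `LineageLinks`, keeping the most recent timestamp per `(parent, child)` pair. A minimal sketch of that fan-out/flatten pattern, with a stub in place of the real API call:

```python
from concurrent.futures import ThreadPoolExecutor
from typing import List, Tuple

TimestampedLink = Tuple[str, str, str]

def fake_lineage(table_path: str) -> List[TimestampedLink]:
    # stand-in for get_single_table_lineage and its HTTP call
    return [(f"upstream.of.{table_path}", table_path, "2024-06-01 00:00:00.0")]

paths = ["dev.silver.analytics", "dev.bronze.analytics"]
with ThreadPoolExecutor(max_workers=10) as executor:
    results = executor.map(fake_lineage, paths)  # preserves input order
links = [link for links_ in results for link in links_]
assert len(links) == 2
```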
castor_extractor/warehouse/databricks/extract.py CHANGED
@@ -3,6 +3,7 @@ from typing import Dict, Optional
 
 from ...utils import AbstractStorage, LocalStorage, write_summary
 from ..abstract import (
+    ADDITIONAL_LINEAGE_ASSETS,
     CATALOG_ASSETS,
     EXTERNAL_LINEAGE_ASSETS,
     QUERIES_ASSETS,
@@ -17,6 +18,7 @@ from .client import DatabricksClient
 from .credentials import to_credentials
 
 DATABRICKS_ASSETS: SupportedAssets = {
+    WarehouseAssetGroup.ADDITIONAL_LINEAGE: ADDITIONAL_LINEAGE_ASSETS,
     WarehouseAssetGroup.CATALOG: CATALOG_ASSETS,
     WarehouseAssetGroup.QUERY: QUERIES_ASSETS,
     WarehouseAssetGroup.ROLE: (WarehouseAsset.USER,),
@@ -94,6 +96,39 @@ class DatabricksExtractionProcessor:
         logger.info(f"Extracted {len(columns)} columns to {location}")
         return catalog_locations
 
+    def extract_lineage(self) -> Paths:
+        if self._should_not_reextract(WarehouseAssetGroup.ADDITIONAL_LINEAGE):
+            return self._existing_group_paths(
+                WarehouseAssetGroup.ADDITIONAL_LINEAGE
+            )
+        lineage_locations: Dict[str, str] = dict()
+
+        # extract catalog
+        databases = self._client.databases()
+        schemas = self._client.schemas(databases)
+        users = self._client.users()
+        tables, columns = self._client.tables_and_columns(schemas, users)
+        logger.info("Extracted pre-requisite catalog. Next comes lineage")
+
+        # extract table lineage
+        table_lineage = self._client.table_lineage(tables)
+        table_lineage_key = WarehouseAsset.ADDITIONAL_TABLE_LINEAGE.value
+        location = self._storage.put(table_lineage_key, table_lineage)
+        lineage_locations[table_lineage_key] = location
+        msg = f"Extracted {len(table_lineage)} table lineage to {location}"
+        logger.info(msg)
+
+        # extract column lineage
+        column_lineage = self._client.column_lineage(
+            tables, columns, table_lineage
+        )
+        column_lineage_key = WarehouseAsset.ADDITIONAL_COLUMN_LINEAGE.value
+        location = self._storage.put(column_lineage_key, column_lineage)
+        lineage_locations[column_lineage_key] = location
+        msg = f"Extracted {len(column_lineage)} column lineage to {location}"
+        logger.info(msg)
+        return lineage_locations
+
     def extract_query(self, time_filter: OTimeFilter = None) -> Paths:
         """extract yesterday's queries and return their location"""
         if self._should_not_reextract(WarehouseAssetGroup.QUERY):
@@ -149,6 +184,7 @@ def extract_all(**kwargs) -> None:
     )
 
     extractor.extract_catalog()
+    extractor.extract_lineage()
     extractor.extract_query()
     extractor.extract_role()
     extractor.extract_view_ddl()
castor_extractor/warehouse/databricks/format.py CHANGED
@@ -95,6 +95,19 @@ class DatabricksFormatter:
 
         return tables, columns
 
+    @staticmethod
+    def format_lineage(timestamps: dict) -> List[dict]:
+        lineage: List[dict] = []
+        for link, timestamp in timestamps.items():
+            parent_path, child_path = link
+            link_ = {
+                "parent_path": parent_path,
+                "child_path": child_path,
+                "timestamp": timestamp,
+            }
+            lineage.append(link_)
+        return lineage
+
     @staticmethod
     def format_query(raw_queries: List[dict]) -> List[dict]:
         queries = []
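`format_lineage` is the final step of the lineage pipeline: it flattens the deduplicated `(parent, child) -> timestamp` mapping into one row per link. A worked example of the shape it produces (assuming the module is importable from the installed wheel):

```python
from castor_extractor.warehouse.databricks.format import DatabricksFormatter

timestamps = {("dev.a", "dev.b"): "2024-04-19 20:20:20.0"}
assert DatabricksFormatter.format_lineage(timestamps) == [
    {
        "parent_path": "dev.a",
        "child_path": "dev.b",
        "timestamp": "2024-04-19 20:20:20.0",
    }
]
```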
castor_extractor/warehouse/databricks/test_constants.py ADDED
@@ -0,0 +1,79 @@
+OLDER_DATE = "2024-04-18 20:20:20.0"
+CLOSER_DATE = "2024-04-19 20:20:20.0"
+
+MOCK_TABLES_FOR_TABLE_LINEAGE = [
+    {
+        "id": "f51ba2ca-8cc3-4de6-8f8b-730359e8f40f",
+        "schema_id": "dev.silver",
+        "table_name": "analytics",
+    },
+    {
+        "id": "4e140bdc-a67c-4b68-8a07-c684657d8b44",
+        "schema_id": "dev.silver",
+        "table_name": "pre_analytics",
+    },
+    {
+        "id": "7d403198-55ea-4a40-9995-6ee2f4c79dfa",
+        "schema_id": "dev.bronze",
+        "table_name": "analytics",
+    },
+]
+
+_RAW_LINEAGE_DEV_SILVER_ANALYTICS = {
+    "upstreams": [
+        {  # there could be other keys: jobInfos, notebookInfos, queryInfos
+            "tableInfo": {
+                "name": "pre_analytics",
+                "catalog_name": "dev",
+                "schema_name": "silver",
+                "table_type": "PERSISTED_VIEW",  # not used
+                "lineage_timestamp": OLDER_DATE,
+            }
+        },
+        {
+            "tableInfo": {
+                "name": "analytics",
+                "catalog_name": "dev",
+                "schema_name": "bronze",
+                "table_type": "PERSISTED_VIEW",  # not used
+                "lineage_timestamp": CLOSER_DATE,
+            }
+        },
+    ],
+    "downstreams": [],
+}
+_RAW_LINEAGE_DEV_SILVER_PRE_ANALYTICS = {
+    "upstreams": [],
+    "downstreams": [
+        {
+            "tableInfo": {
+                "name": "analytics",
+                "catalog_name": "dev",
+                "schema_name": "silver",
+                "table_type": "PERSISTED_VIEW",  # not used
+                "lineage_timestamp": OLDER_DATE,
+            }
+        },
+    ],
+}
+_RAW_LINEAGE_DEV_BRONZE_ANALYTICS = {
+    "upstreams": [],
+    "downstreams": [
+        {
+            "tableInfo": {
+                "name": "analytics",
+                "catalog_name": "dev",
+                "schema_name": "silver",
+                "table_type": "PERSISTED_VIEW",  # not used
+                "lineage_timestamp": OLDER_DATE,
+            }
+        },
+    ],
+}
+
+# should be in the same order as MOCK_TABLES_FOR_TABLE_LINEAGE
+TABLE_LINEAGE_SIDE_EFFECT: tuple = (
+    _RAW_LINEAGE_DEV_SILVER_ANALYTICS,
+    _RAW_LINEAGE_DEV_SILVER_PRE_ANALYTICS,
+    _RAW_LINEAGE_DEV_BRONZE_ANALYTICS,
+)
castor_extractor/warehouse/databricks/types.py CHANGED
@@ -1,3 +1,8 @@
-from typing import List, Tuple
+from typing import List, Optional, Tuple
 
+Link = Tuple[str, str]
 TablesColumns = Tuple[List[dict], List[dict]]
+Ostr = Optional[str]
+TimestampedLink = Tuple[str, str, Ostr]
+
+OTimestampedLink = Optional[TimestampedLink]
castor_extractor/warehouse/salesforce/client.py CHANGED
@@ -1,5 +1,5 @@
 import logging
-from typing import Dict, Iterator, List
+from typing import Dict, Iterator, List, Tuple
 
 from tqdm import tqdm  # type: ignore
 
@@ -96,17 +96,19 @@ class SalesforceClient(SalesforceBaseClient):
         """
         sobjects = self.fetch_sobjects()
         logger.info(f"Extracted {len(sobjects)} sobjects")
-        return self.formatter.tables(sobjects)
+        return list(self.formatter.tables(sobjects))
 
     def columns(
-        self, sobject_names: List[str], show_progress: bool = True
+        self, sobject_names: List[Tuple[str, str]], show_progress: bool = True
     ) -> List[dict]:
         """
         Get salesforce sobject fields as columns
         show_progress: optionally deactivate the tqdm progress bar
         """
         sobject_fields: Dict[str, List[dict]] = dict()
-        for sobject_name in tqdm(sobject_names, disable=not show_progress):
-            fields = self.fetch_fields(sobject_name)
-            sobject_fields[sobject_name] = fields
+        for api_name, table_name in tqdm(
+            sobject_names, disable=not show_progress
+        ):
+            fields = self.fetch_fields(api_name)
+            sobject_fields[table_name] = fields
         return self.formatter.columns(sobject_fields)
castor_extractor/warehouse/salesforce/extract.py CHANGED
@@ -72,8 +72,8 @@ class SalesforceExtractionProcessor:
         catalog_locations[WarehouseAsset.TABLE.value] = location
         logger.info(f"Extracted {len(tables)} tables to {location}")
 
-        table_names = [t["table_name"] for t in tables]
-        columns = self._client.columns(table_names, show_progress)
+        sobject_names = [(t["api_name"], t["table_name"]) for t in tables]
+        columns = self._client.columns(sobject_names, show_progress)
         location = self._storage.put(WarehouseAsset.COLUMN.value, columns)
         catalog_locations[WarehouseAsset.COLUMN.value] = location
         logger.info(f"Extracted {len(columns)} columns to {location}")
castor_extractor/warehouse/salesforce/format.py CHANGED
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List
+from typing import Any, Dict, Iterator, List
 
 from .constants import SCHEMA_NAME
 
@@ -35,17 +35,35 @@ def _to_column_payload(field: dict, position: int, table_name: str) -> dict:
     }
 
 
-def _to_table_payload(table: dict) -> dict:
+def _to_table_payload(sobject: dict, table_name: str) -> dict:
     return {
-        "id": table["QualifiedApiName"],
+        "id": table_name,
+        "api_name": sobject["QualifiedApiName"],
+        "label": sobject["Label"],
         "schema_id": SCHEMA_NAME,
-        "table_name": table["QualifiedApiName"],
+        "table_name": table_name,
         "description": "",
         "tags": [],
        "type": "TABLE",
     }
 
 
+def _merge_label_and_api_name(sobject: dict) -> dict:
+    label = sobject["Label"]
+    api_name = sobject["QualifiedApiName"]
+    table_name = f"{label} ({api_name})"
+    return _to_table_payload(sobject, table_name)
+
+
+def _by_label(sobjects: List[dict]) -> Dict[str, List[dict]]:
+    by_label: Dict[str, List[dict]] = dict()
+    for sobject in sobjects:
+        label = sobject["Label"]
+        similar_sobjects = by_label.setdefault(label, [])
+        similar_sobjects.append(sobject)
+    return by_label
+
+
 class SalesforceFormatter:
     """
     Helper functions that format the response in the format to be exported as
@@ -53,9 +71,18 @@ class SalesforceFormatter:
     """
 
     @staticmethod
-    def tables(sobjects: List[dict]) -> List[dict]:
-        """formats the raw list of sobjects to tables"""
-        return [_to_table_payload(s) for s in sobjects]
+    def tables(sobjects: List[dict]) -> Iterator[dict]:
+        """
+        formats the raw list of sobjects to tables
+        if two tables share the same label, then we add the api name as well
+        """
+        by_label = _by_label(sobjects)
+        for label, similars in by_label.items():
+            if len(similars) > 1:
+                yield from [_merge_label_and_api_name(s) for s in similars]
+            else:
+                sobject = similars[0]  # unique sobject on label
+                yield _to_table_payload(sobject, label)
 
     @staticmethod
     def columns(sobject_fields: Dict[str, List[dict]]) -> List[dict]:
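Worked through on a small example (assuming `SalesforceFormatter` is imported from the module above), the new disambiguation rule only appends the API name when labels collide:

```python
sobjects = [
    {"Label": "Account", "QualifiedApiName": "Account"},
    {"Label": "Account", "QualifiedApiName": "Account_Custom__c"},
    {"Label": "Lead", "QualifiedApiName": "Lead"},
]
names = [t["table_name"] for t in SalesforceFormatter.tables(sobjects)]
# -> ["Account (Account)", "Account (Account_Custom__c)", "Lead"]
```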
castor_extractor/warehouse/salesforce/format_test.py CHANGED
@@ -1,4 +1,21 @@
-from .format import _field_description
+from typing import Dict, Tuple
+
+from .format import (
+    SCHEMA_NAME,
+    SalesforceFormatter,
+    _by_label,
+    _field_description,
+    _merge_label_and_api_name,
+)
+
+
+def _example_sobjects() -> Tuple[Dict[str, str], ...]:
+    """Returns 4 sobjects with 2 sharing the same label"""
+    a = {"Label": "a", "QualifiedApiName": "a_one"}
+    b = {"Label": "b", "QualifiedApiName": "b"}
+    c = {"Label": "c", "QualifiedApiName": "c"}
+    a_prime = {"Label": "a", "QualifiedApiName": "a_two"}
+    return a, b, c, a_prime
 
 
 def test__field_description():
@@ -30,3 +47,34 @@ def test__field_description():
         "- Data Sensitivity Level: bam"
     )
     assert description == expected
+
+
+def test__merge_label_and_api_name():
+    sobject = {"Label": "foo", "QualifiedApiName": "bar"}
+    payload = _merge_label_and_api_name(sobject)
+    expected_name = "foo (bar)"
+    assert payload == {
+        "id": expected_name,
+        "api_name": "bar",
+        "label": "foo",
+        "schema_id": SCHEMA_NAME,
+        "table_name": expected_name,
+        "description": "",
+        "tags": [],
+        "type": "TABLE",
+    }
+
+
+def test__by_label():
+    a, b, c, a_prime = _example_sobjects()
+    sobjects = [a, b, c, a_prime]
+    by_label = _by_label(sobjects)
+    assert by_label == {"a": [a, a_prime], "b": [b], "c": [c]}
+
+
+def test_salesforce_formatter_tables():
+    sobjects = [*_example_sobjects()]
+    tables = SalesforceFormatter.tables(sobjects)
+    expected_names = {"a (a_one)", "a (a_two)", "b", "c"}
+    payload_names = {t["table_name"] for t in tables}
+    assert payload_names == expected_names
castor_extractor/warehouse/snowflake/extract.py CHANGED
@@ -4,6 +4,7 @@ from ...utils import LocalStorage, from_env, write_summary
 from ..abstract import (
     CATALOG_ASSETS,
     EXTERNAL_LINEAGE_ASSETS,
+    FUNCTIONS_ASSETS,
     QUERIES_ASSETS,
     VIEWS_ASSETS,
     SQLExtractionProcessor,
@@ -20,6 +21,7 @@ logger = logging.getLogger(__name__)
 
 SNOWFLAKE_ASSETS: SupportedAssets = {
     WarehouseAssetGroup.CATALOG: CATALOG_ASSETS,
+    WarehouseAssetGroup.FUNCTION: FUNCTIONS_ASSETS,
     WarehouseAssetGroup.QUERY: QUERIES_ASSETS,
     WarehouseAssetGroup.VIEW_DDL: VIEWS_ASSETS,
     WarehouseAssetGroup.ROLE: (
castor_extractor/warehouse/snowflake/queries/function.sql ADDED
@@ -0,0 +1,10 @@
+SELECT
+    f.function_name AS name,
+    CONCAT(f.function_catalog, '.', f.function_schema, '.', f.function_name) AS path,
+    f.argument_signature AS signature,
+    f.function_definition AS definition
+FROM snowflake.account_usage.functions f
+WHERE TRUE
+    AND f.function_catalog NOT IN ('SNOWFLAKE', 'UTIL_DB')
+    AND f.function_language = 'SQL'
+    AND deleted IS NULL
{castor_extractor-0.16.9.dist-info → castor_extractor-0.16.15.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: castor-extractor
-Version: 0.16.9
+Version: 0.16.15
 Summary: Extract your metadata assets.
 Home-page: https://www.castordoc.com/
 License: EULA
{castor_extractor-0.16.9.dist-info → castor_extractor-0.16.15.dist-info}/RECORD RENAMED
@@ -1,4 +1,4 @@
-CHANGELOG.md,sha256=WwEWPQQuGqVnWLhPtEh3SuOlBrNgHyHcLsYuvahpN7E,10437
+CHANGELOG.md,sha256=QYFobUPMbdi6cidq_yU-oMbXWoAr1BjTE6thfdZ9tA4,10866
 Dockerfile,sha256=HcX5z8OpeSvkScQsN-Y7CNMUig_UB6vTMDl7uqzuLGE,303
 LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
 README.md,sha256=uF6PXm9ocPITlKVSh9afTakHmpLx3TvawLf-CbMP3wM,3578
@@ -47,7 +47,7 @@ castor_extractor/uploader/utils.py,sha256=NCe0tkB28BVhqzOaDhDjaSfODjjcPWB17X6chn
 castor_extractor/utils/__init__.py,sha256=bmzAOc-PKsVreMJtF7DGpPQeHrVqxWel_BblRftt6Ag,1186
 castor_extractor/utils/client/__init__.py,sha256=CRE-xJKm6fVV9dB8ljzB5YoOxX4I1sCD1KSgqs3Y8_Y,161
 castor_extractor/utils/client/abstract.py,sha256=aA5Qcb9TwWDSMq8WpXbGkOB20hehwX2VTpqQAwV76wk,2048
-castor_extractor/utils/client/api.py,sha256=tHa7eC11sS_eOCXhlnvUa2haRfOLENmjKgjB09Ijt0s,1664
+castor_extractor/utils/client/api.py,sha256=z1o4fteWx1HxNTqCYihl9sGkIgSQTbd8lW_B9Y2wyeQ,1742
 castor_extractor/utils/client/api_test.py,sha256=NSMdXg1FLc37erqHp2FZsIsogWVv6lFSs7rDXHikr-E,542
 castor_extractor/utils/client/postgres.py,sha256=n6ulaT222WWPY0_6qAZ0MHF0m91HtI9mMqL71nyygo0,866
 castor_extractor/utils/client/query.py,sha256=O6D5EjD1KmBlwa786Uw4D4kzxx97_HH50xIIeSWt0B8,205
@@ -80,7 +80,7 @@ castor_extractor/utils/pager/pager_on_id_test.py,sha256=CfAXhXaAmCXnm0oflj8_82An
 castor_extractor/utils/pager/pager_on_token.py,sha256=G442SKl4BXJFMPbYIIgCk5M8wl7V3jMg3K1WUUkl0I0,1579
 castor_extractor/utils/pager/pager_on_token_test.py,sha256=w2GCUGKR3cD5lfmtFAsNvExtzxkYdBR0pusBrGKFQ08,2548
 castor_extractor/utils/pager/pager_test.py,sha256=QPBVShSXhkiYZUfnAMs43xnys6CD8pAhL3Jhj-Ov2Xc,1705
-castor_extractor/utils/retry.py,sha256=vYdJMiM-Nr82H1MuD7_KZdqbFz98ffQGqJ4Owbr6mpY,3252
+castor_extractor/utils/retry.py,sha256=OsUS3qysHCkgWge8BgBwyuvoWcJ6pR_RQmQDcHlors4,3410
 castor_extractor/utils/retry_test.py,sha256=nsMttlmyKygVcffX3Hay8U2S1BspkGPiCmzIXPpLKyk,2230
 castor_extractor/utils/safe.py,sha256=jpfIimwdBSVUvU2DPFrhqpKC_DSYwxQqd08MlIkSODY,1967
 castor_extractor/utils/safe_test.py,sha256=IHN1Z761tYMFslYC-2HAfkXmFPh4LYSqNLs4QZwykjk,2160
@@ -244,16 +244,16 @@ castor_extractor/visualization/tableau/usage.py,sha256=LlFwlbEr-EnYUJjKZha99CRCR
 castor_extractor/visualization/tableau_revamp/__init__.py,sha256=a3DGjQhaz17gBqW-E84TAgupKbqLC40y5Ajo1yn-ot4,156
 castor_extractor/visualization/tableau_revamp/assets.py,sha256=owlwaI2E4UKk1YhkaHgaAXx6gu3Op6EqZ7bjp0tHI6s,351
 castor_extractor/visualization/tableau_revamp/client/__init__.py,sha256=wmS9uLtUiqNYVloi0-DgD8d2qzu3RVZEAtWiaDp6G_M,90
-castor_extractor/visualization/tableau_revamp/client/client.py,sha256=8BO7J-HFM2j6_f-Hjj3uSWip11eKeZ0cjhxGEqMTPRA,9428
+castor_extractor/visualization/tableau_revamp/client/client.py,sha256=RSoHDfz79ma0YJRGpiCihnwLGmoxLzphYrxRVyvByHI,9742
 castor_extractor/visualization/tableau_revamp/client/credentials.py,sha256=fHG32egq6ll2U4BNazalMof_plzfCMQjrN9WOs6kezk,3014
 castor_extractor/visualization/tableau_revamp/client/errors.py,sha256=dTe1shqmWmAXpDpCz-E24m8dGYjt6rvIGV9qQb4jnvI,150
-castor_extractor/visualization/tableau_revamp/client/gql_queries.py,sha256=jBxvjQnOIWfFjMJpr7S_ZPnQhdzabxoO3jyEKi8A8ns,2112
+castor_extractor/visualization/tableau_revamp/client/gql_queries.py,sha256=-V3ToD5Gi7nmfVB2OxTOZw8dcOiF7_ciSWjjW2UdvvI,2270
 castor_extractor/visualization/tableau_revamp/client/tsc_fields.py,sha256=WsDliPCo-XsQ7wN-j0gpW9bdxCHvgH-aePywiltzfbU,688
 castor_extractor/visualization/tableau_revamp/constants.py,sha256=PcdudAogQhi3e-knalhgliMKjy5ahN0em_-7XSLrnxM,87
 castor_extractor/visualization/tableau_revamp/extract.py,sha256=2SLUxp5okM4AcEJJ61ZgcC2ikfZZl9MH17CEXMXmgl0,1450
 castor_extractor/warehouse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-castor_extractor/warehouse/abstract/__init__.py,sha256=QNwFRsLpH6aqVpl37qzklLr62iA85Yx6nZAivHDhpyk,366
-castor_extractor/warehouse/abstract/asset.py,sha256=qe5ugm7fnkvjbzdELRAeywbuKH4OLq2YHlXdjepehxE,2159
+castor_extractor/warehouse/abstract/__init__.py,sha256=Fdfa026tgOo64MvzVRLHM_F2G-JmcehrF0mh3dHgb7s,419
+castor_extractor/warehouse/abstract/asset.py,sha256=9nHL4WKUU_vRgj7u3sUdIzgI4rRpdS7YrfwNku4Gz9Q,2652
 castor_extractor/warehouse/abstract/asset_test.py,sha256=_kd4ybNlWSAdSdEgJKC-jhJTa1nMRa9i8RO3YbqKLM4,758
 castor_extractor/warehouse/abstract/extract.py,sha256=fVBhdE-yMI_g6RBYZcr7q-ZVW7jK7WVkO_GO_KfkRqg,2908
 castor_extractor/warehouse/abstract/query.py,sha256=GAgeISCmAdrkTKzFGO79hQDf6SA6EFrrlW43w-LiXKo,2632
@@ -277,13 +277,14 @@ castor_extractor/warehouse/bigquery/queries/view_ddl.sql,sha256=obCm-IN9V8_YSZTw
 castor_extractor/warehouse/bigquery/query.py,sha256=hrFfjd5jW2oQnZ6ozlkn-gDe6sCIzu5zSX19T9W6fIk,4162
 castor_extractor/warehouse/bigquery/types.py,sha256=LZVWSmE57lOemNbB5hBRyYmDk9bFAU4nbRaJWALl6N8,140
 castor_extractor/warehouse/databricks/__init__.py,sha256=bTvDxjGQGM2J3hOnVhfNmFP1y8DK0tySiD_EXe5_xWE,200
-castor_extractor/warehouse/databricks/client.py,sha256=FsHlpHZ9JTG92Rf_8Z7277o9HBaAD0CKxSEHiujOgXg,8271
-castor_extractor/warehouse/databricks/client_test.py,sha256=Y-LBveZFRVaaL49Lo2MwbcJReBcYLNRdHtR_w7xWNWQ,3381
+castor_extractor/warehouse/databricks/client.py,sha256=oHR_htE25p5tiAAFZKbF48efo7tqIENW4dAGA7yEqHg,16895
+castor_extractor/warehouse/databricks/client_test.py,sha256=KNp4Hi_CC6GwiW2QDJQQwqALfUebuT9D_qL6FuP_8tY,5246
 castor_extractor/warehouse/databricks/credentials.py,sha256=PpGv5_GP320UQjV_gvaxSpOw58AmqSznmjGhGfe6bdU,655
-castor_extractor/warehouse/databricks/extract.py,sha256=-vJhAIxSu1lD_xGl-GXZYTmc5BGu0aXM3l-U0UghREM,5773
-castor_extractor/warehouse/databricks/format.py,sha256=Nd5L89yWhpIl0OEMV7WK1H3JYUa9WGPC0c-NUOT_uXM,5101
+castor_extractor/warehouse/databricks/extract.py,sha256=VX-3uo5dZucenrg-wnPur3CxOgpC5H7Ds92TO7OTAjc,7379
+castor_extractor/warehouse/databricks/format.py,sha256=2bRy2fa45NW3uk030rmyba4n2Em-NnyZPBurUslEbcw,5522
 castor_extractor/warehouse/databricks/format_test.py,sha256=iPmdJof43fBYL1Sa_fBrCWDQHCHgm7IWCZag1kWkj9E,1970
-castor_extractor/warehouse/databricks/types.py,sha256=T2SyLy9pY_olLtstdC77moPxIiikVsuQLMxh92YMJQo,78
+castor_extractor/warehouse/databricks/test_constants.py,sha256=Hm96yq_ltVAKv7WYhYz637r4Cuj-1cCdyOuxMEe3J-Q,2246
+castor_extractor/warehouse/databricks/types.py,sha256=hD6gC8oiT3QSWEvbtgUOGK_lLzzz36sEauB3lS_wxlE,218
 castor_extractor/warehouse/mysql/__init__.py,sha256=2KFDogo9GNbApHqw3Vm5t_uNmIRjdp76nmP_WQQMfQY,116
 castor_extractor/warehouse/mysql/client.py,sha256=IwoJvbmE5VZkMCP9yHf6ta3_AQPEuBPrZZ3meefbcJs,974
 castor_extractor/warehouse/mysql/client_test.py,sha256=wRTv-3c5chy_HKj-buasNiYOOCIfynYqbabM4Hxdh5E,1052
@@ -323,22 +324,23 @@ castor_extractor/warehouse/redshift/queries/user.sql,sha256=sEXveJAuNvZacvpI6Wfw
 castor_extractor/warehouse/redshift/queries/view_ddl.sql,sha256=Pkyh_QT6d4rhTeyiVcqw6O8CRl7NEhk2p7eM5YIn5kg,719
 castor_extractor/warehouse/redshift/query.py,sha256=0C81rkt2cpkWrJIxxwALDyqr-49vlqQM04y_N6wwStc,540
 castor_extractor/warehouse/salesforce/__init__.py,sha256=NR4aNea5jeE1xYqeZ_29deeN84CkN0_D_Z7CLQdJvFY,137
-castor_extractor/warehouse/salesforce/client.py,sha256=_XiQJJJfELKGmzuBv8Mr_C0FJ-oLg71KbvpehrGvJ_k,3842
+castor_extractor/warehouse/salesforce/client.py,sha256=ETnZ3n-GFFH0XohDB2ft74wI1HMspvTefR3k7ne-pmI,3891
 castor_extractor/warehouse/salesforce/constants.py,sha256=GusduVBCPvwpk_Im6F3bDvXeNQ7hRnCMdIAjIg65RnE,52
-castor_extractor/warehouse/salesforce/extract.py,sha256=ZTb58t7mqhavNvErrnw8M0L4Uu3qJpQEIldymurbgl0,3417
-castor_extractor/warehouse/salesforce/format.py,sha256=_BSj_G6C-kPwRubxSx1WuHg-_nYVQVNgAANqNfXL5RM,2154
-castor_extractor/warehouse/salesforce/format_test.py,sha256=6hy0USZH7-PDQt3oZ9_3Nwlr3eHLkqNEchqIM3bIDrU,858
+castor_extractor/warehouse/salesforce/extract.py,sha256=IbhkCli8bSn7tjhRNlaD_HhfmZmv-5E5ajZfEUh68Hs,3438
+castor_extractor/warehouse/salesforce/format.py,sha256=f5mMJyPsVU1ZSLe5WGCUOpj2SyW7_DFfzNVNu_m2aV0,3126
+castor_extractor/warehouse/salesforce/format_test.py,sha256=HBlAYBoCOHaq_QOFudZlpcZb5TyZWV9v-cxK4tklg50,2253
 castor_extractor/warehouse/salesforce/soql.py,sha256=pAEaJE8ZUcyN3ptBsZGzNcGRhCcU81X6RMlnF1HRMw4,1063
 castor_extractor/warehouse/snowflake/__init__.py,sha256=TEGXTyxWp4Tr9gIHb-UFVTRKj6YWmrRtqHruiKSZGiY,174
 castor_extractor/warehouse/snowflake/client.py,sha256=XT0QLVNff_586SDuMe40iu8FCwPDh2uBV5aKc1Ql914,5555
 castor_extractor/warehouse/snowflake/client_test.py,sha256=ihWtOOAQfh8pu5JTr_EWfqefKOVIaJXznACURzaU1Qs,1432
 castor_extractor/warehouse/snowflake/credentials.py,sha256=wbUdbx9jVSHzg2kNDhMFuDstbVTyZOcGAwnSeGeFIqs,875
 castor_extractor/warehouse/snowflake/credentials_test.py,sha256=Lkc-DHXOvr50KrqAW4nt_x0IA0Mu_CsBVu6ATnzQB6I,673
-castor_extractor/warehouse/snowflake/extract.py,sha256=x-qCz51wAsPyeP91-nuGqT1Q-AH-5iXGUKCiIV6tlFY,2882
+castor_extractor/warehouse/snowflake/extract.py,sha256=fcze0VBe9OOAFSr25T9L6CY506Vm_xDEvvy8NWuLW1s,2956
 castor_extractor/warehouse/snowflake/queries/.sqlfluff,sha256=vttrwcr64JVIuvc7WIg9C54cbOkjg_VjXNR7YnTGOPE,31
 castor_extractor/warehouse/snowflake/queries/column.sql,sha256=pAW2UNnut0a483OY2rjOXCdCtQg0g254g61Bt51CIB4,1803
 castor_extractor/warehouse/snowflake/queries/column_lineage.sql,sha256=YKBiZ6zySSNcXLDXwm31EjGIIkkkZc0-S6hI1SRM80o,1179
 castor_extractor/warehouse/snowflake/queries/database.sql,sha256=ifZXoKUXtsrGOxml6AcNhA4yybIyatH5va7bcp-lgCU,483
+castor_extractor/warehouse/snowflake/queries/function.sql,sha256=8LRh0ybhd-RldJ8UZspWUm3yv52evq11O2uqIO4KqeQ,372
 castor_extractor/warehouse/snowflake/queries/grant_to_role.sql,sha256=O7AJ1LzoXGDFmiVvQ8EMJ5x8FSAnaxRPdmRyAlEmkUM,272
 castor_extractor/warehouse/snowflake/queries/grant_to_user.sql,sha256=7AalVajU5vRRpIiys1igSwmDXirbwpMTvJr2ihSz2NE,143
 castor_extractor/warehouse/snowflake/queries/query.sql,sha256=-OYcWUvdPBkpOfezkZaW7hrOdDz3JyoqjNdRm_88Rsk,1779
@@ -368,8 +370,8 @@ castor_extractor/warehouse/synapse/queries/schema.sql,sha256=aX9xNrBD_ydwl-znGSF
 castor_extractor/warehouse/synapse/queries/table.sql,sha256=mCE8bR1Vb7j7SwZW2gafcXidQ2fo1HwxcybA8wP2Kfs,1049
 castor_extractor/warehouse/synapse/queries/user.sql,sha256=sTb_SS7Zj3AXW1SggKPLNMCd0qoTpL7XI_BJRMaEpBg,67
 castor_extractor/warehouse/synapse/queries/view_ddl.sql,sha256=3EVbp5_yTgdByHFIPLHmnoOnqqLE77SrjAwFDvu4e54,249
-castor_extractor-0.16.9.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
-castor_extractor-0.16.9.dist-info/METADATA,sha256=qRP78w8BztI4N8IyOLoESkFdhKWByXf7PQQjFLTvu6A,6582
-castor_extractor-0.16.9.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-castor_extractor-0.16.9.dist-info/entry_points.txt,sha256=SbyPk58Gh-FRztfCNnUZQ6w7SatzNJFZ6GIJLNsy7tI,1427
-castor_extractor-0.16.9.dist-info/RECORD,,
+castor_extractor-0.16.15.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
+castor_extractor-0.16.15.dist-info/METADATA,sha256=CsdtS6LQFjsgi0A7tj0sMwtkQVYBye4Savn2DFGBHso,6583
+castor_extractor-0.16.15.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+castor_extractor-0.16.15.dist-info/entry_points.txt,sha256=SbyPk58Gh-FRztfCNnUZQ6w7SatzNJFZ6GIJLNsy7tI,1427
+castor_extractor-0.16.15.dist-info/RECORD,,