castor-extractor 0.24.29__py3-none-any.whl → 0.24.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +16 -0
- castor_extractor/commands/extract_looker_studio.py +8 -0
- castor_extractor/commands/extract_tableau.py +15 -0
- castor_extractor/visualization/looker_studio/client/client.py +23 -6
- castor_extractor/visualization/looker_studio/extract.py +32 -0
- castor_extractor/visualization/looker_studio/extract_test.py +19 -0
- castor_extractor/visualization/salesforce_reporting/assets.py +1 -0
- castor_extractor/visualization/salesforce_reporting/client/rest.py +52 -0
- castor_extractor/visualization/sigma/client/client.py +47 -7
- castor_extractor/visualization/sigma/client/client_test.py +19 -0
- castor_extractor/visualization/sigma/client/pagination.py +1 -0
- castor_extractor/visualization/tableau/client/client.py +9 -1
- castor_extractor/visualization/tableau/client/client_metadata_api.py +49 -11
- castor_extractor/visualization/tableau/extract.py +4 -0
- castor_extractor/warehouse/databricks/sql_client.py +14 -12
- {castor_extractor-0.24.29.dist-info → castor_extractor-0.24.33.dist-info}/METADATA +17 -1
- {castor_extractor-0.24.29.dist-info → castor_extractor-0.24.33.dist-info}/RECORD +20 -18
- {castor_extractor-0.24.29.dist-info → castor_extractor-0.24.33.dist-info}/LICENCE +0 -0
- {castor_extractor-0.24.29.dist-info → castor_extractor-0.24.33.dist-info}/WHEEL +0 -0
- {castor_extractor-0.24.29.dist-info → castor_extractor-0.24.33.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED
|
@@ -1,5 +1,21 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.24.33 - 2025-07-10
|
|
4
|
+
|
|
5
|
+
* Tableau - Add an option to skip fields ingestion
|
|
6
|
+
|
|
7
|
+
## 0.24.32 - 2025-07-02
|
|
8
|
+
|
|
9
|
+
* Salesforce reporting - extract report's metadata
|
|
10
|
+
|
|
11
|
+
## 0.24.31 - 2025-07-02
|
|
12
|
+
|
|
13
|
+
* Looker Studio: add option to list users via a provided JSON file
|
|
14
|
+
|
|
15
|
+
## 0.24.30 - 2025-06-26
|
|
16
|
+
|
|
17
|
+
* Sigma: remove retry on timeout, decrease pagination for queries
|
|
18
|
+
|
|
3
19
|
## 0.24.29 - 2025-06-24
|
|
4
20
|
|
|
5
21
|
* Strategy: skip descriptions on ValueErrors
|
|
@@ -30,6 +30,14 @@ def main():
|
|
|
30
30
|
default=False,
|
|
31
31
|
help="Skips the extraction of activity logs",
|
|
32
32
|
)
|
|
33
|
+
parser.add_argument(
|
|
34
|
+
"--users-file-path",
|
|
35
|
+
help=(
|
|
36
|
+
"Optional path to a JSON file with user email addresses "
|
|
37
|
+
'as a list of strings (e.g. ["foo@bar.com", "fee@bar.com"]). '
|
|
38
|
+
"If provided, only extracts assets owned by the specified users."
|
|
39
|
+
),
|
|
40
|
+
)
|
|
33
41
|
|
|
34
42
|
parser.add_argument("-o", "--output", help="Directory to write to")
|
|
35
43
|
|
|
@@ -28,6 +28,13 @@ def main():
|
|
|
28
28
|
help="Option to avoid extracting Tableau columns, default to False",
|
|
29
29
|
)
|
|
30
30
|
|
|
31
|
+
parser.add_argument(
|
|
32
|
+
"--skip-fields",
|
|
33
|
+
dest="skip_fields",
|
|
34
|
+
action="store_true",
|
|
35
|
+
help="Option to avoid extracting Tableau fields, default to False",
|
|
36
|
+
)
|
|
37
|
+
|
|
31
38
|
parser.add_argument(
|
|
32
39
|
"--with-pulse",
|
|
33
40
|
dest="with_pulse",
|
|
@@ -41,6 +48,14 @@ def main():
|
|
|
41
48
|
required=False,
|
|
42
49
|
)
|
|
43
50
|
|
|
51
|
+
parser.add_argument(
|
|
52
|
+
"-ie",
|
|
53
|
+
"--ignore-errors",
|
|
54
|
+
action="store_true",
|
|
55
|
+
dest="ignore_errors",
|
|
56
|
+
help="Allow partial extraction of Fields and Columns: skip batch in case of Timeout errors",
|
|
57
|
+
)
|
|
58
|
+
|
|
44
59
|
parser.add_argument("-o", "--output", help="Directory to write to")
|
|
45
60
|
|
|
46
61
|
tableau.extract_all(**parse_filled_arguments(parser))
|
|
@@ -36,23 +36,40 @@ class LookerStudioClient:
|
|
|
36
36
|
self,
|
|
37
37
|
credentials: LookerStudioCredentials,
|
|
38
38
|
bigquery_credentials: Optional[dict] = None,
|
|
39
|
+
user_emails: Optional[list[str]] = None,
|
|
39
40
|
):
|
|
40
41
|
self.admin_sdk_client = AdminSDKClient(credentials)
|
|
41
42
|
self.looker_studio_client = LookerStudioAPIClient(credentials)
|
|
43
|
+
self.user_emails = user_emails
|
|
42
44
|
|
|
43
45
|
self.bigquery_client: Optional[BigQueryClient] = None
|
|
44
46
|
if bigquery_credentials:
|
|
45
47
|
self.bigquery_client = BigQueryClient(bigquery_credentials)
|
|
46
48
|
|
|
47
|
-
def
|
|
49
|
+
def _list_user_emails(self) -> Iterator[str]:
|
|
48
50
|
"""
|
|
49
|
-
|
|
51
|
+
Lists user emails either from a provided JSON file or via the Admin SDK API.
|
|
52
|
+
|
|
53
|
+
Using all Google Workspace users can be inefficient for large clients -
|
|
54
|
+
the client might spend hours checking thousands of users for Looker Studio
|
|
55
|
+
assets when only a handful actually own any. A JSON file allows
|
|
56
|
+
targeting known owners instead.
|
|
50
57
|
"""
|
|
51
|
-
|
|
58
|
+
if self.user_emails is not None:
|
|
59
|
+
yield from self.user_emails
|
|
60
|
+
return
|
|
61
|
+
|
|
62
|
+
for user in self.admin_sdk_client.list_users():
|
|
63
|
+
yield user[USER_EMAIL_FIELD]
|
|
52
64
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
65
|
+
def _get_assets(self) -> Iterator[dict]:
|
|
66
|
+
"""
|
|
67
|
+
Extracts reports and data sources user by user. The loop is necessary
|
|
68
|
+
because the Looker Studio API can only retrieve the assets owned by a
|
|
69
|
+
single user.
|
|
70
|
+
"""
|
|
71
|
+
for user_email in self._list_user_emails():
|
|
72
|
+
yield from self.looker_studio_client.fetch_user_assets(user_email)
|
|
56
73
|
|
|
57
74
|
def _get_source_queries(self) -> Iterator[dict]:
|
|
58
75
|
"""
|
|
@@ -70,21 +70,53 @@ def _bigquery_credentials_or_none(params: dict) -> Optional[dict]:
|
|
|
70
70
|
return cast(dict, json.load(file))
|
|
71
71
|
|
|
72
72
|
|
|
73
|
+
def _validate_user_emails(user_emails: list[str]):
|
|
74
|
+
"""
|
|
75
|
+
Raises an error if the user emails are not in the expected format (list of strings),
|
|
76
|
+
or if the list is empty.
|
|
77
|
+
"""
|
|
78
|
+
if not isinstance(user_emails, list):
|
|
79
|
+
raise TypeError("The users file must be a list")
|
|
80
|
+
|
|
81
|
+
if len(user_emails) == 0:
|
|
82
|
+
raise ValueError("The users file must contain at least one user email")
|
|
83
|
+
|
|
84
|
+
if not all(isinstance(email, str) for email in user_emails):
|
|
85
|
+
raise TypeError("All items in users list must be strings")
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _read_optional_user_emails(
|
|
89
|
+
users_file_path: Optional[str],
|
|
90
|
+
) -> Optional[list[str]]:
|
|
91
|
+
"""Loads the user emails from a file, if it was provided."""
|
|
92
|
+
if not users_file_path:
|
|
93
|
+
return None
|
|
94
|
+
|
|
95
|
+
with open(users_file_path, "r") as file:
|
|
96
|
+
user_emails = json.load(file)
|
|
97
|
+
|
|
98
|
+
_validate_user_emails(user_emails)
|
|
99
|
+
return user_emails
|
|
100
|
+
|
|
101
|
+
|
|
73
102
|
def extract_all(**kwargs) -> None:
|
|
74
103
|
"""
|
|
75
104
|
Extracts data from Looker Studio and stores the output files locally under
|
|
76
105
|
the given output_directory.
|
|
77
106
|
"""
|
|
107
|
+
users_file_path = kwargs.get("users_file_path")
|
|
78
108
|
output_directory = kwargs.get("output") or from_env(OUTPUT_DIR)
|
|
79
109
|
|
|
80
110
|
credentials = _credentials(kwargs)
|
|
81
111
|
has_view_activity_logs = bool(credentials.has_view_activity_logs)
|
|
112
|
+
user_emails = _read_optional_user_emails(users_file_path)
|
|
82
113
|
|
|
83
114
|
bigquery_credentials = _bigquery_credentials_or_none(kwargs)
|
|
84
115
|
|
|
85
116
|
client = LookerStudioClient(
|
|
86
117
|
credentials=credentials,
|
|
87
118
|
bigquery_credentials=bigquery_credentials,
|
|
119
|
+
user_emails=user_emails,
|
|
88
120
|
)
|
|
89
121
|
ts = current_timestamp()
|
|
90
122
|
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from .extract import _validate_user_emails
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test__validate_user_emails():
|
|
7
|
+
with pytest.raises(TypeError):
|
|
8
|
+
_validate_user_emails("toto@tata.com")
|
|
9
|
+
|
|
10
|
+
with pytest.raises(TypeError):
|
|
11
|
+
_validate_user_emails({"not": "the", "right": "format"})
|
|
12
|
+
|
|
13
|
+
with pytest.raises(ValueError):
|
|
14
|
+
_validate_user_emails([])
|
|
15
|
+
|
|
16
|
+
with pytest.raises(TypeError):
|
|
17
|
+
_validate_user_emails([1, 2, 3, 4])
|
|
18
|
+
|
|
19
|
+
_validate_user_emails(["admin@toto.com", "tata@toto.com"])
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from collections.abc import Iterator
|
|
3
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
3
4
|
from typing import Optional
|
|
4
5
|
|
|
6
|
+
import requests
|
|
7
|
+
|
|
5
8
|
from ....utils import build_url
|
|
6
9
|
from ....utils.salesforce import SalesforceBaseClient
|
|
7
10
|
from ..assets import SalesforceReportingAsset
|
|
@@ -15,6 +18,8 @@ REQUIRING_URL_ASSETS = (
|
|
|
15
18
|
SalesforceReportingAsset.FOLDERS,
|
|
16
19
|
)
|
|
17
20
|
|
|
21
|
+
_CONCURRENT_THREADS = 50
|
|
22
|
+
|
|
18
23
|
|
|
19
24
|
class SalesforceReportingClient(SalesforceBaseClient):
|
|
20
25
|
"""
|
|
@@ -50,6 +55,50 @@ class SalesforceReportingClient(SalesforceBaseClient):
|
|
|
50
55
|
url = self._get_asset_url(asset_type, asset)
|
|
51
56
|
yield {**asset, "Url": url}
|
|
52
57
|
|
|
58
|
+
def _metadata(self, report_id: str) -> Optional[dict]:
|
|
59
|
+
url = f"services/data/v60.0/analytics/reports/{report_id}/describe"
|
|
60
|
+
try:
|
|
61
|
+
metadata = self._get(url, retry_on_timeout=False)
|
|
62
|
+
# pick only what we need to build the lineage
|
|
63
|
+
columns = metadata["reportExtendedMetadata"]["detailColumnInfo"]
|
|
64
|
+
return {
|
|
65
|
+
"reportId": report_id,
|
|
66
|
+
"detailColumnInfo": columns or dict(),
|
|
67
|
+
}
|
|
68
|
+
except (requests.HTTPError, requests.RequestException) as ex:
|
|
69
|
+
# Extracting column metadata is used only for lineage purposes
|
|
70
|
+
# and is non-critical. API errors are common during this step,
|
|
71
|
+
# so we choose to skip them rather than fail the process. The same
|
|
72
|
+
# rows consistently fail, and retries have proven ineffective.
|
|
73
|
+
logger.info(ex)
|
|
74
|
+
return None
|
|
75
|
+
|
|
76
|
+
def _fetch_reports_metadata(self) -> Iterator[dict]:
|
|
77
|
+
"""
|
|
78
|
+
Use the "describe" endpoint to extract report metadata.
|
|
79
|
+
Keep only the detailColumnInfo, which is required for building the lineage.
|
|
80
|
+
|
|
81
|
+
More info here:
|
|
82
|
+
https://developer.salesforce.com/docs/atlas.en-us.api_analytics.meta/api_analytics/sforce_analytics_rest_api_getbasic_reportmetadata.htm
|
|
83
|
+
https://www.notion.so/castordoc/Salesforce-Lineage-216a1c3d458580859888cf4ca2d7fa51?source=copy_link
|
|
84
|
+
"""
|
|
85
|
+
# The "describe" endpoint requires report_ids. To avoid introducing
|
|
86
|
+
# task dependencies, we opted to re-extract the reports.
|
|
87
|
+
# It is fast anyway, since it's running a SQL query
|
|
88
|
+
reports = self.fetch(SalesforceReportingAsset.REPORTS)
|
|
89
|
+
report_ids = [report["Id"] for report in reports]
|
|
90
|
+
|
|
91
|
+
# Calling "describe" on each report individually can be slow,
|
|
92
|
+
# especially for accounts with thousands of reports. That's why
|
|
93
|
+
# we use multithreading here — it significantly improves performance.
|
|
94
|
+
with ThreadPoolExecutor(max_workers=_CONCURRENT_THREADS) as executor:
|
|
95
|
+
fetch_results = executor.map(self._metadata, report_ids)
|
|
96
|
+
|
|
97
|
+
for metadata in fetch_results:
|
|
98
|
+
if not metadata:
|
|
99
|
+
continue
|
|
100
|
+
yield metadata
|
|
101
|
+
|
|
53
102
|
def fetch(self, asset: SalesforceReportingAsset) -> list[dict]:
|
|
54
103
|
"""
|
|
55
104
|
Fetch Salesforce Reporting assets
|
|
@@ -59,4 +108,7 @@ class SalesforceReportingClient(SalesforceBaseClient):
|
|
|
59
108
|
if asset in REQUIRING_URL_ASSETS:
|
|
60
109
|
return list(self._fetch_and_add_url(asset))
|
|
61
110
|
|
|
111
|
+
if asset == SalesforceReportingAsset.REPORTS_METADATA:
|
|
112
|
+
return list(self._fetch_reports_metadata())
|
|
113
|
+
|
|
62
114
|
return list(self._query_all(queries[asset]))
|
|
@@ -2,7 +2,7 @@ from collections.abc import Iterator
|
|
|
2
2
|
from concurrent.futures import ThreadPoolExecutor
|
|
3
3
|
from functools import partial
|
|
4
4
|
from http import HTTPStatus
|
|
5
|
-
from typing import Callable, Optional
|
|
5
|
+
from typing import Callable, Iterable, Optional
|
|
6
6
|
|
|
7
7
|
import requests
|
|
8
8
|
from pydantic import BaseModel
|
|
@@ -19,7 +19,11 @@ from ....utils import (
|
|
|
19
19
|
from ..assets import SigmaAsset
|
|
20
20
|
from .credentials import SigmaCredentials
|
|
21
21
|
from .endpoints import SigmaEndpointFactory
|
|
22
|
-
from .pagination import
|
|
22
|
+
from .pagination import (
|
|
23
|
+
SIGMA_API_LIMIT,
|
|
24
|
+
SIGMA_QUERIES_PAGINATION_LIMIT,
|
|
25
|
+
SigmaPagination,
|
|
26
|
+
)
|
|
23
27
|
|
|
24
28
|
_CONTENT_TYPE = "application/x-www-form-urlencoded"
|
|
25
29
|
|
|
@@ -101,9 +105,27 @@ class SigmaClient(APIClient):
|
|
|
101
105
|
safe_mode=safe_mode or SIGMA_SAFE_MODE,
|
|
102
106
|
)
|
|
103
107
|
|
|
104
|
-
def _get_paginated(
|
|
108
|
+
def _get_paginated(
|
|
109
|
+
self,
|
|
110
|
+
endpoint: str,
|
|
111
|
+
limit: int = SIGMA_API_LIMIT,
|
|
112
|
+
) -> Callable:
|
|
113
|
+
"""
|
|
114
|
+
Sigma’s API does not experience random timeouts, unlike some other APIs.
|
|
115
|
+
However, extracting queries from certain workbooks can take a
|
|
116
|
+
significant amount of time.
|
|
117
|
+
Previously, when a timeout occurred, the system would retry multiple
|
|
118
|
+
times — even though we knew it would eventually fail due to the inherent
|
|
119
|
+
slowness of the operation.
|
|
120
|
+
These retries only delayed the inevitable failure without adding value.
|
|
121
|
+
To address this, we've disabled retries on timeout and instead adjusted
|
|
122
|
+
the page size when extracting queries.
|
|
123
|
+
"""
|
|
105
124
|
return partial(
|
|
106
|
-
self._get,
|
|
125
|
+
self._get,
|
|
126
|
+
retry_on_timeout=False, # explained in the docstring
|
|
127
|
+
endpoint=endpoint,
|
|
128
|
+
params={"limit": limit},
|
|
107
129
|
)
|
|
108
130
|
|
|
109
131
|
def _get_all_datasets(self) -> Iterator[dict]:
|
|
@@ -200,16 +222,34 @@ class SigmaClient(APIClient):
|
|
|
200
222
|
"element_id": lineage.context.element_id,
|
|
201
223
|
}
|
|
202
224
|
|
|
225
|
+
@staticmethod
|
|
226
|
+
def _yield_deduplicated_queries(
|
|
227
|
+
queries: Iterable[dict], workbook_id: str
|
|
228
|
+
) -> Iterator[dict]:
|
|
229
|
+
"""
|
|
230
|
+
Returns unique queries for a workbook. This is necessary because the API
|
|
231
|
+
unfortunately returns duplicate entries for some workbook elements.
|
|
232
|
+
"""
|
|
233
|
+
seen_elements = set()
|
|
234
|
+
|
|
235
|
+
for query in queries:
|
|
236
|
+
element_id = query["elementId"]
|
|
237
|
+
if element_id in seen_elements:
|
|
238
|
+
continue
|
|
239
|
+
|
|
240
|
+
seen_elements.add(element_id)
|
|
241
|
+
yield {**query, "workbook_id": workbook_id}
|
|
242
|
+
|
|
203
243
|
def _get_all_queries(self, workbooks: list[dict]) -> Iterator[dict]:
|
|
204
244
|
for workbook in workbooks:
|
|
205
245
|
workbook_id = workbook["workbookId"]
|
|
206
246
|
request = self._get_paginated(
|
|
207
|
-
SigmaEndpointFactory.queries(workbook_id)
|
|
247
|
+
SigmaEndpointFactory.queries(workbook_id),
|
|
248
|
+
limit=SIGMA_QUERIES_PAGINATION_LIMIT,
|
|
208
249
|
)
|
|
209
250
|
queries = fetch_all_pages(request, SigmaPagination)
|
|
210
251
|
|
|
211
|
-
|
|
212
|
-
yield {**query, "workbook_id": workbook_id}
|
|
252
|
+
yield from self._yield_deduplicated_queries(queries, workbook_id)
|
|
213
253
|
|
|
214
254
|
def fetch(
|
|
215
255
|
self,
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from .client import SigmaClient
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_SigmaClient__yield_deduplicated_queries():
|
|
5
|
+
workbook_id = "workbook1"
|
|
6
|
+
mock_queries = [
|
|
7
|
+
{"elementId": "element1", "name": "Query 1"},
|
|
8
|
+
{"elementId": "element2", "name": "Query 2"},
|
|
9
|
+
{"elementId": "element1", "name": "Query 1"}, # Duplicate
|
|
10
|
+
{"elementId": "element3", "name": "Query 3"},
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
queries = list(
|
|
14
|
+
SigmaClient._yield_deduplicated_queries(mock_queries, workbook_id)
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
assert len(queries) == 3
|
|
18
|
+
for query in queries:
|
|
19
|
+
assert query["workbook_id"] == workbook_id
|
|
@@ -122,13 +122,17 @@ class TableauClient:
|
|
|
122
122
|
credentials: TableauCredentials,
|
|
123
123
|
timeout_sec: int = DEFAULT_TIMEOUT_SECONDS,
|
|
124
124
|
with_columns: bool = True,
|
|
125
|
+
with_fields: bool = True,
|
|
125
126
|
with_pulse: bool = False,
|
|
126
127
|
override_page_size: Optional[int] = None,
|
|
128
|
+
ignore_errors: bool = False,
|
|
127
129
|
):
|
|
128
130
|
self._credentials = credentials
|
|
129
131
|
self._server = _server(credentials.server_url, timeout_sec)
|
|
130
132
|
self._with_columns = with_columns
|
|
133
|
+
self._with_fields = with_fields
|
|
131
134
|
self._with_pulse = with_pulse
|
|
135
|
+
self._ignore_errors = ignore_errors
|
|
132
136
|
|
|
133
137
|
self._client_metadata = TableauClientMetadataApi(
|
|
134
138
|
server=self._server,
|
|
@@ -221,6 +225,10 @@ class TableauClient:
|
|
|
221
225
|
logger.info(f"Skipping asset {asset} - deactivated columns")
|
|
222
226
|
return []
|
|
223
227
|
|
|
228
|
+
if asset == TableauAsset.FIELD and not self._with_fields:
|
|
229
|
+
logger.info(f"Skipping asset {asset} - deactivated fields")
|
|
230
|
+
return []
|
|
231
|
+
|
|
224
232
|
logger.info(f"Extracting {asset.name}...")
|
|
225
233
|
|
|
226
234
|
if asset == TableauAsset.DATASOURCE:
|
|
@@ -240,4 +248,4 @@ class TableauClient:
|
|
|
240
248
|
return self._client_rest.fetch(asset)
|
|
241
249
|
|
|
242
250
|
# other assets can be extracted via Metadata API
|
|
243
|
-
return self._client_metadata.fetch(asset)
|
|
251
|
+
return self._client_metadata.fetch(asset, self._ignore_errors)
|
|
@@ -2,6 +2,7 @@ import logging
|
|
|
2
2
|
from collections.abc import Iterator
|
|
3
3
|
from typing import Optional
|
|
4
4
|
|
|
5
|
+
import requests
|
|
5
6
|
import tableauserverclient as TSC # type: ignore
|
|
6
7
|
|
|
7
8
|
from ....utils import SerializedAsset, retry
|
|
@@ -12,6 +13,13 @@ from .gql_queries import FIELDS_QUERIES, GQL_QUERIES, QUERY_TEMPLATE
|
|
|
12
13
|
|
|
13
14
|
logger = logging.getLogger(__name__)
|
|
14
15
|
|
|
16
|
+
# These assets are known to be error-prone, so it's acceptable if a few are missed.
|
|
17
|
+
# If errors occur, skip the current batch.
|
|
18
|
+
_SAFE_MODE_ASSETS = (
|
|
19
|
+
TableauAsset.COLUMN,
|
|
20
|
+
TableauAsset.FIELD,
|
|
21
|
+
)
|
|
22
|
+
|
|
15
23
|
# increase the value when extraction is too slow
|
|
16
24
|
# decrease the value when timeouts arise
|
|
17
25
|
_CUSTOM_PAGE_SIZE: dict[TableauAsset, int] = {
|
|
@@ -92,6 +100,7 @@ def gql_query_scroll(
|
|
|
92
100
|
resource: str,
|
|
93
101
|
fields: str,
|
|
94
102
|
page_size: int,
|
|
103
|
+
skip_batch: bool,
|
|
95
104
|
) -> Iterator[SerializedAsset]:
|
|
96
105
|
"""
|
|
97
106
|
Iterate over GQL query results, handling pagination and cursor
|
|
@@ -119,15 +128,22 @@ def gql_query_scroll(
|
|
|
119
128
|
|
|
120
129
|
current_offset = 0
|
|
121
130
|
while True:
|
|
122
|
-
|
|
123
|
-
|
|
131
|
+
try:
|
|
132
|
+
payload = _call(first=page_size, offset=current_offset)
|
|
133
|
+
yield payload["nodes"]
|
|
134
|
+
|
|
135
|
+
current_offset += len(payload["nodes"])
|
|
136
|
+
total = payload["totalCount"]
|
|
137
|
+
logger.info(f"Extracted {current_offset}/{total} {resource}")
|
|
124
138
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
139
|
+
if not payload["pageInfo"]["hasNextPage"]:
|
|
140
|
+
break
|
|
141
|
+
except requests.exceptions.ReadTimeout:
|
|
142
|
+
if not skip_batch:
|
|
143
|
+
raise
|
|
128
144
|
|
|
129
|
-
|
|
130
|
-
|
|
145
|
+
logger.warning("Skipping batch because of TableauServer Timeout")
|
|
146
|
+
current_offset += page_size
|
|
131
147
|
|
|
132
148
|
|
|
133
149
|
def _deduplicate(result_pages: Iterator[SerializedAsset]) -> SerializedAsset:
|
|
@@ -177,12 +193,14 @@ class TableauClientMetadataApi:
|
|
|
177
193
|
resource: str,
|
|
178
194
|
fields: str,
|
|
179
195
|
page_size: int = DEFAULT_PAGE_SIZE,
|
|
196
|
+
skip_batch: bool = False,
|
|
180
197
|
) -> SerializedAsset:
|
|
181
198
|
result_pages = gql_query_scroll(
|
|
182
199
|
self._server,
|
|
183
200
|
resource=resource,
|
|
184
201
|
fields=fields,
|
|
185
202
|
page_size=page_size,
|
|
203
|
+
skip_batch=skip_batch,
|
|
186
204
|
)
|
|
187
205
|
return _deduplicate(result_pages)
|
|
188
206
|
|
|
@@ -193,21 +211,41 @@ class TableauClientMetadataApi:
|
|
|
193
211
|
or DEFAULT_PAGE_SIZE
|
|
194
212
|
)
|
|
195
213
|
|
|
196
|
-
def _fetch_fields(self) -> SerializedAsset:
|
|
214
|
+
def _fetch_fields(self, skip_batch: bool = False) -> SerializedAsset:
|
|
197
215
|
result: SerializedAsset = []
|
|
198
216
|
page_size = self._page_size(TableauAsset.FIELD)
|
|
199
217
|
for resource, fields in FIELDS_QUERIES:
|
|
200
|
-
current = self._call(
|
|
218
|
+
current = self._call(
|
|
219
|
+
resource,
|
|
220
|
+
fields,
|
|
221
|
+
page_size,
|
|
222
|
+
skip_batch=skip_batch,
|
|
223
|
+
)
|
|
201
224
|
result.extend(current)
|
|
202
225
|
return result
|
|
203
226
|
|
|
227
|
+
@staticmethod
|
|
228
|
+
def _should_skip_batch_with_timeout(
|
|
229
|
+
asset: TableauAsset,
|
|
230
|
+
ignore_metadata_errors: bool = False,
|
|
231
|
+
) -> bool:
|
|
232
|
+
return asset in _SAFE_MODE_ASSETS and ignore_metadata_errors
|
|
233
|
+
|
|
204
234
|
def fetch(
|
|
205
235
|
self,
|
|
206
236
|
asset: TableauAsset,
|
|
237
|
+
ignore_errors: bool = False,
|
|
207
238
|
) -> SerializedAsset:
|
|
239
|
+
skip_batch = self._should_skip_batch_with_timeout(asset, ignore_errors)
|
|
240
|
+
|
|
208
241
|
if asset == TableauAsset.FIELD:
|
|
209
|
-
return self._fetch_fields()
|
|
242
|
+
return self._fetch_fields(skip_batch=skip_batch)
|
|
210
243
|
|
|
211
244
|
page_size = self._page_size(asset)
|
|
212
245
|
resource, fields = GQL_QUERIES[asset]
|
|
213
|
-
return self._call(
|
|
246
|
+
return self._call(
|
|
247
|
+
resource=resource,
|
|
248
|
+
fields=fields,
|
|
249
|
+
page_size=page_size,
|
|
250
|
+
skip_batch=skip_batch,
|
|
251
|
+
)
|
|
@@ -33,16 +33,20 @@ def extract_all(**kwargs) -> None:
|
|
|
33
33
|
"""
|
|
34
34
|
output_directory = kwargs.get("output") or from_env(OUTPUT_DIR)
|
|
35
35
|
with_columns = not kwargs.get("skip_columns")
|
|
36
|
+
with_fields = not kwargs.get("skip_fields")
|
|
36
37
|
with_pulse = kwargs.get("with_pulse") or False
|
|
37
38
|
page_size = kwargs.get("page_size")
|
|
39
|
+
ignore_errors = kwargs.get("ignore_errors") or False
|
|
38
40
|
timestamp = current_timestamp()
|
|
39
41
|
|
|
40
42
|
credentials = TableauCredentials(**kwargs)
|
|
41
43
|
client = TableauClient(
|
|
42
44
|
credentials,
|
|
43
45
|
with_columns=with_columns,
|
|
46
|
+
with_fields=with_fields,
|
|
44
47
|
with_pulse=with_pulse,
|
|
45
48
|
override_page_size=page_size,
|
|
49
|
+
ignore_errors=ignore_errors,
|
|
46
50
|
)
|
|
47
51
|
client.login()
|
|
48
52
|
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from collections import defaultdict
|
|
3
3
|
from datetime import date
|
|
4
|
-
from typing import Optional
|
|
5
4
|
|
|
6
5
|
from databricks import sql # type: ignore
|
|
7
6
|
|
|
@@ -17,7 +16,7 @@ _INFORMATION_SCHEMA_SQL = "SELECT * FROM system.information_schema"
|
|
|
17
16
|
|
|
18
17
|
_LINEAGE_SQL_TPL = """
|
|
19
18
|
SELECT * FROM system.access.{table_name}
|
|
20
|
-
WHERE event_date =
|
|
19
|
+
WHERE event_date = DATE('{day}')
|
|
21
20
|
"""
|
|
22
21
|
|
|
23
22
|
|
|
@@ -34,11 +33,7 @@ class DatabricksSQLClient:
|
|
|
34
33
|
self._host = credentials.host
|
|
35
34
|
self._token = credentials.token
|
|
36
35
|
|
|
37
|
-
def execute_sql(
|
|
38
|
-
self,
|
|
39
|
-
query: str,
|
|
40
|
-
params: Optional[dict] = None,
|
|
41
|
-
):
|
|
36
|
+
def execute_sql(self, query: str):
|
|
42
37
|
"""
|
|
43
38
|
Execute a SQL query on Databricks system tables and return the results.
|
|
44
39
|
https://docs.databricks.com/en/dev-tools/python-sql-connector.html
|
|
@@ -52,7 +47,7 @@ class DatabricksSQLClient:
|
|
|
52
47
|
access_token=self._token,
|
|
53
48
|
) as connection:
|
|
54
49
|
with connection.cursor() as cursor:
|
|
55
|
-
cursor.execute(query
|
|
50
|
+
cursor.execute(query)
|
|
56
51
|
return cursor.fetchall()
|
|
57
52
|
|
|
58
53
|
def _needs_extraction(self, entity: TagEntity) -> bool:
|
|
@@ -89,16 +84,23 @@ class DatabricksSQLClient:
|
|
|
89
84
|
return mapping
|
|
90
85
|
|
|
91
86
|
def get_lineage(
|
|
92
|
-
self,
|
|
87
|
+
self,
|
|
88
|
+
lineage_entity: LineageEntity,
|
|
89
|
+
day: date,
|
|
93
90
|
) -> list[dict]:
|
|
94
91
|
"""
|
|
95
92
|
Fetch {TABLE|COLUMN} lineage of the given day, via system tables
|
|
96
93
|
https://docs.databricks.com/en/admin/system-tables/lineage.html
|
|
94
|
+
|
|
95
|
+
Unfortunately, passing parameters is not always supported. We have to
|
|
96
|
+
format the query beforehand and pass it as plain text for execution.
|
|
97
97
|
"""
|
|
98
98
|
table_name = f"{lineage_entity.value.lower()}_lineage"
|
|
99
|
-
query = _LINEAGE_SQL_TPL.format(
|
|
100
|
-
|
|
101
|
-
|
|
99
|
+
query = _LINEAGE_SQL_TPL.format(
|
|
100
|
+
table_name=table_name,
|
|
101
|
+
day=day,
|
|
102
|
+
)
|
|
103
|
+
result = self.execute_sql(query)
|
|
102
104
|
data = []
|
|
103
105
|
for row in result:
|
|
104
106
|
data.append(row.asDict())
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: castor-extractor
|
|
3
|
-
Version: 0.24.29
|
|
3
|
+
Version: 0.24.33
|
|
4
4
|
Summary: Extract your metadata assets.
|
|
5
5
|
Home-page: https://www.castordoc.com/
|
|
6
6
|
License: EULA
|
|
@@ -215,6 +215,22 @@ For any questions or bug report, contact us at [support@coalesce.io](mailto:supp
|
|
|
215
215
|
|
|
216
216
|
# Changelog
|
|
217
217
|
|
|
218
|
+
## 0.24.33 - 2025-07-10
|
|
219
|
+
|
|
220
|
+
* Tableau - Add an option to skip fields ingestion
|
|
221
|
+
|
|
222
|
+
## 0.24.32 - 2025-07-02
|
|
223
|
+
|
|
224
|
+
* Salesforce reporting - extract report's metadata
|
|
225
|
+
|
|
226
|
+
## 0.24.31 - 2025-07-02
|
|
227
|
+
|
|
228
|
+
* Looker Studio: add option to list users via a provided JSON file
|
|
229
|
+
|
|
230
|
+
## 0.24.30 - 2025-06-26
|
|
231
|
+
|
|
232
|
+
* Sigma: remove retry on timeout, decrease pagination for queries
|
|
233
|
+
|
|
218
234
|
## 0.24.29 - 2025-06-24
|
|
219
235
|
|
|
220
236
|
* Strategy: skip descriptions on ValueErrors
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
CHANGELOG.md,sha256=
|
|
1
|
+
CHANGELOG.md,sha256=jKQMJGiDeDEZG-753wDrtfOoOYa5Db5Liy0AsATdsuc,18779
|
|
2
2
|
Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
|
|
3
3
|
DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
|
|
4
4
|
LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
@@ -10,7 +10,7 @@ castor_extractor/commands/extract_confluence.py,sha256=blYcnDqywXNKRQ1aZAD9FclhL
|
|
|
10
10
|
castor_extractor/commands/extract_databricks.py,sha256=SVKyoa-BBUQAM6HRHf1Wdg9-tpICic2yyvXQwHcNBhA,1264
|
|
11
11
|
castor_extractor/commands/extract_domo.py,sha256=jvAawUsUTHrwCn_koK6StmQr4n_b5GyvJi6uu6WS0SM,1061
|
|
12
12
|
castor_extractor/commands/extract_looker.py,sha256=cySLiolLCgrREJ9d0kMrJ7P8K3efHTBTzShalWVfI3A,1214
|
|
13
|
-
castor_extractor/commands/extract_looker_studio.py,sha256=
|
|
13
|
+
castor_extractor/commands/extract_looker_studio.py,sha256=YFQWabmBEaWZFCquMNZw_iq6TF95KBVGdemcYiJMhk8,1399
|
|
14
14
|
castor_extractor/commands/extract_metabase_api.py,sha256=NXctea4GT_1iRDitY92nV3TKSqhjEUwYSxwPJMRS3iw,786
|
|
15
15
|
castor_extractor/commands/extract_metabase_db.py,sha256=tYIhTPPgj1mN-07LyWcL6e-YoGp7HCWda58-5Ukyg_I,1255
|
|
16
16
|
castor_extractor/commands/extract_mode.py,sha256=Q4iO-VAKMg4zFPejhAO-foZibL5Ht3jsnhWKwJ0oqUU,823
|
|
@@ -26,7 +26,7 @@ castor_extractor/commands/extract_sigma.py,sha256=sxewHcZ1Doq35V2qnpX_zCKKXkrb1_
|
|
|
26
26
|
castor_extractor/commands/extract_snowflake.py,sha256=GwlrRxwEBjHqGs_3bs5vM9fzmv61_iwvBr1KcIgFgWM,2161
|
|
27
27
|
castor_extractor/commands/extract_sqlserver.py,sha256=lwhbcNChaXHZgMgSOch3faVr7WJw-sDU6GHl3lzBt_0,1141
|
|
28
28
|
castor_extractor/commands/extract_strategy.py,sha256=Q-pUymatPrBFGXobhyUPzFph0-t774-XOpjdCFF1dYo,821
|
|
29
|
-
castor_extractor/commands/extract_tableau.py,sha256=
|
|
29
|
+
castor_extractor/commands/extract_tableau.py,sha256=LNtI29LbVk1vp4RNrn89GmdW6R_7QBYunRmkowDhbco,1982
|
|
30
30
|
castor_extractor/commands/extract_thoughtspot.py,sha256=caAYJlH-vK7u5IUB6OKXxcaWfLgc7d_XqnFDWK6YNS4,639
|
|
31
31
|
castor_extractor/commands/file_check.py,sha256=TJx76Ymd0QCECmq35zRJMkPE8DJtSInB28MuSXWk8Ao,2644
|
|
32
32
|
castor_extractor/commands/upload.py,sha256=rLXp7gQ8zb1kLbho4FT87q8eJd8Gvo_TkyIynAaQ-4s,1342
|
|
@@ -191,14 +191,15 @@ castor_extractor/visualization/looker_studio/__init__.py,sha256=GccG-GJXoNhjXFPk
|
|
|
191
191
|
castor_extractor/visualization/looker_studio/assets.py,sha256=rI73rbVrfwkkepqZr0zPouP2lPUfJxSi21RKtOTHtAA,308
|
|
192
192
|
castor_extractor/visualization/looker_studio/client/__init__.py,sha256=YkQaVDJa-7KSwdOLjtgKJMRiafbGNKC_46YVx0hYZ1Q,129
|
|
193
193
|
castor_extractor/visualization/looker_studio/client/admin_sdk_client.py,sha256=HIeyT9JTW1TPwVzD2Q-VfJ99jMP80Z-4CznKAnTnp2w,3493
|
|
194
|
-
castor_extractor/visualization/looker_studio/client/client.py,sha256=
|
|
194
|
+
castor_extractor/visualization/looker_studio/client/client.py,sha256=N4AtFMAPhLoe2i2rVhCbqEVIoxzcBKFh29V9LjRcdH0,3969
|
|
195
195
|
castor_extractor/visualization/looker_studio/client/credentials.py,sha256=F4ISI8Ua_HJsMuGhYql28o3hKYR4sL_uzkrUkRiekRo,1347
|
|
196
196
|
castor_extractor/visualization/looker_studio/client/endpoints.py,sha256=5eY-ffqNDdlDBOOpiF7LpjyHMrzeClJktidCr1pTDUs,669
|
|
197
197
|
castor_extractor/visualization/looker_studio/client/enums.py,sha256=fHgemTaQpnwee8cw1YQVDsVnH--vTyFwT4Px8aVYYHQ,167
|
|
198
198
|
castor_extractor/visualization/looker_studio/client/looker_studio_api_client.py,sha256=Phq378VEaFLD-nyP2_A1wge6HUP45jSthhlNjD7aqSg,4085
|
|
199
199
|
castor_extractor/visualization/looker_studio/client/pagination.py,sha256=9HQ3Rkdiz2VB6AvYtZ0F-WouiD0pMmdZyAmkv-3wh08,783
|
|
200
200
|
castor_extractor/visualization/looker_studio/client/queries/query.sql,sha256=Ub4rdrJ5WTPWKI-eVmXrNMv0Ktmti4b-93zZBr0xEB0,1426
|
|
201
|
-
castor_extractor/visualization/looker_studio/extract.py,sha256=
|
|
201
|
+
castor_extractor/visualization/looker_studio/extract.py,sha256=NU48xQ83UtRW3jXKJcvofzqgEM2lHGjtTzjbKOSB50A,4059
|
|
202
|
+
castor_extractor/visualization/looker_studio/extract_test.py,sha256=ZckAxUMuoEjJ9RWkfRvt9M8SxblkQvsq-Grb8GSs-y0,492
|
|
202
203
|
castor_extractor/visualization/metabase/__init__.py,sha256=3E36cmkMyEgBB6Ot5rWk-N75i0G-7k24QTlc-Iol4pM,193
|
|
203
204
|
castor_extractor/visualization/metabase/assets.py,sha256=nu3FwQBU_hdS2DBvgXAwQlEEi76QiNK2tMKEtMyctaY,2874
|
|
204
205
|
castor_extractor/visualization/metabase/client/__init__.py,sha256=KBvaPMofBRV3m_sZAnKNCrJGr-Z88EbpdzEzWPQ_uBk,99
|
|
@@ -264,18 +265,19 @@ castor_extractor/visualization/qlik/client/rest.py,sha256=x_Vx0xjRvj4D5FPm2CzBH_
|
|
|
264
265
|
castor_extractor/visualization/qlik/client/rest_test.py,sha256=yfiUht6BcpBYS2uGdaKCH-tYe0fQ-joM4MbitKwOf24,1799
|
|
265
266
|
castor_extractor/visualization/qlik/extract.py,sha256=CkJ2UELZmADUxdB84VGH5-qd1tz9Dh_ywoLULTkbrII,2186
|
|
266
267
|
castor_extractor/visualization/salesforce_reporting/__init__.py,sha256=MvArD0GKNIpCDvLIYcpKrjMjFLhMyDETK6i3k0Fb6Tk,124
|
|
267
|
-
castor_extractor/visualization/salesforce_reporting/assets.py,sha256=
|
|
268
|
+
castor_extractor/visualization/salesforce_reporting/assets.py,sha256=m9UnyaocyzSYZh-NkiyjAyk9D1OMBwyP9gqAJrSIwMg,313
|
|
268
269
|
castor_extractor/visualization/salesforce_reporting/client/__init__.py,sha256=DIA6f_vNJZqT89qVYxg98Le7QeDn2y0Qew03V3J9t9o,44
|
|
269
|
-
castor_extractor/visualization/salesforce_reporting/client/rest.py,sha256
|
|
270
|
+
castor_extractor/visualization/salesforce_reporting/client/rest.py,sha256=AqL1DTOpRy9KToJTlZohqUaRR5q-Xta79exi8odh2uE,4333
|
|
270
271
|
castor_extractor/visualization/salesforce_reporting/client/soql.py,sha256=ytZnX6zE-NoS_Kz12KghMcCM4ukPwhMj6U0rQZ_8Isk,1621
|
|
271
272
|
castor_extractor/visualization/salesforce_reporting/extract.py,sha256=ScStilebLGf4HDTFqhVTQAvv_OrKxc8waycfBKdsVAc,1359
|
|
272
273
|
castor_extractor/visualization/sigma/__init__.py,sha256=GINql4yJLtjfOJgjHaWNpE13cMtnKNytiFRomwav27Q,114
|
|
273
274
|
castor_extractor/visualization/sigma/assets.py,sha256=JZ1Cpxnml8P3mIJoTUM57hvylB18ErECQXaP5FF63O4,268
|
|
274
275
|
castor_extractor/visualization/sigma/client/__init__.py,sha256=YQv06FBBQHvBMFg_tN0nUcmUp2NCL2s-eFTXG8rXaBg,74
|
|
275
|
-
castor_extractor/visualization/sigma/client/client.py,sha256=
|
|
276
|
+
castor_extractor/visualization/sigma/client/client.py,sha256=ZE44k5klBVnc5lld3tpjuKGeSdFmlJ0wr5DOB4pEfco,9446
|
|
277
|
+
castor_extractor/visualization/sigma/client/client_test.py,sha256=ae0ZOvKutCm44jnrJ-0_A5Y6ZGyDkMf9Ml3eEP8dNkY,581
|
|
276
278
|
castor_extractor/visualization/sigma/client/credentials.py,sha256=XddAuQSmCKpxJ70TQgRnOj0vMPYVtiStk_lMMQ1AiNM,693
|
|
277
279
|
castor_extractor/visualization/sigma/client/endpoints.py,sha256=DBFphbgoH78_MZUGM_bKBAq28Nl7LWSZ6VRsbxrxtDg,1162
|
|
278
|
-
castor_extractor/visualization/sigma/client/pagination.py,sha256=
|
|
280
|
+
castor_extractor/visualization/sigma/client/pagination.py,sha256=2bFA7GiBUUasFtHJKA90516d283p7Pg50-4zw6Fwt8I,726
|
|
279
281
|
castor_extractor/visualization/sigma/extract.py,sha256=XIT1qsj6g6dgBWP8HPfj_medZexu48EaY9tUwi14gzM,2298
|
|
280
282
|
castor_extractor/visualization/strategy/__init__.py,sha256=HOMv4JxqF5ZmViWi-pDE-PSXJRLTdXal_jtpHG_rlR8,123
|
|
281
283
|
castor_extractor/visualization/strategy/assets.py,sha256=yFXF_dX01patC0HQ1eU7Jo_4DZ4m6IJEg0uCB71tMoI,480
|
|
@@ -287,8 +289,8 @@ castor_extractor/visualization/strategy/extract.py,sha256=2fBuvS2xiOGXRpxXnZsE_C
|
|
|
287
289
|
castor_extractor/visualization/tableau/__init__.py,sha256=eFI_1hjdkxyUiAYiy3szwyuwn3yJ5C_KbpBU0ySJDcQ,138
|
|
288
290
|
castor_extractor/visualization/tableau/assets.py,sha256=HbCRd8VCj1WBEeqg9jwnygnT7xOFJ6PQD7Lq7sV-XR0,635
|
|
289
291
|
castor_extractor/visualization/tableau/client/__init__.py,sha256=P8RKFKOC63WkH5hdEytJOwHS9vzQ8GXreLfXZetmMP8,78
|
|
290
|
-
castor_extractor/visualization/tableau/client/client.py,sha256=
|
|
291
|
-
castor_extractor/visualization/tableau/client/client_metadata_api.py,sha256=
|
|
292
|
+
castor_extractor/visualization/tableau/client/client.py,sha256=QV-GFS4nEq976JLji57pIfsw2ZZaGTvfCFqy6_HOWMg,8204
|
|
293
|
+
castor_extractor/visualization/tableau/client/client_metadata_api.py,sha256=eAq9rjrB_2ZCQy9NwREHBOTXZffWdkwtwhzswm1pEfk,7449
|
|
292
294
|
castor_extractor/visualization/tableau/client/client_metadata_api_test.py,sha256=rikyQKDLFYHLJhHJTF3LwWhKJ80svtTsYp5n7n9oTU8,2665
|
|
293
295
|
castor_extractor/visualization/tableau/client/client_rest_api.py,sha256=x4dNw4PPJdalTlGowwkANwqiS2ZhGxzpQytkHq3KbpY,3988
|
|
294
296
|
castor_extractor/visualization/tableau/client/client_tsc.py,sha256=VI_PJyd1ty3HSYXHHQjshmG2ziowIbrwJRonRPCHbks,1820
|
|
@@ -297,7 +299,7 @@ castor_extractor/visualization/tableau/client/errors.py,sha256=ecT8Tit5VtzrOBB9y
|
|
|
297
299
|
castor_extractor/visualization/tableau/client/gql_queries.py,sha256=XJAfhpMZ5S7-AhfpOaoHMHCAdil-l5e5xB-CH4NC38M,2177
|
|
298
300
|
castor_extractor/visualization/tableau/client/rest_fields.py,sha256=ZKYYuMxg9PXhczVXaD4rXNk7dYyWJ1_bVM8FLEXju7s,888
|
|
299
301
|
castor_extractor/visualization/tableau/constants.py,sha256=lHGB50FgVNO2nXeIhkvQKivD8ZFBIjDrflgD5cTXKJw,104
|
|
300
|
-
castor_extractor/visualization/tableau/extract.py,sha256=
|
|
302
|
+
castor_extractor/visualization/tableau/extract.py,sha256=9mSHFJ2DGlW-cDYiRZlJafAgj4_ObACxO0l9vBBfjUw,1683
|
|
301
303
|
castor_extractor/visualization/thoughtspot/__init__.py,sha256=NhTGUk5Kdt54oCjHYoAt0cLBmVLys5lFYiRANL6wCmI,150
|
|
302
304
|
castor_extractor/visualization/thoughtspot/assets.py,sha256=SAQWPKaD2NTSDg7-GSkcRSSEkKSws0MJfOVcHkdeTSg,276
|
|
303
305
|
castor_extractor/visualization/thoughtspot/client/__init__.py,sha256=svrE2rMxR-OXctjPeAHMEPePlfcra-9KDevTMcHunAA,86
|
|
@@ -345,7 +347,7 @@ castor_extractor/warehouse/databricks/format_test.py,sha256=ls0IcOElqp_qecAzNbK0
|
|
|
345
347
|
castor_extractor/warehouse/databricks/lineage.py,sha256=jwiRXrgqBAtzQt5EgErYrN8YRyviEEHmyrSbw8TSPq4,2105
|
|
346
348
|
castor_extractor/warehouse/databricks/lineage_test.py,sha256=PyBn1eAoxLm4Bz5M0F4zmaxFX2mXRTM_uug5OKbQPQs,2684
|
|
347
349
|
castor_extractor/warehouse/databricks/pagination.py,sha256=sM1G0sN1pf1TPpI0Y3Oew378UGEKVkMRc2Mlu9tDjLo,545
|
|
348
|
-
castor_extractor/warehouse/databricks/sql_client.py,sha256=
|
|
350
|
+
castor_extractor/warehouse/databricks/sql_client.py,sha256=BchHMNqHPtZsJWhj2XYq3QVVTj3XfKhzhhPTJng8vXo,3656
|
|
349
351
|
castor_extractor/warehouse/databricks/types.py,sha256=-TFX4jS6_c3wQLOpJTKpLeGS21YIPjKDjISnzeUPdCc,46
|
|
350
352
|
castor_extractor/warehouse/databricks/utils.py,sha256=5CKn6Me1Tus97H_qDEz_5tkhd4ARmwk2qiC3GndjyCc,1969
|
|
351
353
|
castor_extractor/warehouse/databricks/utils_test.py,sha256=_guTuzRWRTZdDY7ils0X1K8jhI9T877MEtw3x_YDg9I,2415
|
|
@@ -428,8 +430,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
|
|
|
428
430
|
castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
|
|
429
431
|
castor_extractor/warehouse/sqlserver/query.py,sha256=g0hPT-RmeGi2DyenAi3o72cTlQsLToXIFYojqc8E5fQ,533
|
|
430
432
|
castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
|
|
431
|
-
castor_extractor-0.24.
|
|
432
|
-
castor_extractor-0.24.
|
|
433
|
-
castor_extractor-0.24.
|
|
434
|
-
castor_extractor-0.24.
|
|
435
|
-
castor_extractor-0.24.
|
|
433
|
+
castor_extractor-0.24.33.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
434
|
+
castor_extractor-0.24.33.dist-info/METADATA,sha256=vCEpwDM8sngoUEfrGtRPSjtCjTw6zxJGiJrnmj4eq_Y,26232
|
|
435
|
+
castor_extractor-0.24.33.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
|
436
|
+
castor_extractor-0.24.33.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
|
|
437
|
+
castor_extractor-0.24.33.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|