castor-extractor 0.21.9__py3-none-any.whl → 0.22.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. See the package's registry page for more details.
- CHANGELOG.md +8 -0
- castor_extractor/commands/__init__.py +0 -3
- castor_extractor/commands/file_check.py +1 -2
- castor_extractor/file_checker/column.py +5 -5
- castor_extractor/file_checker/file.py +7 -7
- castor_extractor/file_checker/file_test.py +2 -2
- castor_extractor/file_checker/templates/generic_warehouse.py +4 -6
- castor_extractor/knowledge/confluence/client/client.py +2 -1
- castor_extractor/knowledge/confluence/extract.py +3 -2
- castor_extractor/knowledge/notion/client/client.py +3 -2
- castor_extractor/knowledge/notion/extract.py +3 -2
- castor_extractor/quality/soda/client/client.py +2 -1
- castor_extractor/quality/soda/client/pagination.py +1 -3
- castor_extractor/types.py +3 -3
- castor_extractor/uploader/env.py +2 -2
- castor_extractor/uploader/upload.py +4 -3
- castor_extractor/uploader/utils.py +1 -1
- castor_extractor/utils/__init__.py +1 -0
- castor_extractor/utils/client/abstract.py +2 -1
- castor_extractor/utils/client/api/auth.py +2 -2
- castor_extractor/utils/client/api/auth_test.py +2 -2
- castor_extractor/utils/client/api/client.py +3 -3
- castor_extractor/utils/client/api/pagination.py +3 -2
- castor_extractor/utils/client/api/safe_request.py +5 -5
- castor_extractor/utils/collection.py +7 -11
- castor_extractor/utils/dbt/client.py +3 -3
- castor_extractor/utils/dbt/client_test.py +2 -2
- castor_extractor/utils/deprecate.py +1 -2
- castor_extractor/utils/files.py +5 -5
- castor_extractor/utils/formatter.py +5 -4
- castor_extractor/utils/json_stream_write.py +2 -1
- castor_extractor/utils/object.py +2 -1
- castor_extractor/utils/pager/pager.py +2 -4
- castor_extractor/utils/pager/pager_on_id.py +2 -1
- castor_extractor/utils/pager/pager_on_id_test.py +5 -5
- castor_extractor/utils/pager/pager_test.py +3 -3
- castor_extractor/utils/retry.py +4 -3
- castor_extractor/utils/retry_test.py +2 -3
- castor_extractor/utils/safe.py +3 -3
- castor_extractor/utils/salesforce/client.py +2 -1
- castor_extractor/utils/salesforce/credentials.py +1 -3
- castor_extractor/utils/store.py +2 -1
- castor_extractor/utils/string.py +2 -2
- castor_extractor/utils/string_test.py +1 -3
- castor_extractor/utils/time.py +4 -0
- castor_extractor/utils/time_test.py +8 -1
- castor_extractor/utils/type.py +3 -2
- castor_extractor/utils/validation.py +4 -4
- castor_extractor/utils/write.py +2 -2
- castor_extractor/visualization/domo/client/client.py +8 -7
- castor_extractor/visualization/domo/client/credentials.py +2 -2
- castor_extractor/visualization/domo/client/endpoints.py +2 -2
- castor_extractor/visualization/domo/extract.py +3 -2
- castor_extractor/visualization/looker/api/client.py +17 -16
- castor_extractor/visualization/looker/api/utils.py +2 -2
- castor_extractor/visualization/looker/assets.py +1 -3
- castor_extractor/visualization/looker/extract.py +4 -3
- castor_extractor/visualization/looker/fields.py +3 -3
- castor_extractor/visualization/looker/multithreading.py +3 -3
- castor_extractor/visualization/looker_studio/__init__.py +6 -0
- castor_extractor/visualization/looker_studio/assets.py +6 -0
- castor_extractor/visualization/looker_studio/client/__init__.py +3 -0
- castor_extractor/visualization/looker_studio/client/admin_sdk_client.py +90 -0
- castor_extractor/visualization/looker_studio/client/client.py +37 -0
- castor_extractor/visualization/looker_studio/client/credentials.py +20 -0
- castor_extractor/visualization/looker_studio/client/endpoints.py +18 -0
- castor_extractor/visualization/looker_studio/client/enums.py +8 -0
- castor_extractor/visualization/looker_studio/client/looker_studio_api_client.py +102 -0
- castor_extractor/visualization/looker_studio/client/pagination.py +31 -0
- castor_extractor/visualization/looker_studio/client/scopes.py +6 -0
- castor_extractor/visualization/metabase/assets.py +1 -3
- castor_extractor/visualization/metabase/client/api/client.py +8 -7
- castor_extractor/visualization/metabase/extract.py +3 -2
- castor_extractor/visualization/metabase/types.py +1 -3
- castor_extractor/visualization/mode/client/client.py +6 -6
- castor_extractor/visualization/mode/extract.py +2 -2
- castor_extractor/visualization/powerbi/assets.py +1 -3
- castor_extractor/visualization/powerbi/client/client.py +12 -11
- castor_extractor/visualization/powerbi/client/credentials.py +3 -3
- castor_extractor/visualization/powerbi/client/endpoints.py +2 -2
- castor_extractor/visualization/powerbi/extract.py +3 -2
- castor_extractor/visualization/qlik/assets.py +1 -3
- castor_extractor/visualization/qlik/client/constants.py +1 -3
- castor_extractor/visualization/qlik/client/engine/error.py +1 -3
- castor_extractor/visualization/qlik/client/master.py +3 -3
- castor_extractor/visualization/qlik/client/rest.py +12 -12
- castor_extractor/visualization/qlik/extract.py +4 -3
- castor_extractor/visualization/salesforce_reporting/client/rest.py +3 -2
- castor_extractor/visualization/salesforce_reporting/client/soql.py +1 -3
- castor_extractor/visualization/salesforce_reporting/extract.py +3 -2
- castor_extractor/visualization/sigma/client/client.py +9 -8
- castor_extractor/visualization/sigma/client/credentials.py +1 -3
- castor_extractor/visualization/sigma/extract.py +3 -2
- castor_extractor/visualization/tableau/assets.py +1 -2
- castor_extractor/visualization/tableau/client/client.py +1 -2
- castor_extractor/visualization/tableau/client/client_utils.py +3 -2
- castor_extractor/visualization/tableau/client/credentials.py +3 -3
- castor_extractor/visualization/tableau/client/safe_mode.py +1 -2
- castor_extractor/visualization/tableau/extract.py +2 -2
- castor_extractor/visualization/tableau/gql_fields.py +3 -3
- castor_extractor/visualization/tableau/tsc_fields.py +1 -2
- castor_extractor/visualization/tableau/types.py +3 -3
- castor_extractor/visualization/tableau_revamp/client/client_metadata_api.py +3 -2
- castor_extractor/visualization/tableau_revamp/client/client_rest_api.py +3 -3
- castor_extractor/visualization/tableau_revamp/client/client_tsc.py +3 -2
- castor_extractor/visualization/tableau_revamp/client/gql_queries.py +1 -3
- castor_extractor/visualization/tableau_revamp/client/rest_fields.py +1 -3
- castor_extractor/visualization/tableau_revamp/extract.py +2 -2
- castor_extractor/visualization/thoughtspot/client/client.py +3 -2
- castor_extractor/visualization/thoughtspot/client/utils.py +1 -1
- castor_extractor/visualization/thoughtspot/extract.py +3 -2
- castor_extractor/warehouse/abstract/asset.py +4 -5
- castor_extractor/warehouse/abstract/extract.py +4 -3
- castor_extractor/warehouse/abstract/query.py +4 -4
- castor_extractor/warehouse/bigquery/client.py +8 -8
- castor_extractor/warehouse/bigquery/extract.py +1 -1
- castor_extractor/warehouse/bigquery/query.py +2 -2
- castor_extractor/warehouse/bigquery/types.py +2 -4
- castor_extractor/warehouse/databricks/api_client.py +15 -14
- castor_extractor/warehouse/databricks/client.py +16 -16
- castor_extractor/warehouse/databricks/extract.py +4 -4
- castor_extractor/warehouse/databricks/format.py +12 -12
- castor_extractor/warehouse/databricks/lineage.py +11 -11
- castor_extractor/warehouse/databricks/pagination.py +2 -2
- castor_extractor/warehouse/databricks/types.py +4 -4
- castor_extractor/warehouse/databricks/utils.py +5 -4
- castor_extractor/warehouse/mysql/query.py +2 -2
- castor_extractor/warehouse/postgres/query.py +2 -2
- castor_extractor/warehouse/redshift/client.py +1 -1
- castor_extractor/warehouse/redshift/query.py +2 -2
- castor_extractor/warehouse/salesforce/client.py +8 -8
- castor_extractor/warehouse/salesforce/extract.py +3 -4
- castor_extractor/warehouse/salesforce/format.py +19 -11
- castor_extractor/warehouse/salesforce/format_test.py +24 -10
- castor_extractor/warehouse/snowflake/query.py +5 -5
- castor_extractor/warehouse/sqlserver/client.py +1 -1
- castor_extractor/warehouse/sqlserver/query.py +2 -2
- {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.1.dist-info}/METADATA +13 -6
- {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.1.dist-info}/RECORD +142 -131
- {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.1.dist-info}/LICENCE +0 -0
- {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.1.dist-info}/WHEEL +0 -0
- {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.1.dist-info}/entry_points.txt +0 -0
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Dict, Type
|
|
2
|
-
|
|
3
1
|
from .constants import (
|
|
4
2
|
ACCESS_DENIED_ERROR_CODE,
|
|
5
3
|
APP_SIZE_EXCEEDED_ERROR_CODE,
|
|
@@ -48,7 +46,7 @@ class QlikResponseKeyError(Exception):
|
|
|
48
46
|
"""
|
|
49
47
|
|
|
50
48
|
|
|
51
|
-
ERROR_CODE_MAPPING:
|
|
49
|
+
ERROR_CODE_MAPPING: dict[int, type[JsonRpcError]] = {
|
|
52
50
|
ACCESS_DENIED_ERROR_CODE: AccessDeniedError,
|
|
53
51
|
APP_SIZE_EXCEEDED_ERROR_CODE: AppSizeExceededError,
|
|
54
52
|
PERSISTENCE_READ_FAILED_ERROR_CODE: PersistenceReadFailedError,
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Callable,
|
|
1
|
+
from typing import Callable, Optional
|
|
2
2
|
|
|
3
3
|
from tqdm import tqdm # type: ignore
|
|
4
4
|
|
|
@@ -7,7 +7,7 @@ from .constants import APP_EXTERNAL_ID_KEY, SCOPED_ASSETS
|
|
|
7
7
|
from .engine import EngineApiClient, QlikCredentials
|
|
8
8
|
from .rest import RestApiClient
|
|
9
9
|
|
|
10
|
-
ListedData =
|
|
10
|
+
ListedData = list[dict]
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class MissingAppsScopeError(Exception):
|
|
@@ -54,7 +54,7 @@ class QlikMasterClient:
|
|
|
54
54
|
def __init__(
|
|
55
55
|
self,
|
|
56
56
|
credentials: QlikCredentials,
|
|
57
|
-
except_http_error_statuses: Optional[
|
|
57
|
+
except_http_error_statuses: Optional[list[int]] = None,
|
|
58
58
|
display_progress: bool = True,
|
|
59
59
|
):
|
|
60
60
|
self._server_url = credentials.base_url
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import Optional, Union
|
|
3
3
|
from urllib.parse import urljoin
|
|
4
4
|
|
|
5
5
|
import requests
|
|
@@ -20,7 +20,7 @@ from .engine import QlikCredentials
|
|
|
20
20
|
|
|
21
21
|
logger = logging.getLogger(__name__)
|
|
22
22
|
|
|
23
|
-
Response = Union[dict,
|
|
23
|
+
Response = Union[dict, list[dict]]
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
def _session() -> requests.Session:
|
|
@@ -62,7 +62,7 @@ class RestApiClient:
|
|
|
62
62
|
def __init__(
|
|
63
63
|
self,
|
|
64
64
|
credentials: QlikCredentials,
|
|
65
|
-
except_http_error_statuses: Optional[
|
|
65
|
+
except_http_error_statuses: Optional[list[int]] = None,
|
|
66
66
|
):
|
|
67
67
|
self._server_url = credentials.base_url
|
|
68
68
|
self._api_key = credentials.api_key
|
|
@@ -111,10 +111,10 @@ class RestApiClient:
|
|
|
111
111
|
except HTTPError as error:
|
|
112
112
|
return self._handle_http_error(error)
|
|
113
113
|
|
|
114
|
-
def _pager(self, first_page_url: str) ->
|
|
114
|
+
def _pager(self, first_page_url: str) -> list[dict]:
|
|
115
115
|
current_page_url = first_page_url
|
|
116
116
|
|
|
117
|
-
data:
|
|
117
|
+
data: list[dict] = []
|
|
118
118
|
while current_page_url:
|
|
119
119
|
response = self._call(current_page_url)
|
|
120
120
|
if not response:
|
|
@@ -130,7 +130,7 @@ class RestApiClient:
|
|
|
130
130
|
current_page_url = next_page_url
|
|
131
131
|
return data
|
|
132
132
|
|
|
133
|
-
def get(self, asset: QlikAsset) ->
|
|
133
|
+
def get(self, asset: QlikAsset) -> list[dict]:
|
|
134
134
|
"""
|
|
135
135
|
Calls the route corresponding to the asset and returns the list of
|
|
136
136
|
corresponding data
|
|
@@ -144,7 +144,7 @@ class RestApiClient:
|
|
|
144
144
|
|
|
145
145
|
return [_filter_fields(row) for row in data]
|
|
146
146
|
|
|
147
|
-
def get_with_scope(self, asset: QlikAsset, app_id: str) ->
|
|
147
|
+
def get_with_scope(self, asset: QlikAsset, app_id: str) -> list[dict]:
|
|
148
148
|
"""
|
|
149
149
|
Calls the route corresponding to the asset scoped on an app_id and
|
|
150
150
|
returns the corresponding data
|
|
@@ -157,7 +157,7 @@ class RestApiClient:
|
|
|
157
157
|
assert isinstance(response, list), RESPONSE_LIST_EXPECTED_MSG
|
|
158
158
|
return response
|
|
159
159
|
|
|
160
|
-
def data_connections(self) ->
|
|
160
|
+
def data_connections(self) -> list[dict]:
|
|
161
161
|
"""
|
|
162
162
|
Returns the list of data Connections
|
|
163
163
|
|
|
@@ -165,7 +165,7 @@ class RestApiClient:
|
|
|
165
165
|
"""
|
|
166
166
|
return self.get(QlikAsset.CONNECTIONS)
|
|
167
167
|
|
|
168
|
-
def spaces(self) ->
|
|
168
|
+
def spaces(self) -> list[dict]:
|
|
169
169
|
"""
|
|
170
170
|
Returns the list of Spaces
|
|
171
171
|
|
|
@@ -173,7 +173,7 @@ class RestApiClient:
|
|
|
173
173
|
"""
|
|
174
174
|
return self.get(QlikAsset.SPACES)
|
|
175
175
|
|
|
176
|
-
def users(self) ->
|
|
176
|
+
def users(self) -> list[dict]:
|
|
177
177
|
"""
|
|
178
178
|
Returns the list of Users
|
|
179
179
|
|
|
@@ -181,7 +181,7 @@ class RestApiClient:
|
|
|
181
181
|
"""
|
|
182
182
|
return self.get(QlikAsset.USERS)
|
|
183
183
|
|
|
184
|
-
def apps(self) ->
|
|
184
|
+
def apps(self) -> list[dict]:
|
|
185
185
|
"""
|
|
186
186
|
Returns the list of Apps
|
|
187
187
|
|
|
@@ -189,7 +189,7 @@ class RestApiClient:
|
|
|
189
189
|
"""
|
|
190
190
|
return self.get(QlikAsset.APPS)
|
|
191
191
|
|
|
192
|
-
def data_lineage(self, app_id: str) ->
|
|
192
|
+
def data_lineage(self, app_id: str) -> list[dict]:
|
|
193
193
|
"""
|
|
194
194
|
Returns the data lineage for a given source
|
|
195
195
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Iterable
|
|
3
|
+
from typing import Optional, Union
|
|
3
4
|
|
|
4
5
|
from ...utils import (
|
|
5
6
|
OUTPUT_DIR,
|
|
@@ -18,7 +19,7 @@ logger = logging.getLogger(__name__)
|
|
|
18
19
|
|
|
19
20
|
def iterate_all_data(
|
|
20
21
|
client: QlikClient,
|
|
21
|
-
) -> Iterable[
|
|
22
|
+
) -> Iterable[tuple[QlikAsset, Union[list, dict]]]:
|
|
22
23
|
"""Iterate over the extracted data from Qlik"""
|
|
23
24
|
|
|
24
25
|
logger.info("Extracting CONNECTIONS from REST API")
|
|
@@ -47,7 +48,7 @@ def iterate_all_data(
|
|
|
47
48
|
|
|
48
49
|
|
|
49
50
|
def extract_all(
|
|
50
|
-
except_http_error_statuses: Optional[
|
|
51
|
+
except_http_error_statuses: Optional[list[int]] = None, **kwargs
|
|
51
52
|
) -> None:
|
|
52
53
|
"""
|
|
53
54
|
Extract data from Qlik REST API
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Iterator
|
|
3
|
+
from typing import Optional
|
|
3
4
|
|
|
4
5
|
from ....utils import build_url
|
|
5
6
|
from ....utils.salesforce import SalesforceBaseClient
|
|
@@ -49,7 +50,7 @@ class SalesforceReportingClient(SalesforceBaseClient):
|
|
|
49
50
|
url = self._get_asset_url(asset_type, asset)
|
|
50
51
|
yield {**asset, "Url": url}
|
|
51
52
|
|
|
52
|
-
def fetch(self, asset: SalesforceReportingAsset) ->
|
|
53
|
+
def fetch(self, asset: SalesforceReportingAsset) -> list[dict]:
|
|
53
54
|
"""
|
|
54
55
|
Fetch Salesforce Reporting assets
|
|
55
56
|
"""
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Iterable
|
|
3
|
+
from typing import Union
|
|
3
4
|
|
|
4
5
|
from ...utils import (
|
|
5
6
|
OUTPUT_DIR,
|
|
@@ -19,7 +20,7 @@ logger = logging.getLogger(__name__)
|
|
|
19
20
|
|
|
20
21
|
def iterate_all_data(
|
|
21
22
|
client: SalesforceReportingClient,
|
|
22
|
-
) -> Iterable[
|
|
23
|
+
) -> Iterable[tuple[str, Union[list, dict]]]:
|
|
23
24
|
"""Iterate over the extracted data from Salesforce"""
|
|
24
25
|
|
|
25
26
|
for asset in SalesforceReportingAsset:
|
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
from collections.abc import Iterator
|
|
1
2
|
from functools import partial
|
|
2
3
|
from http import HTTPStatus
|
|
3
|
-
from typing import Callable,
|
|
4
|
+
from typing import Callable, Optional
|
|
4
5
|
|
|
5
6
|
import requests
|
|
6
7
|
|
|
@@ -19,7 +20,7 @@ from .pagination import SIGMA_API_LIMIT, SigmaPagination
|
|
|
19
20
|
|
|
20
21
|
_CONTENT_TYPE = "application/x-www-form-urlencoded"
|
|
21
22
|
|
|
22
|
-
_DATA_ELEMENTS:
|
|
23
|
+
_DATA_ELEMENTS: tuple[str, ...] = (
|
|
23
24
|
"input-table",
|
|
24
25
|
"pivot-table",
|
|
25
26
|
"table",
|
|
@@ -49,7 +50,7 @@ SIGMA_SAFE_MODE = RequestSafeMode(
|
|
|
49
50
|
|
|
50
51
|
|
|
51
52
|
class SigmaBearerAuth(BearerAuth):
|
|
52
|
-
def __init__(self, host: str, token_payload:
|
|
53
|
+
def __init__(self, host: str, token_payload: dict[str, str]):
|
|
53
54
|
auth_endpoint = SigmaEndpointFactory.authentication()
|
|
54
55
|
self.authentication_url = build_url(host, auth_endpoint)
|
|
55
56
|
self.token_payload = token_payload
|
|
@@ -118,7 +119,7 @@ class SigmaClient(APIClient):
|
|
|
118
119
|
"page_id": page_id,
|
|
119
120
|
}
|
|
120
121
|
|
|
121
|
-
def _get_all_elements(self, workbooks:
|
|
122
|
+
def _get_all_elements(self, workbooks: list[dict]) -> Iterator[dict]:
|
|
122
123
|
for workbook in workbooks:
|
|
123
124
|
workbook_id = workbook["workbookId"]
|
|
124
125
|
|
|
@@ -132,7 +133,7 @@ class SigmaClient(APIClient):
|
|
|
132
133
|
page=page, workbook_id=workbook_id
|
|
133
134
|
)
|
|
134
135
|
|
|
135
|
-
def _get_all_lineages(self, elements:
|
|
136
|
+
def _get_all_lineages(self, elements: list[dict]) -> Iterator[dict]:
|
|
136
137
|
for element in elements:
|
|
137
138
|
workbook_id = element["workbook_id"]
|
|
138
139
|
element_id = element["elementId"]
|
|
@@ -145,7 +146,7 @@ class SigmaClient(APIClient):
|
|
|
145
146
|
"element_id": element_id,
|
|
146
147
|
}
|
|
147
148
|
|
|
148
|
-
def _get_all_queries(self, workbooks:
|
|
149
|
+
def _get_all_queries(self, workbooks: list[dict]) -> Iterator[dict]:
|
|
149
150
|
for workbook in workbooks:
|
|
150
151
|
workbook_id = workbook["workbookId"]
|
|
151
152
|
request = self._get_paginated(
|
|
@@ -159,8 +160,8 @@ class SigmaClient(APIClient):
|
|
|
159
160
|
def fetch(
|
|
160
161
|
self,
|
|
161
162
|
asset: SigmaAsset,
|
|
162
|
-
workbooks: Optional[
|
|
163
|
-
elements: Optional[
|
|
163
|
+
workbooks: Optional[list[dict]] = None,
|
|
164
|
+
elements: Optional[list[dict]] = None,
|
|
164
165
|
) -> Iterator[dict]:
|
|
165
166
|
"""Returns the needed metadata for the queried asset"""
|
|
166
167
|
if asset == SigmaAsset.DATASETS:
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Dict
|
|
2
|
-
|
|
3
1
|
from pydantic import Field
|
|
4
2
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
5
3
|
|
|
@@ -21,7 +19,7 @@ class SigmaCredentials(BaseSettings):
|
|
|
21
19
|
grant_type: str = "client_credentials"
|
|
22
20
|
|
|
23
21
|
@property
|
|
24
|
-
def token_payload(self) ->
|
|
22
|
+
def token_payload(self) -> dict[str, str]:
|
|
25
23
|
return {
|
|
26
24
|
"grant_type": self.grant_type,
|
|
27
25
|
"client_id": self.client_id,
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Iterable, Iterator
|
|
3
|
+
from typing import Union
|
|
3
4
|
|
|
4
5
|
from ...utils import (
|
|
5
6
|
OUTPUT_DIR,
|
|
@@ -18,7 +19,7 @@ logger = logging.getLogger(__name__)
|
|
|
18
19
|
|
|
19
20
|
def iterate_all_data(
|
|
20
21
|
client: SigmaClient,
|
|
21
|
-
) -> Iterable[
|
|
22
|
+
) -> Iterable[tuple[SigmaAsset, Union[list, Iterator, dict]]]:
|
|
22
23
|
"""Iterate over the extracted data from Sigma"""
|
|
23
24
|
|
|
24
25
|
logger.info("Extracting DATASETS from API")
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
from enum import Enum
|
|
2
|
-
from typing import Set
|
|
3
2
|
|
|
4
3
|
from ...types import ExternalAsset, classproperty
|
|
5
4
|
|
|
@@ -24,7 +23,7 @@ class TableauAsset(ExternalAsset):
|
|
|
24
23
|
WORKBOOK_TO_DATASOURCE = "workbooks_to_datasource"
|
|
25
24
|
|
|
26
25
|
@classproperty
|
|
27
|
-
def optional(cls) ->
|
|
26
|
+
def optional(cls) -> set["TableauAsset"]:
|
|
28
27
|
return {
|
|
29
28
|
TableauAsset.DASHBOARD,
|
|
30
29
|
TableauAsset.DASHBOARD_SHEET,
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import List
|
|
3
2
|
|
|
4
3
|
import tableauserverclient as TSC # type: ignore
|
|
5
4
|
|
|
@@ -43,7 +42,7 @@ class ApiClient:
|
|
|
43
42
|
self._page_size = PAGE_SIZE
|
|
44
43
|
self._server.version = TABLEAU_SERVER_VERSION
|
|
45
44
|
self._safe_mode = bool(kwargs.get("safe_mode"))
|
|
46
|
-
self.errors:
|
|
45
|
+
self.errors: list[str] = []
|
|
47
46
|
|
|
48
47
|
@staticmethod
|
|
49
48
|
def name() -> str:
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
from
|
|
1
|
+
from collections.abc import Iterator
|
|
2
|
+
from typing import Optional
|
|
2
3
|
|
|
3
4
|
from ....utils import SerializedAsset
|
|
4
5
|
from ..assets import TableauAsset
|
|
@@ -69,6 +70,6 @@ def query_scroll(
|
|
|
69
70
|
break
|
|
70
71
|
|
|
71
72
|
|
|
72
|
-
def extract_asset(asset:
|
|
73
|
+
def extract_asset(asset: dict, asset_type: TableauAsset) -> dict:
|
|
73
74
|
"""Agnostic function extracting dedicated attributes with define asset"""
|
|
74
75
|
return {key: getattr(asset, key) for key in TSC_FIELDS[asset_type]}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from enum import Enum
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import Optional
|
|
3
3
|
|
|
4
4
|
from ....utils import from_env
|
|
5
5
|
|
|
@@ -20,7 +20,7 @@ class CredentialsKey(Enum):
|
|
|
20
20
|
TABLEAU_SERVER_URL = "server_url"
|
|
21
21
|
|
|
22
22
|
|
|
23
|
-
CREDENTIALS_ENV:
|
|
23
|
+
CREDENTIALS_ENV: dict[CredentialsKey, str] = {
|
|
24
24
|
CredentialsKey.TABLEAU_USER: "CASTOR_TABLEAU_USER",
|
|
25
25
|
CredentialsKey.TABLEAU_PASSWORD: "CASTOR_TABLEAU_PASSWORD",
|
|
26
26
|
CredentialsKey.TABLEAU_TOKEN_NAME: "CASTOR_TABLEAU_TOKEN_NAME",
|
|
@@ -89,7 +89,7 @@ class CredentialsApi:
|
|
|
89
89
|
CredentialsKey.TABLEAU_TOKEN: token,
|
|
90
90
|
}
|
|
91
91
|
|
|
92
|
-
def to_dict(self, hide: bool = False) ->
|
|
92
|
+
def to_dict(self, hide: bool = False) -> dict[str, str]:
|
|
93
93
|
safe = (
|
|
94
94
|
CredentialsKey.TABLEAU_USER,
|
|
95
95
|
CredentialsKey.TABLEAU_SITE_ID,
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import Dict, List
|
|
3
2
|
|
|
4
3
|
import tableauserverclient as TSC # type: ignore
|
|
5
4
|
|
|
@@ -48,7 +47,7 @@ def safe_mode_fetch_usage(client) -> SerializedAsset:
|
|
|
48
47
|
Returns computed usages when page number is not found
|
|
49
48
|
Log errors if ServerResponseError is return
|
|
50
49
|
"""
|
|
51
|
-
list_usages:
|
|
50
|
+
list_usages: list[dict] = []
|
|
52
51
|
page_number: int = 0
|
|
53
52
|
|
|
54
53
|
while True:
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Iterable
|
|
3
3
|
|
|
4
4
|
from ...utils import (
|
|
5
5
|
OUTPUT_DIR,
|
|
@@ -19,7 +19,7 @@ logger = logging.getLogger(__name__)
|
|
|
19
19
|
|
|
20
20
|
def iterate_all_data(
|
|
21
21
|
client: Client,
|
|
22
|
-
) -> Iterable[
|
|
22
|
+
) -> Iterable[tuple[TableauAsset, list]]:
|
|
23
23
|
"""Iterate over the extracted Data from Tableau"""
|
|
24
24
|
|
|
25
25
|
logger.info("Extracting USER from Tableau API")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Fields which will be use for Tableau GraphQL API
|
|
2
2
|
from enum import Enum
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import Union
|
|
4
4
|
|
|
5
5
|
from .assets import TableauAsset, TableauGraphqlAsset
|
|
6
6
|
|
|
@@ -189,9 +189,9 @@ class GQLQueryFields(Enum):
|
|
|
189
189
|
"""
|
|
190
190
|
|
|
191
191
|
|
|
192
|
-
QueryInfo =
|
|
192
|
+
QueryInfo = list[dict[str, Union[GQLQueryFields, TableauGraphqlAsset]]]
|
|
193
193
|
|
|
194
|
-
QUERY_FIELDS:
|
|
194
|
+
QUERY_FIELDS: dict[TableauAsset, QueryInfo] = {
|
|
195
195
|
TableauAsset.CUSTOM_SQL_TABLE: [
|
|
196
196
|
{
|
|
197
197
|
FIELDS: GQLQueryFields.CUSTOM_SQL_TABLE,
|
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
# TSC for TableauServerClient: basic REST API to extracting core objects
|
|
2
|
-
from typing import Dict, Set
|
|
3
2
|
|
|
4
3
|
from .assets import TableauAsset
|
|
5
4
|
|
|
6
5
|
# TSC fields extracted per assets
|
|
7
|
-
TSC_FIELDS:
|
|
6
|
+
TSC_FIELDS: dict[TableauAsset, set[str]] = {
|
|
8
7
|
TableauAsset.PROJECT: {
|
|
9
8
|
"id",
|
|
10
9
|
"name",
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import Union
|
|
2
2
|
|
|
3
3
|
from tableauserverclient import ServerResponseError # type: ignore
|
|
4
4
|
from typing_extensions import Literal
|
|
@@ -6,6 +6,6 @@ from typing_extensions import Literal
|
|
|
6
6
|
from .errors import TableauErrorCode
|
|
7
7
|
|
|
8
8
|
PageReturn = Union[
|
|
9
|
-
|
|
10
|
-
|
|
9
|
+
tuple[list[dict], Literal[None]],
|
|
10
|
+
tuple[Literal[None], Union[TableauErrorCode, ServerResponseError]],
|
|
11
11
|
]
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
from
|
|
1
|
+
from collections.abc import Iterator
|
|
2
|
+
from typing import Optional
|
|
2
3
|
|
|
3
4
|
import tableauserverclient as TSC # type: ignore
|
|
4
5
|
|
|
@@ -10,7 +11,7 @@ from .gql_queries import FIELDS_QUERIES, GQL_QUERIES, QUERY_TEMPLATE
|
|
|
10
11
|
|
|
11
12
|
# increase the value when extraction is too slow
|
|
12
13
|
# decrease the value when timeouts arise
|
|
13
|
-
_CUSTOM_PAGE_SIZE:
|
|
14
|
+
_CUSTOM_PAGE_SIZE: dict[TableauRevampAsset, int] = {
|
|
14
15
|
# for some clients, extraction of columns tend to hit the node limit
|
|
15
16
|
# https://community.tableau.com/s/question/0D54T00000YuK60SAF/metadata-query-nodelimitexceeded-error
|
|
16
17
|
# the workaround is to reduce pagination
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import Optional
|
|
3
3
|
|
|
4
4
|
import requests
|
|
5
5
|
import tableauserverclient as TSC # type: ignore
|
|
@@ -40,7 +40,7 @@ class TableauClientRestApi:
|
|
|
40
40
|
return self._server.http_options["timeout"]
|
|
41
41
|
|
|
42
42
|
@property
|
|
43
|
-
def headers(self) ->
|
|
43
|
+
def headers(self) -> dict[str, str]:
|
|
44
44
|
return {"x-tableau-auth": self._server.auth_token}
|
|
45
45
|
|
|
46
46
|
def _get_site_name(self) -> str:
|
|
@@ -52,7 +52,7 @@ class TableauClientRestApi:
|
|
|
52
52
|
self,
|
|
53
53
|
url: str,
|
|
54
54
|
page_token: Optional[str] = None,
|
|
55
|
-
) ->
|
|
55
|
+
) -> dict:
|
|
56
56
|
if page_token:
|
|
57
57
|
url += f"?page_token={page_token}"
|
|
58
58
|
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
from
|
|
1
|
+
from collections.abc import Iterable, Iterator
|
|
2
|
+
from typing import Any
|
|
2
3
|
|
|
3
4
|
import tableauserverclient as TSC # type: ignore
|
|
4
5
|
|
|
@@ -30,7 +31,7 @@ class TableauClientTSC:
|
|
|
30
31
|
self,
|
|
31
32
|
data: Iterable,
|
|
32
33
|
asset: TableauRevampAsset,
|
|
33
|
-
) -> Iterator[
|
|
34
|
+
) -> Iterator[dict]:
|
|
34
35
|
keys = REST_FIELDS[asset]
|
|
35
36
|
|
|
36
37
|
for row in data:
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Dict, Tuple
|
|
2
|
-
|
|
3
1
|
from ..assets import TableauRevampAsset
|
|
4
2
|
|
|
5
3
|
QUERY_TEMPLATE = """
|
|
@@ -130,7 +128,7 @@ workbook { id }
|
|
|
130
128
|
"""
|
|
131
129
|
|
|
132
130
|
|
|
133
|
-
GQL_QUERIES:
|
|
131
|
+
GQL_QUERIES: dict[TableauRevampAsset, tuple[str, str]] = {
|
|
134
132
|
TableauRevampAsset.COLUMN: ("columns", _COLUMNS_QUERY),
|
|
135
133
|
TableauRevampAsset.DASHBOARD: ("dashboards", _DASHBOARDS_QUERY),
|
|
136
134
|
TableauRevampAsset.DATASOURCE: ("datasources", _DATASOURCES_QUERY),
|
|
@@ -1,9 +1,7 @@
|
|
|
1
|
-
from typing import Dict, Set
|
|
2
|
-
|
|
3
1
|
from ..assets import TableauRevampAsset
|
|
4
2
|
|
|
5
3
|
# list of fields to pick in REST API or TSC responses
|
|
6
|
-
REST_FIELDS:
|
|
4
|
+
REST_FIELDS: dict[TableauRevampAsset, set[str]] = {
|
|
7
5
|
TableauRevampAsset.DATASOURCE: {
|
|
8
6
|
"id",
|
|
9
7
|
"project_id",
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Iterable
|
|
3
3
|
|
|
4
4
|
from ...utils import (
|
|
5
5
|
OUTPUT_DIR,
|
|
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
|
|
|
18
18
|
|
|
19
19
|
def iterate_all_data(
|
|
20
20
|
client: TableauRevampClient,
|
|
21
|
-
) -> Iterable[
|
|
21
|
+
) -> Iterable[tuple[TableauRevampAsset, list]]:
|
|
22
22
|
"""Iterate over the extracted Data from Tableau"""
|
|
23
23
|
|
|
24
24
|
for asset in TableauRevampAsset:
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
from
|
|
1
|
+
from collections.abc import Iterator
|
|
2
|
+
from typing import Optional
|
|
2
3
|
|
|
3
4
|
import requests
|
|
4
5
|
|
|
@@ -35,7 +36,7 @@ THOUGHTSPOT_SAFE_MODE = RequestSafeMode()
|
|
|
35
36
|
|
|
36
37
|
|
|
37
38
|
class ThoughtspotBearerAuth(BearerAuth):
|
|
38
|
-
def __init__(self, host: str, token_payload:
|
|
39
|
+
def __init__(self, host: str, token_payload: dict[str, str]):
|
|
39
40
|
auth_endpoint = ThoughtspotEndpointFactory.authentication()
|
|
40
41
|
self.authentication_url = build_url(host, auth_endpoint)
|
|
41
42
|
self.token_payload = token_payload
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Iterable, Iterator
|
|
3
|
+
from typing import Union
|
|
3
4
|
|
|
4
5
|
from ...utils import (
|
|
5
6
|
OUTPUT_DIR,
|
|
@@ -21,7 +22,7 @@ logger = logging.getLogger(__name__)
|
|
|
21
22
|
|
|
22
23
|
def iterate_all_data(
|
|
23
24
|
client: ThoughtspotClient,
|
|
24
|
-
) -> Iterable[
|
|
25
|
+
) -> Iterable[tuple[ThoughtspotAsset, Union[list, Iterator, dict]]]:
|
|
25
26
|
"""Iterate over the extracted data from Thoughtspot"""
|
|
26
27
|
|
|
27
28
|
for asset in ThoughtspotAsset:
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
from enum import Enum
|
|
2
|
-
from typing import Dict, List, Set, Tuple
|
|
3
2
|
|
|
4
3
|
from ...types import ExternalAsset, classproperty
|
|
5
4
|
|
|
@@ -26,7 +25,7 @@ class WarehouseAsset(ExternalAsset):
|
|
|
26
25
|
VIEW_DDL = "view_ddl"
|
|
27
26
|
|
|
28
27
|
@classproperty
|
|
29
|
-
def optional(cls) ->
|
|
28
|
+
def optional(cls) -> set["WarehouseAsset"]:
|
|
30
29
|
return {
|
|
31
30
|
WarehouseAsset.ADDITIONAL_COLUMN_LINEAGE,
|
|
32
31
|
WarehouseAsset.ADDITIONAL_TABLE_LINEAGE,
|
|
@@ -50,7 +49,7 @@ class WarehouseAssetGroup(Enum):
|
|
|
50
49
|
|
|
51
50
|
|
|
52
51
|
# tuple of supported assets for each group (depends on the technology)
|
|
53
|
-
SupportedAssets =
|
|
52
|
+
SupportedAssets = dict[WarehouseAssetGroup, tuple[WarehouseAsset, ...]]
|
|
54
53
|
|
|
55
54
|
# shared by all technologies
|
|
56
55
|
CATALOG_ASSETS = (
|
|
@@ -80,13 +79,13 @@ NON_EXTRACTABLE_ASSETS = {WarehouseAssetGroup.EXTERNAL_LINEAGE}
|
|
|
80
79
|
|
|
81
80
|
def extractable_asset_groups(
|
|
82
81
|
supported_assets: SupportedAssets,
|
|
83
|
-
) ->
|
|
82
|
+
) -> list[tuple[WarehouseAsset, ...]]:
|
|
84
83
|
"""
|
|
85
84
|
helper function to differentiate
|
|
86
85
|
extractable assets vs supported (ingest-able) assets
|
|
87
86
|
"""
|
|
88
87
|
groups = set(supported_assets).difference(NON_EXTRACTABLE_ASSETS)
|
|
89
|
-
extractable:
|
|
88
|
+
extractable: set[tuple[WarehouseAsset, ...]] = {
|
|
90
89
|
supported_assets[group] for group in groups
|
|
91
90
|
}
|
|
92
91
|
return list(extractable)
|