castor-extractor 0.16.4__py3-none-any.whl → 0.16.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +8 -0
- castor_extractor/utils/salesforce/client.py +1 -1
- castor_extractor/visualization/tableau_revamp/__init__.py +3 -0
- castor_extractor/visualization/tableau_revamp/assets.py +18 -0
- castor_extractor/visualization/tableau_revamp/client/__init__.py +2 -0
- castor_extractor/visualization/tableau_revamp/client/client.py +234 -0
- castor_extractor/visualization/tableau_revamp/client/credentials.py +104 -0
- castor_extractor/visualization/tableau_revamp/client/errors.py +3 -0
- castor_extractor/visualization/tableau_revamp/client/gql_queries.py +131 -0
- castor_extractor/visualization/tableau_revamp/client/tsc_fields.py +30 -0
- castor_extractor/visualization/tableau_revamp/constants.py +5 -0
- castor_extractor/visualization/tableau_revamp/extract.py +53 -0
- castor_extractor/warehouse/salesforce/soql.py +0 -1
- {castor_extractor-0.16.4.dist-info → castor_extractor-0.16.6.dist-info}/METADATA +1 -1
- {castor_extractor-0.16.4.dist-info → castor_extractor-0.16.6.dist-info}/RECORD +18 -8
- {castor_extractor-0.16.4.dist-info → castor_extractor-0.16.6.dist-info}/LICENCE +0 -0
- {castor_extractor-0.16.4.dist-info → castor_extractor-0.16.6.dist-info}/WHEEL +0 -0
- {castor_extractor-0.16.4.dist-info → castor_extractor-0.16.6.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.16.6 - 2024-05-14
|
|
4
|
+
|
|
5
|
+
* Introducing the revamped connector for Tableau
|
|
6
|
+
|
|
7
|
+
## 0.16.5 - 2024-04-25
|
|
8
|
+
|
|
9
|
+
* Salesforce: remove DeploymentStatus from EntityDefinition query
|
|
10
|
+
|
|
3
11
|
## 0.16.4 - 2024-04-25
|
|
4
12
|
|
|
5
13
|
* Salesforce: extract sobjects and fields
|
|
@@ -77,7 +77,7 @@ class SalesforceBaseClient(APIClient):
|
|
|
77
77
|
):
|
|
78
78
|
logger.info(f"querying page {page_count}")
|
|
79
79
|
url = self.build_url(self._host, next_page_path)
|
|
80
|
-
records,
|
|
80
|
+
records, next_page_path = self._call(
|
|
81
81
|
url, processor=self._query_processor
|
|
82
82
|
)
|
|
83
83
|
yield from records
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from ...types import ExternalAsset
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class TableauRevampAsset(ExternalAsset):
    """
    Asset kinds handled by the revamped Tableau connector.

    Each member's value is the plural, lower-case name of the asset.
    """

    COLUMN = "columns"
    DASHBOARD = "dashboards"
    DATASOURCE = "datasources"
    FIELD = "fields"
    PROJECT = "projects"
    SHEET = "sheets"
    TABLE = "tables"
    USAGE = "usage"
    USER = "users"
    WORKBOOK = "workbooks"
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Dict, Iterator, List, Optional
|
|
3
|
+
|
|
4
|
+
import tableauserverclient as TSC # type: ignore
|
|
5
|
+
|
|
6
|
+
from ....utils import SerializedAsset
|
|
7
|
+
from ..assets import TableauRevampAsset
|
|
8
|
+
from ..constants import (
|
|
9
|
+
DEFAULT_PAGE_SIZE,
|
|
10
|
+
DEFAULT_TIMEOUT_SECONDS,
|
|
11
|
+
TABLEAU_SERVER_VERSION,
|
|
12
|
+
)
|
|
13
|
+
from .credentials import TableauRevampCredentials
|
|
14
|
+
from .errors import TableauApiError
|
|
15
|
+
from .gql_queries import GQL_QUERIES, QUERY_TEMPLATE
|
|
16
|
+
from .tsc_fields import TSC_FIELDS
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
# Assets fetched through TableauServerClient (TSC) instead of the
# Metadata API; the reason is given next to each entry.
_TSC_ASSETS = (
    # the Metadata API only exposes users who published content
    TableauRevampAsset.USER,
    # the Metadata API does not expose projects
    TableauRevampAsset.PROJECT,
    # the Metadata API does not expose view counts
    TableauRevampAsset.USAGE,
)

# Page size overrides for Metadata API queries, per asset.
# Assets missing from this mapping fall back to DEFAULT_PAGE_SIZE.
_CUSTOM_PAGE_SIZE: Dict[TableauRevampAsset, int] = {
    TableauRevampAsset.FIELD: 1000,
}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _pick_fields(
    data: SerializedAsset,
    asset: TableauRevampAsset,
) -> SerializedAsset:
    """
    Reduce every item of `data` to a dict holding only the attributes
    listed in TSC_FIELDS for the given asset.

    Values are read with `getattr`, so items must expose the listed
    names as attributes (they are not accessed as dict keys).
    """
    wanted = TSC_FIELDS[asset]

    picked = []
    for item in data:
        picked.append({name: getattr(item, name) for name in wanted})
    return picked
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _enrich_with_tsc(
|
|
48
|
+
datasources: SerializedAsset,
|
|
49
|
+
tsc_datasources: SerializedAsset,
|
|
50
|
+
) -> SerializedAsset:
|
|
51
|
+
"""
|
|
52
|
+
Enrich datasources with fields coming from TableauServerClient:
|
|
53
|
+
- project_luid
|
|
54
|
+
- webpage_url
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
mapping = {row["id"]: row for row in tsc_datasources}
|
|
58
|
+
|
|
59
|
+
for datasource in datasources:
|
|
60
|
+
if datasource["__typename"] != "PublishedDatasource":
|
|
61
|
+
# embedded datasources are bound to workbooks => no project
|
|
62
|
+
# embedded datasources cannot be accessed via URL => no webpage_url
|
|
63
|
+
continue
|
|
64
|
+
luid = datasource["luid"]
|
|
65
|
+
tsc_datasource = mapping[luid]
|
|
66
|
+
datasource["projectLuid"] = tsc_datasource["project_id"]
|
|
67
|
+
datasource["webpageUrl"] = tsc_datasource["webpage_url"]
|
|
68
|
+
|
|
69
|
+
return datasources
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def gql_query_scroll(
    server,
    query: str,
    resource: str,
) -> Iterator[SerializedAsset]:
    """
    Yield the pages of a GQL query, one list of nodes per page.

    The AFTER_TOKEN_SIGNAL placeholder of `query` is replaced by the
    current cursor before each call to `server.metadata.query`. Pages are
    read from the "<resource>Connection" entry of the answer, until its
    pageInfo reports no next page.

    Raises TableauApiError when the answer contains an "errors" entry.
    """
    connection_key = f"{resource}Connection"
    cursor: Optional[str] = None
    has_next_page = True

    while has_next_page:
        # a defined cursor must be quoted, otherwise the null token is used
        after = f'"{cursor}"' if cursor is not None else "null"
        answer = server.metadata.query(
            query.replace("AFTER_TOKEN_SIGNAL", after),
        )
        if "errors" in answer:
            raise TableauApiError(answer["errors"])

        payload = answer["data"][connection_key]
        yield payload["nodes"]

        page_info = payload["pageInfo"]
        has_next_page = page_info["hasNextPage"]
        if has_next_page:
            cursor = page_info["endCursor"]
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class TableauRevampClient:
    """
    Connect to Tableau's API and extract assets.

    Relies on TableauServerClient (TSC) overlay:
    https://tableau.github.io/server-client-python/docs/
    - for connection
    - to extract the assets listed in _TSC_ASSETS and enrich datasources

    Calls the MetadataAPI, using graphQL, for the other assets:
    https://help.tableau.com/current/api/metadata_api/en-us/reference/index.html
    """

    def __init__(
        self,
        credentials: TableauRevampCredentials,
        timeout_sec: int = DEFAULT_TIMEOUT_SECONDS,
    ):
        self._credentials = credentials

        # the TSC server handle is configured here; signing in is done
        # separately, by calling login()
        server = TSC.Server(credentials.server_url)
        server.add_http_options({"verify": True, "timeout": timeout_sec})
        server.version = TABLEAU_SERVER_VERSION
        self._server = server

        self.errors: List[str] = []

    @staticmethod
    def name() -> str:
        """Display name of this client"""
        return "Tableau/API"

    def _user_password_login(self) -> None:
        """Sign in to Tableau with the user and password credentials"""
        auth = TSC.TableauAuth(
            self._credentials.user,
            self._credentials.password,
            site_id=self._credentials.site_id,
        )
        self._server.auth.sign_in(auth)

    def _pat_login(self) -> None:
        """Sign in to Tableau with the Personal Access Token credentials"""
        auth = TSC.PersonalAccessTokenAuth(
            self._credentials.token_name,
            self._credentials.token,
            site_id=self._credentials.site_id,
        )
        self._server.auth.sign_in(auth)

    def login(self) -> None:
        """
        Sign in with the first complete pair found in the credentials:
        - user/password
        - token_name/token (Personal Access Token)
        https://help.tableau.com/current/api/rest_api/en-us/REST/rest_api_concepts_auth.htm

        Raises a ValueError when neither pair is complete
        """
        creds = self._credentials

        if creds.user and creds.password:
            logger.info("Logging in using user and password authentication")
            return self._user_password_login()

        if creds.token_name and creds.token:
            logger.info("Logging in using token authentication")
            return self._pat_login()

        raise ValueError(
            "Invalid credentials: either user/password or PAT must be provided",
        )

    def base_url(self) -> str:
        """Server URL taken from the credentials"""
        return self._credentials.server_url

    def _fetch_from_tsc(
        self,
        asset: TableauRevampAsset,
    ) -> SerializedAsset:
        """
        Fetch the given asset through TSC, keeping only the fields
        selected by _pick_fields.

        Raises an AssertionError for assets without a TSC endpoint here.
        """
        # name of the TSC server endpoint serving each supported asset
        endpoint_names = {
            TableauRevampAsset.USER: "users",
            TableauRevampAsset.PROJECT: "projects",
            TableauRevampAsset.DATASOURCE: "datasources",
            TableauRevampAsset.USAGE: "views",
        }
        if asset not in endpoint_names:
            raise AssertionError(f"Fetching from TSC not supported for {asset}")

        endpoint = getattr(self._server, endpoint_names[asset])
        if asset == TableauRevampAsset.USAGE:
            # usage is read from views, requested with usage=True
            pager = TSC.Pager(endpoint, usage=True)
        else:
            pager = TSC.Pager(endpoint)

        return _pick_fields(pager, asset)

    def _fetch_from_metadata_api(
        self,
        asset: TableauRevampAsset,
    ) -> SerializedAsset:
        """
        Fetch the given asset through the Metadata API: build the query
        from GQL_QUERIES and QUERY_TEMPLATE, then flatten all result pages
        into a single list.
        """
        resource, fields = GQL_QUERIES[asset]
        query = QUERY_TEMPLATE.format(
            resource=resource,
            fields=fields,
            page_size=_CUSTOM_PAGE_SIZE.get(asset) or DEFAULT_PAGE_SIZE,
        )

        rows: SerializedAsset = []
        for page in gql_query_scroll(self._server, query, resource):
            rows.extend(page)
        return rows

    def _fetch_datasources(self) -> SerializedAsset:
        """
        Fetch datasources from the Metadata API, then enrich them with
        the datasource rows fetched through TSC.
        """
        from_metadata_api = self._fetch_from_metadata_api(
            TableauRevampAsset.DATASOURCE,
        )
        from_tsc = self._fetch_from_tsc(TableauRevampAsset.DATASOURCE)
        return _enrich_with_tsc(from_metadata_api, from_tsc)

    def fetch(
        self,
        asset: TableauRevampAsset,
    ) -> SerializedAsset:
        """
        Extract the given Tableau asset, using TSC, the Metadata API, or
        both, depending on the asset.
        """
        if asset == TableauRevampAsset.DATASOURCE:
            # both APIs are required to extract datasources
            return self._fetch_datasources()

        if asset in _TSC_ASSETS:
            # some assets can only be extracted via TSC
            fetcher = self._fetch_from_tsc
        else:
            # most assets are extracted via the Metadata API
            fetcher = self._fetch_from_metadata_api
        return fetcher(asset)
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
from typing import Dict, Literal, Optional, overload
|
|
3
|
+
|
|
4
|
+
from ....utils import from_env
|
|
5
|
+
|
|
6
|
+
_AUTH_ERROR_MSG = "Need either user and password or token_name and token"
|
|
7
|
+
|
|
8
|
+
# To specify the default site on Tableau Server, you can use an empty string
|
|
9
|
+
# https://tableau.github.io/server-client-python/docs/api-ref#authentication
|
|
10
|
+
_DEFAULT_SERVER_SITE_ID = ""
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class CredentialsKey(Enum):
|
|
14
|
+
"""Value enum object for the credentials"""
|
|
15
|
+
|
|
16
|
+
TABLEAU_USER = "user"
|
|
17
|
+
TABLEAU_PASSWORD = "password" # noqa: S105
|
|
18
|
+
TABLEAU_TOKEN_NAME = "token_name" # noqa: S105
|
|
19
|
+
TABLEAU_TOKEN = "token" # noqa: S105
|
|
20
|
+
TABLEAU_SITE_ID = "site_id"
|
|
21
|
+
TABLEAU_SERVER_URL = "server_url"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
CREDENTIALS_ENV: Dict[CredentialsKey, str] = {
|
|
25
|
+
CredentialsKey.TABLEAU_USER: "CASTOR_TABLEAU_USER",
|
|
26
|
+
CredentialsKey.TABLEAU_PASSWORD: "CASTOR_TABLEAU_PASSWORD",
|
|
27
|
+
CredentialsKey.TABLEAU_TOKEN_NAME: "CASTOR_TABLEAU_TOKEN_NAME",
|
|
28
|
+
CredentialsKey.TABLEAU_TOKEN: "CASTOR_TABLEAU_TOKEN",
|
|
29
|
+
CredentialsKey.TABLEAU_SITE_ID: "CASTOR_TABLEAU_SITE_ID",
|
|
30
|
+
CredentialsKey.TABLEAU_SERVER_URL: "CASTOR_TABLEAU_SERVER_URL",
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@overload
|
|
35
|
+
def get_value(key: CredentialsKey, kwargs: dict) -> Optional[str]: ...
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@overload
|
|
39
|
+
def get_value(
|
|
40
|
+
key: CredentialsKey, kwargs: dict, optional: Literal[True]
|
|
41
|
+
) -> Optional[str]: ...
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@overload
|
|
45
|
+
def get_value(
|
|
46
|
+
key: CredentialsKey, kwargs: dict, optional: Literal[False]
|
|
47
|
+
) -> str: ...
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def get_value(
|
|
51
|
+
key: CredentialsKey,
|
|
52
|
+
kwargs: dict,
|
|
53
|
+
optional: bool = True,
|
|
54
|
+
) -> Optional[str]:
|
|
55
|
+
"""
|
|
56
|
+
Returns the value of the given key:
|
|
57
|
+
- from kwargs in priority
|
|
58
|
+
- from ENV otherwise
|
|
59
|
+
Raises an error if not found (unless optional)
|
|
60
|
+
"""
|
|
61
|
+
|
|
62
|
+
if key.value in kwargs:
|
|
63
|
+
return kwargs[key.value]
|
|
64
|
+
|
|
65
|
+
env_key = CREDENTIALS_ENV[key]
|
|
66
|
+
return from_env(env_key, optional)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class TableauRevampCredentials:
    """
    Tableau's credentials to connect to REST API

    Holds the server URL, the site id and two optional pairs of secrets:
    user/password and token_name/token. No check is made here that one of
    the pairs is complete.
    """

    def __init__(
        self,
        *,
        server_url: str,
        site_id: Optional[str],
        user: Optional[str],
        password: Optional[str],
        token_name: Optional[str],
        token: Optional[str],
    ):
        self.user = user
        # a missing (or empty) site id falls back to the default site
        self.site_id = site_id or _DEFAULT_SERVER_SITE_ID
        self.server_url = server_url
        self.password = password
        self.token_name = token_name
        self.token = token

    @classmethod
    def from_env(cls, kwargs: dict) -> "TableauRevampCredentials":
        """
        Build credentials by resolving every value with `get_value`.

        Only the server URL is requested as mandatory (optional=False).
        The instance is created through `cls`, so that subclasses calling
        this constructor get an instance of their own type.
        """
        return cls(
            server_url=get_value(
                CredentialsKey.TABLEAU_SERVER_URL,
                kwargs,
                optional=False,
            ),
            site_id=get_value(CredentialsKey.TABLEAU_SITE_ID, kwargs),
            user=get_value(CredentialsKey.TABLEAU_USER, kwargs),
            password=get_value(CredentialsKey.TABLEAU_PASSWORD, kwargs),
            token_name=get_value(CredentialsKey.TABLEAU_TOKEN_NAME, kwargs),
            token=get_value(CredentialsKey.TABLEAU_TOKEN, kwargs),
        )
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
from typing import Dict, Tuple
|
|
2
|
+
|
|
3
|
+
from ..assets import TableauRevampAsset
|
|
4
|
+
|
|
5
|
+
# Paginated query skeleton. {resource}, {page_size} and {fields} are filled
# with str.format (hence the doubled braces); AFTER_TOKEN_SIGNAL is a plain
# placeholder meant to be replaced by the pagination cursor afterwards.
QUERY_TEMPLATE = """
{{
  {resource}Connection(first: {page_size}, after: AFTER_TOKEN_SIGNAL) {{
    nodes {{ {fields}
    }}
    pageInfo {{
      hasNextPage
      endCursor
    }}
    totalCount
  }}
}}
"""

# Fields requested on each node, one constant per resource

_COLUMNS_QUERY = """
    downstreamDashboards { id }
    downstreamFields { id }
    downstreamWorkbooks { id }
    id
    name
    table { id }
"""

_DASHBOARDS_QUERY = """
    createdAt
    id
    name
    path
    tags { name }
    updatedAt
    workbook { id }
"""

_DATASOURCES_QUERY = """
    __typename
    createdAt
    downstreamDashboards { id }
    downstreamWorkbooks { id }
    id
    name
    updatedAt
    ... on PublishedDatasource {
        description
        luid
        owner { luid }
        site { name }
        tags { name }
        uri
    }
"""

_TABLES_QUERY = """
    __typename
    downstreamDashboards { id }
    downstreamDatasources { id }
    downstreamWorkbooks { id }
    id
    name
    ... on DatabaseTable {
        connectionType
        fullName
        schema
        tableType
    }
    ... on CustomSQLTable {
        query
    }
"""


_WORKBOOKS_QUERY = """
    createdAt
    description
    embeddedDatasources { id }
    id
    luid
    name
    owner { luid }
    projectLuid
    site { name }
    tags { name }
    updatedAt
    uri
"""

_FIELDS_QUERY = """
    __typename
    datasource { id }
    description
    downstreamDashboards { id }
    downstreamWorkbooks { id }
    folderName
    id
    name
    ... on DataField {
        dataType
        role
    }
    ... on ColumnField {
        columns {
            name
            table { name }
        }
    }
"""

_SHEETS_QUERY = """
    containedInDashboards { id }
    createdAt
    id
    index
    name
    updatedAt
    upstreamFields { name }
    workbook { id }
"""


# For each asset: (resource name used in the query, fields to request)
GQL_QUERIES: Dict[TableauRevampAsset, Tuple[str, str]] = {
    TableauRevampAsset.COLUMN: ("columns", _COLUMNS_QUERY),
    TableauRevampAsset.DASHBOARD: ("dashboards", _DASHBOARDS_QUERY),
    TableauRevampAsset.DATASOURCE: ("datasources", _DATASOURCES_QUERY),
    TableauRevampAsset.FIELD: ("fields", _FIELDS_QUERY),
    TableauRevampAsset.SHEET: ("sheets", _SHEETS_QUERY),
    TableauRevampAsset.TABLE: ("tables", _TABLES_QUERY),
    TableauRevampAsset.WORKBOOK: ("workbooks", _WORKBOOKS_QUERY),
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from typing import Dict, Set
|
|
2
|
+
|
|
3
|
+
from ..assets import TableauRevampAsset
|
|
4
|
+
|
|
5
|
+
# Names of the fields to pick in the TSC response, for each asset
# fetched through TableauServerClient
TSC_FIELDS: Dict[TableauRevampAsset, Set[str]] = {
    TableauRevampAsset.DATASOURCE: {"id", "project_id", "webpage_url"},
    TableauRevampAsset.PROJECT: {"description", "id", "name", "parent_id"},
    TableauRevampAsset.USAGE: {"name", "total_views", "workbook_id"},
    TableauRevampAsset.USER: {
        "email",
        "fullname",
        "id",
        "name",
        "site_role",
    },
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Iterable, Tuple
|
|
3
|
+
|
|
4
|
+
from ...utils import (
|
|
5
|
+
OUTPUT_DIR,
|
|
6
|
+
current_timestamp,
|
|
7
|
+
deep_serialize,
|
|
8
|
+
from_env,
|
|
9
|
+
get_output_filename,
|
|
10
|
+
write_errors_logs,
|
|
11
|
+
write_json,
|
|
12
|
+
write_summary,
|
|
13
|
+
)
|
|
14
|
+
from .assets import TableauRevampAsset
|
|
15
|
+
from .client import TableauRevampClient
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def iterate_all_data(
    client: TableauRevampClient,
) -> Iterable[Tuple[TableauRevampAsset, list]]:
    """
    Yield (asset, serialized data) pairs extracted from Tableau.

    Only the USER asset is extracted here.
    """
    asset = TableauRevampAsset.USER

    logger.info("Extracting USER from Tableau API")
    users = client.fetch(asset)
    yield asset, deep_serialize(users)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def extract_all(client: TableauRevampClient, **kwargs: str) -> None:
    """
    Extract data from Tableau and store it locally, in files written
    under the output directory.

    The output directory is the `output_directory` keyword argument when
    given, else the result of `from_env(OUTPUT_DIR)`. One JSON file is
    written per extracted asset, followed by a summary. If the client
    collected errors, they are written to the output directory as well.
    """
    output_directory = kwargs.get("output_directory") or from_env(OUTPUT_DIR)
    timestamp = current_timestamp()

    for asset, rows in iterate_all_data(client):
        target = get_output_filename(asset.value, output_directory, timestamp)
        write_json(target, rows)

    write_summary(
        output_directory,
        timestamp,
        base_url=client.base_url(),
        client_name=client.name(),
    )

    if not client.errors:
        return
    write_errors_logs(output_directory, timestamp, client.errors)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
CHANGELOG.md,sha256=
|
|
1
|
+
CHANGELOG.md,sha256=5zI5Mielu8ZXyckh7x2A_iXPW3qXjEhH_8THWOZVY0c,10191
|
|
2
2
|
Dockerfile,sha256=HcX5z8OpeSvkScQsN-Y7CNMUig_UB6vTMDl7uqzuLGE,303
|
|
3
3
|
LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
4
4
|
README.md,sha256=uF6PXm9ocPITlKVSh9afTakHmpLx3TvawLf-CbMP3wM,3578
|
|
@@ -84,7 +84,7 @@ castor_extractor/utils/retry_test.py,sha256=nsMttlmyKygVcffX3Hay8U2S1BspkGPiCmzI
|
|
|
84
84
|
castor_extractor/utils/safe.py,sha256=jpfIimwdBSVUvU2DPFrhqpKC_DSYwxQqd08MlIkSODY,1967
|
|
85
85
|
castor_extractor/utils/safe_test.py,sha256=IHN1Z761tYMFslYC-2HAfkXmFPh4LYSqNLs4QZwykjk,2160
|
|
86
86
|
castor_extractor/utils/salesforce/__init__.py,sha256=VGD4vd1Se79z2PAaVCvCSL3yhgWlhQFaVDLZ5aERug0,132
|
|
87
|
-
castor_extractor/utils/salesforce/client.py,sha256=
|
|
87
|
+
castor_extractor/utils/salesforce/client.py,sha256=KgqvncwDESb9pVkf4yeI8cRygB0n3QwSWsAUuYBSUWM,2861
|
|
88
88
|
castor_extractor/utils/salesforce/client_test.py,sha256=s6UTogjC36jrJOnYA-gFuyTQsvROCt9y_eoD2O41xCg,682
|
|
89
89
|
castor_extractor/utils/salesforce/constants.py,sha256=5sph6dbTCp0mAGWP24WTpC1wsIqeG8yI8-BsKrmV_wA,335
|
|
90
90
|
castor_extractor/utils/salesforce/credentials.py,sha256=Wwb-_BlbFBJUl3dhXz72IIqcCfj1F3Zj3JoYr3FYk0A,2045
|
|
@@ -240,6 +240,16 @@ castor_extractor/visualization/tableau/tests/unit/utils/env_key.py,sha256=fBX8pG
|
|
|
240
240
|
castor_extractor/visualization/tableau/tsc_fields.py,sha256=BoV6XVu-HUan9hxeYRbvbS9dIMfgDlgfOvdY7DiFiZc,966
|
|
241
241
|
castor_extractor/visualization/tableau/types.py,sha256=_T3fahMHRkV2TVaYcjITh61T0FLzSBYKD21VurzkF5M,322
|
|
242
242
|
castor_extractor/visualization/tableau/usage.py,sha256=LlFwlbEr-EnYUJjKZha99CRCRrERJ350oAvzBQlp9_s,427
|
|
243
|
+
castor_extractor/visualization/tableau_revamp/__init__.py,sha256=a3DGjQhaz17gBqW-E84TAgupKbqLC40y5Ajo1yn-ot4,156
|
|
244
|
+
castor_extractor/visualization/tableau_revamp/assets.py,sha256=owlwaI2E4UKk1YhkaHgaAXx6gu3Op6EqZ7bjp0tHI6s,351
|
|
245
|
+
castor_extractor/visualization/tableau_revamp/client/__init__.py,sha256=wmS9uLtUiqNYVloi0-DgD8d2qzu3RVZEAtWiaDp6G_M,90
|
|
246
|
+
castor_extractor/visualization/tableau_revamp/client/client.py,sha256=PsU7OOAYeTBF4dcT-Tl7RwJGD8o_S6vmGhUk_lYNhDw,7288
|
|
247
|
+
castor_extractor/visualization/tableau_revamp/client/credentials.py,sha256=fHG32egq6ll2U4BNazalMof_plzfCMQjrN9WOs6kezk,3014
|
|
248
|
+
castor_extractor/visualization/tableau_revamp/client/errors.py,sha256=dTe1shqmWmAXpDpCz-E24m8dGYjt6rvIGV9qQb4jnvI,150
|
|
249
|
+
castor_extractor/visualization/tableau_revamp/client/gql_queries.py,sha256=SiKTbl-lblV6GCajikPXrDh4BaTmi0wN_HtGQhVDV3o,2041
|
|
250
|
+
castor_extractor/visualization/tableau_revamp/client/tsc_fields.py,sha256=Nl_CM2OEzgh3eL8Szcv9Fbiu9wGRkcaYlerUYTJrZLQ,610
|
|
251
|
+
castor_extractor/visualization/tableau_revamp/constants.py,sha256=PcdudAogQhi3e-knalhgliMKjy5ahN0em_-7XSLrnxM,87
|
|
252
|
+
castor_extractor/visualization/tableau_revamp/extract.py,sha256=2SLUxp5okM4AcEJJ61ZgcC2ikfZZl9MH17CEXMXmgl0,1450
|
|
243
253
|
castor_extractor/warehouse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
244
254
|
castor_extractor/warehouse/abstract/__init__.py,sha256=QNwFRsLpH6aqVpl37qzklLr62iA85Yx6nZAivHDhpyk,366
|
|
245
255
|
castor_extractor/warehouse/abstract/asset.py,sha256=qe5ugm7fnkvjbzdELRAeywbuKH4OLq2YHlXdjepehxE,2159
|
|
@@ -317,7 +327,7 @@ castor_extractor/warehouse/salesforce/constants.py,sha256=GusduVBCPvwpk_Im6F3bDv
|
|
|
317
327
|
castor_extractor/warehouse/salesforce/extract.py,sha256=ZTb58t7mqhavNvErrnw8M0L4Uu3qJpQEIldymurbgl0,3417
|
|
318
328
|
castor_extractor/warehouse/salesforce/format.py,sha256=_BSj_G6C-kPwRubxSx1WuHg-_nYVQVNgAANqNfXL5RM,2154
|
|
319
329
|
castor_extractor/warehouse/salesforce/format_test.py,sha256=6hy0USZH7-PDQt3oZ9_3Nwlr3eHLkqNEchqIM3bIDrU,858
|
|
320
|
-
castor_extractor/warehouse/salesforce/soql.py,sha256=
|
|
330
|
+
castor_extractor/warehouse/salesforce/soql.py,sha256=pAEaJE8ZUcyN3ptBsZGzNcGRhCcU81X6RMlnF1HRMw4,1063
|
|
321
331
|
castor_extractor/warehouse/snowflake/__init__.py,sha256=TEGXTyxWp4Tr9gIHb-UFVTRKj6YWmrRtqHruiKSZGiY,174
|
|
322
332
|
castor_extractor/warehouse/snowflake/client.py,sha256=XT0QLVNff_586SDuMe40iu8FCwPDh2uBV5aKc1Ql914,5555
|
|
323
333
|
castor_extractor/warehouse/snowflake/client_test.py,sha256=ihWtOOAQfh8pu5JTr_EWfqefKOVIaJXznACURzaU1Qs,1432
|
|
@@ -357,8 +367,8 @@ castor_extractor/warehouse/synapse/queries/schema.sql,sha256=aX9xNrBD_ydwl-znGSF
|
|
|
357
367
|
castor_extractor/warehouse/synapse/queries/table.sql,sha256=mCE8bR1Vb7j7SwZW2gafcXidQ2fo1HwxcybA8wP2Kfs,1049
|
|
358
368
|
castor_extractor/warehouse/synapse/queries/user.sql,sha256=sTb_SS7Zj3AXW1SggKPLNMCd0qoTpL7XI_BJRMaEpBg,67
|
|
359
369
|
castor_extractor/warehouse/synapse/queries/view_ddl.sql,sha256=3EVbp5_yTgdByHFIPLHmnoOnqqLE77SrjAwFDvu4e54,249
|
|
360
|
-
castor_extractor-0.16.
|
|
361
|
-
castor_extractor-0.16.
|
|
362
|
-
castor_extractor-0.16.
|
|
363
|
-
castor_extractor-0.16.
|
|
364
|
-
castor_extractor-0.16.
|
|
370
|
+
castor_extractor-0.16.6.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
371
|
+
castor_extractor-0.16.6.dist-info/METADATA,sha256=yDrJkKrR_JGhxTHdorJu_IcuH_d3qd7VLoLwQPEdclo,6370
|
|
372
|
+
castor_extractor-0.16.6.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
373
|
+
castor_extractor-0.16.6.dist-info/entry_points.txt,sha256=SbyPk58Gh-FRztfCNnUZQ6w7SatzNJFZ6GIJLNsy7tI,1427
|
|
374
|
+
castor_extractor-0.16.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|