castor-extractor 0.23.3__py3-none-any.whl → 0.24.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +8 -0
- README.md +2 -0
- castor_extractor/commands/extract_looker_studio.py +30 -0
- castor_extractor/knowledge/confluence/client/client.py +0 -13
- castor_extractor/knowledge/confluence/client/endpoints.py +1 -1
- castor_extractor/visualization/looker_studio/__init__.py +1 -0
- castor_extractor/visualization/looker_studio/assets.py +1 -0
- castor_extractor/visualization/looker_studio/client/client.py +56 -6
- castor_extractor/visualization/looker_studio/client/credentials.py +5 -0
- castor_extractor/visualization/looker_studio/client/queries/query.sql +52 -0
- castor_extractor/visualization/looker_studio/extract.py +84 -0
- {castor_extractor-0.23.3.dist-info → castor_extractor-0.24.1.dist-info}/METADATA +11 -1
- {castor_extractor-0.23.3.dist-info → castor_extractor-0.24.1.dist-info}/RECORD +16 -48
- {castor_extractor-0.23.3.dist-info → castor_extractor-0.24.1.dist-info}/entry_points.txt +1 -0
- castor_extractor/visualization/tableau/__init__.py +0 -3
- castor_extractor/visualization/tableau/assets.py +0 -49
- castor_extractor/visualization/tableau/client/__init__.py +0 -2
- castor_extractor/visualization/tableau/client/client.py +0 -229
- castor_extractor/visualization/tableau/client/client_utils.py +0 -75
- castor_extractor/visualization/tableau/client/credentials.py +0 -104
- castor_extractor/visualization/tableau/client/project.py +0 -28
- castor_extractor/visualization/tableau/client/safe_mode.py +0 -70
- castor_extractor/visualization/tableau/constants.py +0 -9
- castor_extractor/visualization/tableau/errors.py +0 -5
- castor_extractor/visualization/tableau/extract.py +0 -121
- castor_extractor/visualization/tableau/gql_fields.py +0 -249
- castor_extractor/visualization/tableau/tests/__init__.py +0 -0
- castor_extractor/visualization/tableau/tests/unit/__init__.py +0 -0
- castor_extractor/visualization/tableau/tests/unit/assets/graphql/metadata/metadata_1_get.json +0 -15
- castor_extractor/visualization/tableau/tests/unit/assets/graphql/metadata/metadata_2_get.json +0 -15
- castor_extractor/visualization/tableau/tests/unit/assets/rest_api/auth.xml +0 -7
- castor_extractor/visualization/tableau/tests/unit/assets/rest_api/project_get.xml +0 -9
- castor_extractor/visualization/tableau/tests/unit/assets/rest_api/user_get.xml +0 -8
- castor_extractor/visualization/tableau/tests/unit/assets/rest_api/view_get_usage.xml +0 -24
- castor_extractor/visualization/tableau/tests/unit/assets/rest_api/workbook_get.xml +0 -19
- castor_extractor/visualization/tableau/tests/unit/graphql/__init__.py +0 -0
- castor_extractor/visualization/tableau/tests/unit/graphql/paginated_object_test.py +0 -63
- castor_extractor/visualization/tableau/tests/unit/rest_api/__init__.py +0 -0
- castor_extractor/visualization/tableau/tests/unit/rest_api/auth_test.py +0 -39
- castor_extractor/visualization/tableau/tests/unit/rest_api/credentials_test.py +0 -13
- castor_extractor/visualization/tableau/tests/unit/rest_api/projects_test.py +0 -59
- castor_extractor/visualization/tableau/tests/unit/rest_api/usages_test.py +0 -49
- castor_extractor/visualization/tableau/tests/unit/rest_api/users_test.py +0 -52
- castor_extractor/visualization/tableau/tests/unit/rest_api/workbooks_test.py +0 -60
- castor_extractor/visualization/tableau/tests/unit/utils/__init__.py +0 -1
- castor_extractor/visualization/tableau/tests/unit/utils/env_key.py +0 -6
- castor_extractor/visualization/tableau/tsc_fields.py +0 -46
- castor_extractor/visualization/tableau/types.py +0 -11
- castor_extractor/visualization/tableau/usage.py +0 -14
- {castor_extractor-0.23.3.dist-info → castor_extractor-0.24.1.dist-info}/LICENCE +0 -0
- {castor_extractor-0.23.3.dist-info → castor_extractor-0.24.1.dist-info}/WHEEL +0 -0
|
@@ -1,229 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
|
|
3
|
-
import tableauserverclient as TSC # type: ignore
|
|
4
|
-
|
|
5
|
-
from ....utils import SerializedAsset
|
|
6
|
-
from ..assets import TableauAsset
|
|
7
|
-
from ..constants import PAGE_SIZE, TABLEAU_SERVER_VERSION
|
|
8
|
-
from ..usage import compute_usage_views
|
|
9
|
-
from .client_utils import extract_asset, get_paginated_objects
|
|
10
|
-
from .credentials import CredentialsApi, CredentialsKey, get_value
|
|
11
|
-
from .project import compute_project_path
|
|
12
|
-
from .safe_mode import safe_mode_fetch_usage
|
|
13
|
-
|
|
14
|
-
logger = logging.getLogger(__name__)
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
class ApiClient:
    """
    Connect to Tableau REST API and fetch main assets.
    Superuser credentials are required.
    https://tableau.github.io/server-client-python/docs/
    """

    def __init__(
        self,
        **kwargs,
    ):
        """
        Build the client from keyword arguments.

        Each credential is resolved through `get_value`: every one except
        `server_url` is requested with the optional flag set. A truthy
        `safe_mode` keyword switches the usage extraction to
        `safe_mode_fetch_usage`.
        """
        self._credentials = CredentialsApi(
            user=get_value(CredentialsKey.TABLEAU_USER, kwargs, True),
            password=get_value(CredentialsKey.TABLEAU_PASSWORD, kwargs, True),
            token_name=get_value(
                CredentialsKey.TABLEAU_TOKEN_NAME,
                kwargs,
                True,
            ),
            token=get_value(CredentialsKey.TABLEAU_TOKEN, kwargs, True),
            server_url=get_value(CredentialsKey.TABLEAU_SERVER_URL, kwargs),
            site_id=get_value(CredentialsKey.TABLEAU_SITE_ID, kwargs, True),
        )
        self._server = TSC.Server(self._credentials.server_url)
        # NOTE(review): presumably enforces TLS certificate verification on
        # the underlying HTTP session - confirm against the TSC documentation
        self._server.add_http_options({"verify": True})
        self._page_size = PAGE_SIZE
        self._server.version = TABLEAU_SERVER_VERSION
        self._safe_mode = bool(kwargs.get("safe_mode"))
        # filled by safe_mode_fetch_usage, read by the extraction entry point
        self.errors: list[str] = []

    @staticmethod
    def name() -> str:
        """Return the display name of this client"""
        return "Tableau/API"

    def _user_password_login(self) -> None:
        """Login into Tableau using user and password"""
        self._server.auth.sign_in(
            TSC.TableauAuth(
                self._credentials.user,
                self._credentials.password,
                site_id=self._credentials.site_id,
            ),
        )

    def _pat_login(self) -> None:
        """Login into Tableau using personal authentication token"""
        self._server.auth.sign_in(
            TSC.PersonalAccessTokenAuth(
                self._credentials.token_name,
                self._credentials.token,
                site_id=self._credentials.site_id,
            ),
        )

    def login(self) -> None:
        """
        Login into Tableau

        User/password authentication takes precedence over the personal
        access token when both are available.

        Raises ValueError when neither pair of credentials is complete.
        """

        if self._credentials.user and self._credentials.password:
            logger.info("Logging in using user and password authentication")
            return self._user_password_login()

        if self._credentials.token_name and self._credentials.token:
            logger.info("Logging in using token authentication")
            return self._pat_login()

        raise ValueError(
            """Wrong authentication: you should provide either user and password
            or personal access token""",
        )

    def base_url(self) -> str:
        """Return the server url held by the credentials"""
        return self._credentials.server_url

    def _fetch_users(self) -> SerializedAsset:
        """Fetches list of User"""
        return [
            extract_asset(user, TableauAsset.USER)
            for user in TSC.Pager(self._server.users)
        ]

    def _fetch_workbooks(self) -> SerializedAsset:
        """Fetches list of Workbooks"""

        return [
            extract_asset(workbook, TableauAsset.WORKBOOK)
            for workbook in TSC.Pager(self._server.workbooks)
        ]

    def _fetch_usages(self, safe_mode: bool) -> SerializedAsset:
        """
        Fetches list of Usages

        Without safe mode, all views are paged with `usage=True` and handed
        to `compute_usage_views`; with safe mode the work is delegated to
        `safe_mode_fetch_usage`.
        """
        if not safe_mode:
            usages = [
                extract_asset(usage, TableauAsset.USAGE)
                for usage in TSC.Pager(self._server.views, usage=True)
            ]

            return compute_usage_views(usages)

        return safe_mode_fetch_usage(self)

    def _fetch_projects(self) -> SerializedAsset:
        """Fetches list of Projects, enriched by `compute_project_path`"""
        return compute_project_path(
            [
                extract_asset(project, TableauAsset.PROJECT)
                for project in TSC.Pager(self._server.projects)
            ],
        )

    def _fetch_workbooks_to_datasource(self) -> SerializedAsset:
        """Fetches workbooks to datasource"""

        return self._fetch_paginated_objects(
            TableauAsset.WORKBOOK_TO_DATASOURCE,
        )

    def _fetch_published_datasources(self) -> SerializedAsset:
        """Fetches list of published datasources"""

        return [
            extract_asset(datasource, TableauAsset.PUBLISHED_DATASOURCE)
            for datasource in TSC.Pager(self._server.datasources)
        ]

    def _fetch_datasources(self) -> SerializedAsset:
        """Fetches both embedded and published datasource"""

        return self._fetch_paginated_objects(
            TableauAsset.DATASOURCE,
        )

    def _fetch_fields(self) -> SerializedAsset:
        """Fetches fields"""
        return self._fetch_paginated_objects(
            TableauAsset.FIELD,
        )

    def _fetch_custom_sql_queries(self) -> SerializedAsset:
        """Fetches custom sql queries"""

        return self._fetch_paginated_objects(
            TableauAsset.CUSTOM_SQL_QUERY,
        )

    def _fetch_custom_sql_tables(self) -> SerializedAsset:
        """Fetches custom sql tables"""

        return self._fetch_paginated_objects(
            TableauAsset.CUSTOM_SQL_TABLE,
        )

    def _fetch_dashboards(self) -> SerializedAsset:
        """Fetches dashboards"""

        return self._fetch_paginated_objects(
            TableauAsset.DASHBOARD,
        )

    def _fetch_sheets(self) -> SerializedAsset:
        """Fetches sheets"""

        return self._fetch_paginated_objects(
            TableauAsset.SHEET,
        )

    def _fetch_paginated_objects(self, asset: TableauAsset) -> SerializedAsset:
        """Fetches paginated objects using the client's server and page size"""

        return get_paginated_objects(self._server, asset, self._page_size)

    def fetch(self, asset: TableauAsset) -> SerializedAsset:
        """
        Fetches the given asset

        Raises ValueError when no fetcher is registered for `asset` (the
        previous chain of `if` statements failed later with an
        UnboundLocalError in that case).
        """
        logger.info(f"Fetching {asset.name}")

        # one entry per supported asset; callables are only invoked on match
        fetchers = {
            TableauAsset.CUSTOM_SQL_QUERY: self._fetch_custom_sql_queries,
            TableauAsset.CUSTOM_SQL_TABLE: self._fetch_custom_sql_tables,
            TableauAsset.DASHBOARD: self._fetch_dashboards,
            TableauAsset.DATASOURCE: self._fetch_datasources,
            TableauAsset.FIELD: self._fetch_fields,
            TableauAsset.PROJECT: self._fetch_projects,
            TableauAsset.PUBLISHED_DATASOURCE: (
                self._fetch_published_datasources
            ),
            TableauAsset.SHEET: self._fetch_sheets,
            TableauAsset.USAGE: lambda: self._fetch_usages(self._safe_mode),
            TableauAsset.USER: self._fetch_users,
            TableauAsset.WORKBOOK: self._fetch_workbooks,
            TableauAsset.WORKBOOK_TO_DATASOURCE: (
                self._fetch_workbooks_to_datasource
            ),
        }

        fetcher = fetchers.get(asset)
        if fetcher is None:
            raise ValueError(f"Unsupported Tableau asset: {asset}")

        assets = fetcher()

        logger.info(f"Fetched {asset.name} ({len(assets)} results)")

        return assets
|
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
from collections.abc import Iterator
|
|
2
|
-
from typing import Optional
|
|
3
|
-
|
|
4
|
-
from ....utils import SerializedAsset
|
|
5
|
-
from ..assets import TableauAsset
|
|
6
|
-
from ..gql_fields import QUERY_FIELDS
|
|
7
|
-
from ..tsc_fields import TSC_FIELDS
|
|
8
|
-
|
|
9
|
-
# GraphQL query skeleton used for every paginated metadata request.
# `object_type`, `page_size` and `query_fields` are filled with `str.format`,
# which is why literal braces are doubled. AFTER_TOKEN_SIGNAL is not a format
# field: `query_scroll` substitutes it with the page cursor on each call.
QUERY_TEMPLATE = """
{{
{object_type}Connection(first: {page_size}, after: AFTER_TOKEN_SIGNAL) {{
nodes {{ {query_fields}
}}
pageInfo {{
hasNextPage
endCursor
}}
totalCount
}}
}}
"""

# Key under "data" in the query response that holds the requested connection;
# it mirrors the `{object_type}Connection` field name used in QUERY_TEMPLATE.
RESOURCE_TEMPLATE = "{resource}Connection"
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def get_paginated_objects(
    server,
    asset: TableauAsset,
    page_size: int,
) -> SerializedAsset:
    """
    Run every query registered for `asset` in QUERY_FIELDS and return all
    the nodes found, concatenated in query order then page order.

    Each registered entry provides the node fields and the object type;
    both are injected in QUERY_TEMPLATE and the pages are read through
    `query_scroll`.
    """
    collected: SerializedAsset = []
    for spec in QUERY_FIELDS[asset]:
        node_fields = spec["fields"].value
        object_type = spec["object_type"].value
        graphql = QUERY_TEMPLATE.format(
            object_type=object_type,
            page_size=page_size,
            query_fields=node_fields,
        )
        connection = RESOURCE_TEMPLATE.format(resource=object_type)
        for page in query_scroll(server, graphql, connection):
            collected.extend(page)
    return collected
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def query_scroll(
    server,
    query: str,
    resource: str,
) -> Iterator[SerializedAsset]:
    """
    Yield the `nodes` of each page returned by the metadata API.

    The AFTER_TOKEN_SIGNAL marker in `query` is replaced by `null` for the
    first call, then by the quoted `endCursor` of the previous page. The
    iteration stops once `hasNextPage` is falsy.
    """
    after = None
    has_more = True
    while has_more:
        # a defined cursor must be sent as a quoted string, else the null token
        after_literal = "null" if after is None else f'"{after}"'
        paged_query = query.replace("AFTER_TOKEN_SIGNAL", after_literal)
        connection = server.metadata.query(paged_query)["data"][resource]

        yield connection["nodes"]

        page_info = connection["pageInfo"]
        has_more = page_info["hasNextPage"]
        if has_more:
            after = page_info["endCursor"]
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
def extract_asset(asset: object, asset_type: TableauAsset) -> dict:
    """
    Agnostic function extracting the attributes defined for an asset type.

    `asset` is read with `getattr`, so it must expose the names listed in
    TSC_FIELDS[asset_type] as attributes (it was previously annotated as a
    dict, which would not work with attribute access).

    Returns a dict mapping each listed name to the attribute value.
    Raises AttributeError when a listed attribute is missing on `asset`.
    """
    return {key: getattr(asset, key) for key in TSC_FIELDS[asset_type]}
|
|
@@ -1,104 +0,0 @@
|
|
|
1
|
-
from enum import Enum
|
|
2
|
-
from typing import Optional
|
|
3
|
-
|
|
4
|
-
from ....utils import from_env
|
|
5
|
-
|
|
6
|
-
# Message attached to the error raised by CredentialsApi._get_credentials
# when neither authentication pair is complete.
AUTH_ERROR_MSG = "Need either user and password or token_name and token"

# https://tableau.github.io/server-client-python/docs/api-ref#authentication
# Value used by CredentialsApi when no site_id is provided.
DEFAULT_SERVER_SITE_ID = ""
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class CredentialsKey(Enum):
    """
    Value enum object for the credentials.

    Each value is both the keyword expected in the client kwargs (see
    `get_value`) and the attribute name read on CredentialsApi (see
    `CredentialsApi.to_dict`).
    """

    TABLEAU_USER = "user"
    TABLEAU_PASSWORD = "password"  # noqa: S105
    TABLEAU_TOKEN_NAME = "token_name"  # noqa: S105
    TABLEAU_TOKEN = "token"  # noqa: S105
    TABLEAU_SITE_ID = "site_id"
    TABLEAU_SERVER_URL = "server_url"
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
# Environment variable backing each credential key: the member name prefixed
# with "CASTOR_", i.e. CASTOR_TABLEAU_USER, CASTOR_TABLEAU_PASSWORD,
# CASTOR_TABLEAU_TOKEN_NAME, CASTOR_TABLEAU_TOKEN, CASTOR_TABLEAU_SITE_ID and
# CASTOR_TABLEAU_SERVER_URL.
CREDENTIALS_ENV: dict[CredentialsKey, str] = {
    key: f"CASTOR_{key.name}" for key in CredentialsKey
}
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def get_value(
    key: CredentialsKey,
    kwargs: dict,
    optional: bool = False,
) -> Optional[str]:
    """
    Resolve the value of a credential key.

    A truthy entry in `kwargs` (looked up by `key.value`) wins; otherwise
    the value comes from `from_env`, called with the matching environment
    variable name and the `optional` flag. Note that a falsy kwargs entry
    (e.g. an empty string) also falls back to the environment.
    """
    env_name = CREDENTIALS_ENV[key]

    provided = kwargs.get(key.value)
    if provided:
        return provided
    return from_env(env_name, optional)
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
class CredentialsApi:
    """
    ValueObject for the credentials.

    Only one authentication method is retained: user/password when both are
    provided, otherwise token_name/token. The attributes of the method that
    is not retained are set to None.
    """

    def __init__(
        self,
        *,
        server_url: str,
        site_id: Optional[str],
        user: Optional[str],
        password: Optional[str],
        token_name: Optional[str],
        token: Optional[str],
    ):
        """
        Store the connection settings.

        A falsy `site_id` is replaced by DEFAULT_SERVER_SITE_ID.
        Raises AssertionError when no authentication pair is complete.
        """
        credentials = self._get_credentials(user, password, token_name, token)

        self.user = credentials.get(CredentialsKey.TABLEAU_USER)
        self.site_id = site_id if site_id else DEFAULT_SERVER_SITE_ID
        self.server_url = server_url
        self.password = credentials.get(CredentialsKey.TABLEAU_PASSWORD)
        self.token_name = credentials.get(CredentialsKey.TABLEAU_TOKEN_NAME)
        self.token = credentials.get(CredentialsKey.TABLEAU_TOKEN)

    @staticmethod
    def _get_credentials(
        user: Optional[str],
        password: Optional[str],
        token_name: Optional[str],
        token: Optional[str],
    ) -> dict:
        """Helpers to retrieve credentials,
        if both are given choose user and password authentication method

        Raises AssertionError (with AUTH_ERROR_MSG) when neither pair is
        complete.
        """
        # explicit raise instead of an `assert` statement: asserts are removed
        # when Python runs with -O, which would silently skip the validation.
        # The exception type is kept for callers relying on it.
        if not ((user and password) or (token_name and token)):
            raise AssertionError(AUTH_ERROR_MSG)

        if user and password:
            return {
                CredentialsKey.TABLEAU_USER: user,
                CredentialsKey.TABLEAU_PASSWORD: password,
            }

        return {
            CredentialsKey.TABLEAU_TOKEN_NAME: token_name,
            CredentialsKey.TABLEAU_TOKEN: token,
        }

    def to_dict(self, hide: bool = False) -> dict[str, Optional[str]]:
        """
        Return the credentials keyed by their CredentialsKey value.

        With `hide`, password and token are replaced by "*" followed by
        their last 4 characters. Attributes of the unused authentication
        method are None and are returned as None (masking them used to
        raise a TypeError).
        """
        safe = (
            CredentialsKey.TABLEAU_USER,
            CredentialsKey.TABLEAU_SITE_ID,
            CredentialsKey.TABLEAU_SERVER_URL,
            CredentialsKey.TABLEAU_TOKEN_NAME,
        )
        unsafe = (CredentialsKey.TABLEAU_PASSWORD, CredentialsKey.TABLEAU_TOKEN)

        def val(k: CredentialsKey, v: Optional[str]) -> Optional[str]:
            # nothing to mask when the value is absent
            if v is None or not (hide and k in unsafe):
                return v
            return "*" + v[-4:]

        return {a.value: val(a, getattr(self, a.value)) for a in safe + unsafe}
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
from typing import Optional
|
|
2
|
-
|
|
3
|
-
from ....utils import SerializedAsset
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _folder_path(
    projects: SerializedAsset,
    project: dict,
    root: Optional[str] = "",
) -> str:
    """
    Recursive function to compute folder path with list of projects

    The path is built from the given project up to its top-most known
    ancestor: `root` carries the part already computed for the descendants.
    When the parent referenced by `parent_id` is not in `projects`, the
    climb stops and the path built so far is returned (this used to raise
    StopIteration).
    """
    path = "/" + str(project["name"]) + (root or "")
    parent_id = project["parent_id"]
    if parent_id is None:
        return path

    parent_project = next(
        (candidate for candidate in projects if candidate["id"] == parent_id),
        None,
    )
    if parent_project is None:
        # parent is not part of the extracted projects: keep the partial path
        return path

    return _folder_path(projects, parent_project, path)


def compute_project_path(projects: SerializedAsset) -> SerializedAsset:
    """
    Compute folder path with parent project name

    Adds a "folder_path" entry to every project, in place, and returns the
    same list.
    """
    for project in projects:
        project["folder_path"] = _folder_path(projects, project)
    return projects
|
|
@@ -1,70 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
|
|
3
|
-
import tableauserverclient as TSC # type: ignore
|
|
4
|
-
|
|
5
|
-
from ....utils import SerializedAsset
|
|
6
|
-
from ..assets import TableauAsset
|
|
7
|
-
from ..constants import SAFE_MODE_PAGE_SIZE
|
|
8
|
-
from ..errors import TableauErrorCode
|
|
9
|
-
from ..types import PageReturn, ServerResponseError
|
|
10
|
-
from ..usage import compute_usage_views
|
|
11
|
-
from .client_utils import extract_asset
|
|
12
|
-
|
|
13
|
-
logger = logging.getLogger(__name__)
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def _paginated_option(page_number: int) -> TSC.RequestOptions:
    """Build the TSC.RequestOptions targeting the given page, sized with SAFE_MODE_PAGE_SIZE"""
    options = TSC.RequestOptions(
        pagenumber=page_number,
        pagesize=SAFE_MODE_PAGE_SIZE,
    )
    return options
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def _next_usage_page(client, page_number: int) -> PageReturn:
    """
    Request views per page
    return Usages | ServerResponseError | TableauErrorCode

    Returns a `(usages, error)` pair:
    - `(usages, None)` when the page was fetched
    - `(None, TableauErrorCode.PAGE_NUMBER_NOT_FOUND)` when the error code
      of the server matches that value
    - `(None, error)` for any other ServerResponseError, so that the caller
      decides how to handle it

    The last case used to re-raise: the handler meant to return the error
    was a second `except ServerResponseError` clause, which can never run
    because the first matching clause always handles the exception.
    """
    options = _paginated_option(page_number)
    try:
        all_usages_items, _ = client._server.views.get(options, usage=True)
    except ServerResponseError as error:
        expected = TableauErrorCode.PAGE_NUMBER_NOT_FOUND
        if error.code == expected.value:
            return None, expected
        return None, error

    return all_usages_items, None
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
def safe_mode_fetch_usage(client) -> SerializedAsset:
    """
    Iterate through each page of usages, starting at page 1.

    - stops and returns the computed usage views as soon as a page reports
      TableauErrorCode.PAGE_NUMBER_NOT_FOUND
    - any other truthy error is logged as a warning, recorded in
      `client.errors`, and the page is skipped
    - empty pages are skipped
    """
    collected: list[dict] = []
    current_page: int = 1

    while True:
        page, failure = _next_usage_page(client, current_page)
        current_page += 1

        if failure == TableauErrorCode.PAGE_NUMBER_NOT_FOUND:
            return compute_usage_views(collected)

        if failure:
            logger.warning(failure)
            client.errors.append(str(failure))
        elif page:
            collected.extend(
                [extract_asset(item, TableauAsset.USAGE) for item in page],
            )
|
|
@@ -1,121 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
from collections.abc import Iterable
|
|
3
|
-
|
|
4
|
-
from ...utils import (
|
|
5
|
-
OUTPUT_DIR,
|
|
6
|
-
current_timestamp,
|
|
7
|
-
deep_serialize,
|
|
8
|
-
from_env,
|
|
9
|
-
get_output_filename,
|
|
10
|
-
write_errors_logs,
|
|
11
|
-
write_json,
|
|
12
|
-
write_summary,
|
|
13
|
-
)
|
|
14
|
-
from .assets import TableauAsset
|
|
15
|
-
from .client import ApiClient as Client
|
|
16
|
-
|
|
17
|
-
logger = logging.getLogger(__name__)
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
def iterate_all_data(
    client: Client,
) -> Iterable[tuple[TableauAsset, list]]:
    """
    Yield `(asset, serialized data)` for each extracted Tableau asset.

    Assets are fetched lazily, one per iteration step, in the order listed
    below; each fetch is announced in the logs and its result goes through
    `deep_serialize`.
    """
    # (asset, label used in the log message) - order matters
    extraction_plan = (
        (TableauAsset.USER, "USER"),
        (TableauAsset.WORKBOOK, "WORKBOOK"),
        (TableauAsset.DASHBOARD, "DASHBOARD"),
        (TableauAsset.PUBLISHED_DATASOURCE, "PUBLISHED DATASOURCE"),
        (TableauAsset.PROJECT, "PROJECT"),
        (TableauAsset.USAGE, "USAGE"),
        (TableauAsset.WORKBOOK_TO_DATASOURCE, "WORKBOOK_TO_DATASOURCE"),
        (TableauAsset.DATASOURCE, "DATASOURCE"),
        (TableauAsset.CUSTOM_SQL_TABLE, "CUSTOM_SQL_TABLE"),
        (TableauAsset.CUSTOM_SQL_QUERY, "CUSTOM_SQL_QUERY"),
        (TableauAsset.FIELD, "FIELD"),
    )

    for asset, label in extraction_plan:
        logger.info(f"Extracting {label} from Tableau API")
        yield asset, deep_serialize(client.fetch(asset))
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
def extract_all(client: Client, **kwargs: str) -> None:
    """
    Extract data from Tableau and store it locally.

    - the target directory is the `output_directory` keyword when truthy,
      else the value returned by `from_env(OUTPUT_DIR)`
    - one JSON file is written per asset yielded by `iterate_all_data`
    - a summary is written with the client base url and name
    - when the client collected errors, they are written to the same
      directory
    """
    target_dir = kwargs.get("output_directory") or from_env(OUTPUT_DIR)
    run_timestamp = current_timestamp()

    for asset, payload in iterate_all_data(client):
        destination = get_output_filename(
            asset.value,
            target_dir,
            run_timestamp,
        )
        write_json(destination, payload)

    write_summary(
        target_dir,
        run_timestamp,
        base_url=client.base_url(),
        client_name=client.name(),
    )

    if not client.errors:
        return
    write_errors_logs(target_dir, run_timestamp, client.errors)
|