castor-extractor 0.19.0__py3-none-any.whl → 0.19.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of castor-extractor might be problematic. Click here for more details.

Files changed (83)
  1. CHANGELOG.md +29 -2
  2. castor_extractor/file_checker/templates/generic_warehouse.py +1 -1
  3. castor_extractor/knowledge/notion/client/client.py +44 -80
  4. castor_extractor/knowledge/notion/client/client_test.py +9 -4
  5. castor_extractor/knowledge/notion/client/constants.py +1 -0
  6. castor_extractor/knowledge/notion/client/endpoints.py +1 -1
  7. castor_extractor/knowledge/notion/client/pagination.py +9 -5
  8. castor_extractor/quality/soda/assets.py +1 -1
  9. castor_extractor/quality/soda/client/client.py +30 -83
  10. castor_extractor/quality/soda/client/credentials.py +0 -11
  11. castor_extractor/quality/soda/client/endpoints.py +3 -6
  12. castor_extractor/quality/soda/client/pagination.py +25 -0
  13. castor_extractor/utils/__init__.py +13 -2
  14. castor_extractor/utils/client/__init__.py +14 -0
  15. castor_extractor/utils/client/api/__init__.py +5 -0
  16. castor_extractor/utils/client/api/auth.py +76 -0
  17. castor_extractor/utils/client/api/auth_test.py +49 -0
  18. castor_extractor/utils/client/api/client.py +153 -0
  19. castor_extractor/utils/client/api/client_test.py +47 -0
  20. castor_extractor/utils/client/api/pagination.py +83 -0
  21. castor_extractor/utils/client/api/pagination_test.py +51 -0
  22. castor_extractor/utils/{safe_request_test.py → client/api/safe_request_test.py} +4 -1
  23. castor_extractor/utils/client/api/utils.py +9 -0
  24. castor_extractor/utils/client/api/utils_test.py +16 -0
  25. castor_extractor/utils/collection.py +34 -2
  26. castor_extractor/utils/collection_test.py +17 -3
  27. castor_extractor/utils/pager/__init__.py +0 -1
  28. castor_extractor/utils/retry.py +44 -0
  29. castor_extractor/utils/retry_test.py +26 -1
  30. castor_extractor/utils/salesforce/client.py +44 -49
  31. castor_extractor/utils/salesforce/client_test.py +2 -2
  32. castor_extractor/utils/salesforce/pagination.py +33 -0
  33. castor_extractor/visualization/domo/client/client.py +10 -5
  34. castor_extractor/visualization/domo/client/credentials.py +1 -1
  35. castor_extractor/visualization/domo/client/endpoints.py +19 -7
  36. castor_extractor/visualization/looker/api/credentials.py +1 -1
  37. castor_extractor/visualization/metabase/client/api/client.py +26 -11
  38. castor_extractor/visualization/metabase/client/api/credentials.py +1 -1
  39. castor_extractor/visualization/metabase/client/db/credentials.py +1 -1
  40. castor_extractor/visualization/mode/client/credentials.py +1 -1
  41. castor_extractor/visualization/qlik/client/engine/credentials.py +1 -1
  42. castor_extractor/visualization/salesforce_reporting/client/rest.py +4 -3
  43. castor_extractor/visualization/sigma/client/client.py +106 -111
  44. castor_extractor/visualization/sigma/client/credentials.py +11 -1
  45. castor_extractor/visualization/sigma/client/endpoints.py +1 -1
  46. castor_extractor/visualization/sigma/client/pagination.py +22 -18
  47. castor_extractor/visualization/tableau/tests/unit/rest_api/auth_test.py +0 -1
  48. castor_extractor/visualization/tableau/tests/unit/rest_api/credentials_test.py +0 -3
  49. castor_extractor/visualization/tableau_revamp/assets.py +11 -0
  50. castor_extractor/visualization/tableau_revamp/client/client.py +71 -151
  51. castor_extractor/visualization/tableau_revamp/client/client_metadata_api.py +95 -0
  52. castor_extractor/visualization/tableau_revamp/client/client_rest_api.py +128 -0
  53. castor_extractor/visualization/tableau_revamp/client/client_tsc.py +66 -0
  54. castor_extractor/visualization/tableau_revamp/client/{tsc_fields.py → rest_fields.py} +15 -2
  55. castor_extractor/visualization/tableau_revamp/constants.py +0 -2
  56. castor_extractor/visualization/tableau_revamp/extract.py +5 -11
  57. castor_extractor/warehouse/databricks/api_client.py +239 -0
  58. castor_extractor/warehouse/databricks/api_client_test.py +15 -0
  59. castor_extractor/warehouse/databricks/client.py +37 -490
  60. castor_extractor/warehouse/databricks/client_test.py +1 -99
  61. castor_extractor/warehouse/databricks/endpoints.py +28 -0
  62. castor_extractor/warehouse/databricks/lineage.py +141 -0
  63. castor_extractor/warehouse/databricks/lineage_test.py +34 -0
  64. castor_extractor/warehouse/databricks/pagination.py +22 -0
  65. castor_extractor/warehouse/databricks/sql_client.py +90 -0
  66. castor_extractor/warehouse/databricks/utils.py +44 -1
  67. castor_extractor/warehouse/databricks/utils_test.py +58 -1
  68. castor_extractor/warehouse/mysql/client.py +0 -2
  69. castor_extractor/warehouse/salesforce/client.py +12 -59
  70. castor_extractor/warehouse/salesforce/pagination.py +34 -0
  71. castor_extractor/warehouse/sqlserver/client.py +0 -1
  72. castor_extractor-0.19.6.dist-info/METADATA +903 -0
  73. {castor_extractor-0.19.0.dist-info → castor_extractor-0.19.6.dist-info}/RECORD +77 -60
  74. castor_extractor/utils/client/api.py +0 -87
  75. castor_extractor/utils/client/api_test.py +0 -24
  76. castor_extractor/utils/pager/pager_on_token.py +0 -52
  77. castor_extractor/utils/pager/pager_on_token_test.py +0 -73
  78. castor_extractor/visualization/sigma/client/client_test.py +0 -54
  79. castor_extractor-0.19.0.dist-info/METADATA +0 -207
  80. /castor_extractor/utils/{safe_request.py → client/api/safe_request.py} +0 -0
  81. {castor_extractor-0.19.0.dist-info → castor_extractor-0.19.6.dist-info}/LICENCE +0 -0
  82. {castor_extractor-0.19.0.dist-info → castor_extractor-0.19.6.dist-info}/WHEEL +0 -0
  83. {castor_extractor-0.19.0.dist-info → castor_extractor-0.19.6.dist-info}/entry_points.txt +0 -0
@@ -1,4 +1,6 @@
1
- from pydantic import Field, SecretStr
1
+ from typing import Dict
2
+
3
+ from pydantic import Field
2
4
  from pydantic_settings import BaseSettings, SettingsConfigDict
3
5
 
4
6
  CASTOR_ENV_PREFIX = "CASTOR_SIGMA_"
@@ -17,3 +19,11 @@ class SigmaCredentials(BaseSettings):
17
19
  client_id: str
18
20
  host: str
19
21
  grant_type: str = "client_credentials"
22
+
23
+ @property
24
+ def token_payload(self) -> Dict[str, str]:
25
+ return {
26
+ "grant_type": self.grant_type,
27
+ "client_id": self.client_id,
28
+ "client_secret": self.api_token,
29
+ }
@@ -1,4 +1,4 @@
1
- class EndpointFactory:
1
+ class SigmaEndpointFactory:
2
2
  """Wrapper class around all endpoints we're using"""
3
3
 
4
4
  DATASETS = "datasets"
@@ -1,24 +1,28 @@
1
- import logging
2
1
  from typing import Optional
3
2
 
4
- logger = logging.getLogger(__name__)
3
+ from pydantic import ConfigDict, Field
4
+ from pydantic.alias_generators import to_camel
5
5
 
6
+ from ....utils import PaginationModel
6
7
 
7
- class Pagination:
8
- """This is a wrapper around Sigma's pagination system"""
8
+ SIGMA_API_LIMIT = 500 # default number of records per page
9
9
 
10
- def __init__(
11
- self,
12
- next_page: Optional[str],
13
- entries: Optional[list] = None,
14
- total: Optional[int] = 0,
15
- ):
16
- self.next_page = next_page
17
- self.entries = entries or []
18
- self.total = total
19
10
 
20
- def generate_url(self, endpoint_url: str) -> str:
21
- """Generates the paginated url based on the targeted endpoint"""
22
- pagination = f"?page={self.next_page}"
23
- paginated_url = f"{endpoint_url}{pagination}"
24
- return paginated_url
11
+ class SigmaPagination(PaginationModel):
12
+ next_page: Optional[str] = "0"
13
+ entries: list = Field(default_factory=list)
14
+
15
+ model_config = ConfigDict(
16
+ alias_generator=to_camel,
17
+ populate_by_name=True,
18
+ from_attributes=True,
19
+ )
20
+
21
+ def is_last(self) -> bool:
22
+ return self.next_page is None
23
+
24
+ def next_page_payload(self) -> dict:
25
+ return {"page": self.next_page}
26
+
27
+ def page_results(self) -> list:
28
+ return self.entries
@@ -3,7 +3,6 @@ import unittest
3
3
  from unittest import mock
4
4
 
5
5
  import requests_mock # type: ignore
6
- import tableauserverclient as TSC # type: ignore
7
6
 
8
7
  from ....client import ApiClient
9
8
  from ..utils import KEYS
@@ -1,6 +1,3 @@
1
- import requests_mock # type: ignore
2
- import tableauserverclient as TSC # type: ignore
3
-
4
1
  from ....client.credentials import CredentialsApi
5
2
 
6
3
 
@@ -10,9 +10,20 @@ class TableauRevampAsset(ExternalAsset):
10
10
  DASHBOARD = "dashboards"
11
11
  DATASOURCE = "datasources"
12
12
  FIELD = "fields"
13
+ METRIC = "metrics"
14
+ METRIC_DEFINITION = "metrics_definitions"
13
15
  PROJECT = "projects"
14
16
  SHEET = "sheets"
17
+ SUBSCRIPTION = "subscriptions"
15
18
  TABLE = "tables"
16
19
  USAGE = "usage"
17
20
  USER = "users"
18
21
  WORKBOOK = "workbooks"
22
+
23
+
24
+ # assets that are only available for clients using Tableau Pulse
25
+ TABLEAU_PULSE_ASSETS = (
26
+ TableauRevampAsset.METRIC,
27
+ TableauRevampAsset.METRIC_DEFINITION,
28
+ TableauRevampAsset.SUBSCRIPTION,
29
+ )
@@ -1,59 +1,39 @@
1
1
  import logging
2
- from typing import Dict, Iterator, List, Optional
3
2
 
4
3
  import tableauserverclient as TSC # type: ignore
5
- from tableauserverclient import Pager
6
4
 
7
5
  from ....utils import SerializedAsset
8
- from ..assets import TableauRevampAsset
9
- from ..constants import (
10
- DEFAULT_PAGE_SIZE,
11
- DEFAULT_TIMEOUT_SECONDS,
12
- TABLEAU_SERVER_VERSION,
13
- )
6
+ from ..assets import TABLEAU_PULSE_ASSETS, TableauRevampAsset
7
+ from ..constants import DEFAULT_TIMEOUT_SECONDS
8
+ from .client_metadata_api import TableauClientMetadataApi
9
+ from .client_rest_api import TableauClientRestApi
10
+ from .client_tsc import TableauClientTSC
14
11
  from .credentials import TableauRevampCredentials
15
- from .errors import TableauApiError
16
- from .gql_queries import FIELDS_QUERIES, GQL_QUERIES, QUERY_TEMPLATE
17
- from .tsc_fields import TSC_FIELDS
18
12
 
19
13
  logger = logging.getLogger(__name__)
20
14
 
21
- # these assets must be extracted via TableauServerClient
15
+ # these assets must be extracted via TableauServerClient (TSC)
22
16
  _TSC_ASSETS = (
23
- # only users who published content can be extracted from MetadataAPI
24
- TableauRevampAsset.USER,
25
17
  # projects are not available in Metadata API
26
18
  TableauRevampAsset.PROJECT,
27
19
  # view count are not available in Metadata API
28
20
  TableauRevampAsset.USAGE,
21
+ # only users who published content can be extracted from MetadataAPI
22
+ TableauRevampAsset.USER,
29
23
  )
30
24
 
31
- # increase the value when extraction is too slow
32
- # decrease the value when timeouts arise
33
- _CUSTOM_PAGE_SIZE: Dict[TableauRevampAsset, int] = {
34
- # for some clients, extraction of columns tend to hit the node limit
35
- # https://community.tableau.com/s/question/0D54T00000YuK60SAF/metadata-query-nodelimitexceeded-error
36
- # the workaround is to reduce pagination
37
- TableauRevampAsset.COLUMN: 50,
38
- # fields are light but volumes are bigger
39
- TableauRevampAsset.FIELD: 1000,
40
- TableauRevampAsset.TABLE: 50,
41
- }
42
-
43
-
44
- def _pick_fields(
45
- data: Pager,
46
- asset: TableauRevampAsset,
47
- ) -> SerializedAsset:
48
- fields = TSC_FIELDS[asset]
49
-
50
- def _pick(row: dict):
51
- return {field: getattr(row, field) for field in fields}
25
+ # these assets must be extracted via the REST API
26
+ _REST_API_ASSETS = (
27
+ # Tableau Pulse assets are only available in REST API
28
+ TableauRevampAsset.METRIC,
29
+ TableauRevampAsset.METRIC_DEFINITION,
30
+ TableauRevampAsset.SUBSCRIPTION,
31
+ )
52
32
 
53
- return [_pick(row) for row in data]
33
+ logging.getLogger("tableau.endpoint").setLevel(logging.WARNING)
54
34
 
55
35
 
56
- def _enrich_datasources_with_tsc(
36
+ def _merge_datasources(
57
37
  datasources: SerializedAsset,
58
38
  tsc_datasources: SerializedAsset,
59
39
  ) -> SerializedAsset:
@@ -71,14 +51,19 @@ def _enrich_datasources_with_tsc(
71
51
  # embedded datasources cannot be accessed via URL => no webpage_url
72
52
  continue
73
53
  luid = datasource["luid"]
74
- tsc_datasource = mapping[luid]
54
+ tsc_datasource = mapping.get(luid)
55
+ if not tsc_datasource:
56
+ # a datasource can be present in the Metadata API but missing from TSC
57
+ datasource["projectLuid"] = None
58
+ datasource["webpageUrl"] = None
59
+ continue
75
60
  datasource["projectLuid"] = tsc_datasource["project_id"]
76
61
  datasource["webpageUrl"] = tsc_datasource["webpage_url"]
77
62
 
78
63
  return datasources
79
64
 
80
65
 
81
- def _enrich_workbooks_with_tsc(
66
+ def _merge_workbooks(
82
67
  workbooks: SerializedAsset,
83
68
  tsc_workbooks: SerializedAsset,
84
69
  ) -> SerializedAsset:
@@ -104,61 +89,44 @@ def _enrich_workbooks_with_tsc(
104
89
  return workbooks
105
90
 
106
91
 
107
- def gql_query_scroll(
108
- server,
109
- query: str,
110
- resource: str,
111
- ) -> Iterator[SerializedAsset]:
112
- """Iterate over GQL query results, handling pagination and cursor"""
113
-
114
- def _call(cursor: Optional[str]) -> dict:
115
- # If cursor is defined it must be quoted else use null token
116
- token = "null" if cursor is None else f'"{cursor}"'
117
- query_ = query.replace("AFTER_TOKEN_SIGNAL", token)
118
- answer = server.metadata.query(query_)
119
- if "errors" in answer:
120
- raise TableauApiError(answer["errors"])
121
- return answer["data"][f"{resource}Connection"]
122
-
123
- cursor = None
124
- while True:
125
- payload = _call(cursor)
126
- yield payload["nodes"]
127
-
128
- page_info = payload["pageInfo"]
129
- if page_info["hasNextPage"]:
130
- cursor = page_info["endCursor"]
131
- else:
132
- break
92
+ def _server(
93
+ server_url: str,
94
+ timeout_sec: int,
95
+ ) -> TSC.Server:
96
+ options = {"verify": True, "timeout": timeout_sec}
97
+ server = TSC.Server(server_url, use_server_version=True)
98
+ server.add_http_options(options)
99
+ return server
133
100
 
134
101
 
135
102
  class TableauRevampClient:
136
103
  """
137
104
  Connect to Tableau's API and extract assets.
138
105
 
139
- Relies on TableauServerClient overlay:
106
+ Relies on TableauServerClient (TSC) overlay for authentication
140
107
  https://tableau.github.io/server-client-python/docs/
141
- - for connection
142
- - to extract Users (Metadata
143
-
144
- Calls the MetadataAPI, using graphQL
145
- https://help.tableau.com/current/api/metadata_api/en-us/reference/index.html
146
108
  """
147
109
 
148
110
  def __init__(
149
111
  self,
150
112
  credentials: TableauRevampCredentials,
151
113
  timeout_sec: int = DEFAULT_TIMEOUT_SECONDS,
114
+ with_pulse: bool = False,
152
115
  ):
153
116
  self._credentials = credentials
154
- self._server = TSC.Server(self._credentials.server_url)
155
- options = {"verify": True, "timeout": timeout_sec}
156
- self._server.add_http_options(options)
157
- self._server.version = TABLEAU_SERVER_VERSION
158
- self.errors: List[str] = []
159
-
160
- @staticmethod
161
- def name() -> str:
117
+ self._server = _server(credentials.server_url, timeout_sec)
118
+ self._with_pulse = with_pulse
119
+
120
+ self._client_metadata = TableauClientMetadataApi(server=self._server)
121
+ self._client_rest = TableauClientRestApi(server=self._server)
122
+ self._client_tsc = TableauClientTSC(server=self._server)
123
+
124
+ @property
125
+ def base_url(self) -> str:
126
+ return self._credentials.server_url
127
+
128
+ @property
129
+ def name(self) -> str:
162
130
  return "Tableau/API"
163
131
 
164
132
  def _user_password_login(self) -> None:
@@ -203,98 +171,50 @@ class TableauRevampClient:
203
171
  "Invalid credentials: either user/password or PAT must be provided",
204
172
  )
205
173
 
206
- def base_url(self) -> str:
207
- return self._credentials.server_url
208
-
209
- def _fetch_from_tsc(
210
- self,
211
- asset: TableauRevampAsset,
212
- ) -> SerializedAsset:
213
- if asset == TableauRevampAsset.DATASOURCE:
214
- data = TSC.Pager(self._server.datasources)
215
-
216
- elif asset == TableauRevampAsset.PROJECT:
217
- data = TSC.Pager(self._server.projects)
218
-
219
- elif asset == TableauRevampAsset.USAGE:
220
- data = TSC.Pager(self._server.views, usage=True)
221
-
222
- elif asset == TableauRevampAsset.USER:
223
- data = TSC.Pager(self._server.users)
224
-
225
- elif asset == TableauRevampAsset.WORKBOOK:
226
- data = TSC.Pager(self._server.workbooks)
227
-
228
- else:
229
- raise AssertionError(f"Fetching from TSC not supported for {asset}")
230
-
231
- return _pick_fields(data, asset)
232
-
233
- def _run_graphql_query(
234
- self,
235
- resource: str,
236
- fields: str,
237
- page_size: int = DEFAULT_PAGE_SIZE,
238
- ) -> SerializedAsset:
239
- query = QUERY_TEMPLATE.format(
240
- resource=resource,
241
- fields=fields,
242
- page_size=page_size,
243
- )
244
- result_pages = gql_query_scroll(self._server, query, resource)
245
- return [asset for page in result_pages for asset in page]
246
-
247
- def _fetch_fields(self) -> SerializedAsset:
248
- result: SerializedAsset = []
249
- page_size = _CUSTOM_PAGE_SIZE[TableauRevampAsset.FIELD]
250
- for resource, fields in FIELDS_QUERIES:
251
- current = self._run_graphql_query(resource, fields, page_size)
252
- result.extend(current)
253
- return result
254
-
255
- def _fetch_from_metadata_api(
256
- self,
257
- asset: TableauRevampAsset,
258
- ) -> SerializedAsset:
259
- if asset == TableauRevampAsset.FIELD:
260
- return self._fetch_fields()
261
-
262
- page_size = _CUSTOM_PAGE_SIZE.get(asset) or DEFAULT_PAGE_SIZE
263
- resource, fields = GQL_QUERIES[asset]
264
- return self._run_graphql_query(resource, fields, page_size)
265
-
266
174
  def _fetch_datasources(self) -> SerializedAsset:
267
175
  asset = TableauRevampAsset.DATASOURCE
268
176
 
269
- datasources = self._fetch_from_metadata_api(asset)
270
- datasource_projects = self._fetch_from_tsc(asset)
177
+ datasources = self._client_metadata.fetch(asset)
178
+ tsc_datasources = self._client_tsc.fetch(asset)
271
179
 
272
- return _enrich_datasources_with_tsc(datasources, datasource_projects)
180
+ return _merge_datasources(datasources, tsc_datasources)
273
181
 
274
182
  def _fetch_workbooks(self) -> SerializedAsset:
275
183
  asset = TableauRevampAsset.WORKBOOK
276
184
 
277
- workbooks = self._fetch_from_metadata_api(asset)
278
- workbook_projects = self._fetch_from_tsc(asset)
185
+ workbooks = self._client_metadata.fetch(asset)
186
+ workbook_projects = self._client_tsc.fetch(asset)
279
187
 
280
- return _enrich_workbooks_with_tsc(workbooks, workbook_projects)
188
+ return _merge_workbooks(workbooks, workbook_projects)
281
189
 
282
- def fetch(self, asset: TableauRevampAsset) -> SerializedAsset:
190
+ def fetch(
191
+ self,
192
+ asset: TableauRevampAsset,
193
+ ) -> SerializedAsset:
283
194
  """
284
195
  Extract the given Tableau Asset
285
196
  """
197
+ if asset in TABLEAU_PULSE_ASSETS and not self._with_pulse:
198
+ logger.info(f"Skipping asset {asset} - Tableau Pulse de-activated")
199
+ return []
200
+
201
+ logger.info(f"Extracting {asset.name}...")
286
202
 
287
203
  if asset == TableauRevampAsset.DATASOURCE:
288
- # both APIs are required to extract datasources
204
+ # two APIs are required to extract datasources
289
205
  return self._fetch_datasources()
290
206
 
291
207
  if asset == TableauRevampAsset.WORKBOOK:
292
- # both APIs are required to extract workbooks
208
+ # two APIs are required to extract workbooks
293
209
  return self._fetch_workbooks()
294
210
 
295
211
  if asset in _TSC_ASSETS:
296
212
  # some assets can only be extracted via TSC
297
- return self._fetch_from_tsc(asset)
213
+ return self._client_tsc.fetch(asset)
214
+
215
+ if asset in _REST_API_ASSETS:
216
+ # some assets can only be extracted via REST API
217
+ return self._client_rest.fetch(asset)
298
218
 
299
- # extract most assets via Metadata API
300
- return self._fetch_from_metadata_api(asset)
219
+ # other assets can be extracted via Metadata API
220
+ return self._client_metadata.fetch(asset)
@@ -0,0 +1,95 @@
1
+ from typing import Dict, Iterator, Optional
2
+
3
+ import tableauserverclient as TSC # type: ignore
4
+
5
+ from ....utils import SerializedAsset
6
+ from ..assets import TableauRevampAsset
7
+ from ..constants import DEFAULT_PAGE_SIZE
8
+ from .errors import TableauApiError
9
+ from .gql_queries import FIELDS_QUERIES, GQL_QUERIES, QUERY_TEMPLATE
10
+
11
+ # increase the value when extraction is too slow
12
+ # decrease the value when timeouts arise
13
+ _CUSTOM_PAGE_SIZE: Dict[TableauRevampAsset, int] = {
14
+ # for some clients, extraction of columns tends to hit the node limit
15
+ # https://community.tableau.com/s/question/0D54T00000YuK60SAF/metadata-query-nodelimitexceeded-error
16
+ # the workaround is to reduce pagination
17
+ TableauRevampAsset.COLUMN: 50,
18
+ # fields are light but volumes are bigger
19
+ TableauRevampAsset.FIELD: 1000,
20
+ TableauRevampAsset.TABLE: 50,
21
+ }
22
+
23
+
24
+ def gql_query_scroll(
25
+ server,
26
+ query: str,
27
+ resource: str,
28
+ ) -> Iterator[SerializedAsset]:
29
+ """Iterate over GQL query results, handling pagination and cursor"""
30
+
31
+ def _call(cursor: Optional[str]) -> dict:
32
+ # If cursor is defined it must be quoted else use null token
33
+ token = "null" if cursor is None else f'"{cursor}"'
34
+ query_ = query.replace("AFTER_TOKEN_SIGNAL", token)
35
+ answer = server.metadata.query(query_)
36
+ if "errors" in answer:
37
+ raise TableauApiError(answer["errors"])
38
+ return answer["data"][f"{resource}Connection"]
39
+
40
+ cursor = None
41
+ while True:
42
+ payload = _call(cursor)
43
+ yield payload["nodes"]
44
+
45
+ page_info = payload["pageInfo"]
46
+ if page_info["hasNextPage"]:
47
+ cursor = page_info["endCursor"]
48
+ else:
49
+ break
50
+
51
+
52
+ class TableauClientMetadataApi:
53
+ """
54
+ Calls the MetadataAPI, using graphQL
55
+ https://help.tableau.com/current/api/metadata_api/en-us/reference/index.html
56
+ """
57
+
58
+ def __init__(
59
+ self,
60
+ server: TSC.Server,
61
+ ):
62
+ self._server = server
63
+
64
+ def _call(
65
+ self,
66
+ resource: str,
67
+ fields: str,
68
+ page_size: int = DEFAULT_PAGE_SIZE,
69
+ ) -> SerializedAsset:
70
+ query = QUERY_TEMPLATE.format(
71
+ resource=resource,
72
+ fields=fields,
73
+ page_size=page_size,
74
+ )
75
+ result_pages = gql_query_scroll(self._server, query, resource)
76
+ return [asset for page in result_pages for asset in page]
77
+
78
+ def _fetch_fields(self) -> SerializedAsset:
79
+ result: SerializedAsset = []
80
+ page_size = _CUSTOM_PAGE_SIZE[TableauRevampAsset.FIELD]
81
+ for resource, fields in FIELDS_QUERIES:
82
+ current = self._call(resource, fields, page_size)
83
+ result.extend(current)
84
+ return result
85
+
86
+ def fetch(
87
+ self,
88
+ asset: TableauRevampAsset,
89
+ ) -> SerializedAsset:
90
+ if asset == TableauRevampAsset.FIELD:
91
+ return self._fetch_fields()
92
+
93
+ page_size = _CUSTOM_PAGE_SIZE.get(asset) or DEFAULT_PAGE_SIZE
94
+ resource, fields = GQL_QUERIES[asset]
95
+ return self._call(resource, fields, page_size)
@@ -0,0 +1,128 @@
1
+ import logging
2
+ from typing import Dict, Optional
3
+
4
+ import requests
5
+ import tableauserverclient as TSC # type: ignore
6
+
7
+ from ....utils import SerializedAsset, deduplicate
8
+ from ..assets import TableauRevampAsset
9
+ from .rest_fields import REST_FIELDS
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+ _PULSE_API = "api/-/pulse"
14
+
15
+ _METRICS_URL = "{base}/pulse/site/{site}/metrics/{definition_id}"
16
+
17
+
18
+ def _pick(
19
+ data: SerializedAsset,
20
+ asset: TableauRevampAsset,
21
+ ) -> SerializedAsset:
22
+ keys = REST_FIELDS[asset]
23
+ return [{key: row[key] for key in keys} for row in data]
24
+
25
+
26
+ class TableauClientRestApi:
27
+ """
28
+ Extract Tableau Assets using REST API
29
+ https://help.tableau.com/current/api/rest_api/en-us/REST/rest_api_ref.htm
30
+ """
31
+
32
+ def __init__(
33
+ self,
34
+ server: TSC.Server,
35
+ ):
36
+ self._server = server
37
+
38
+ @property
39
+ def timeout_sec(self) -> int:
40
+ return self._server.http_options["timeout"]
41
+
42
+ @property
43
+ def headers(self) -> Dict[str, str]:
44
+ return {"x-tableau-auth": self._server.auth_token}
45
+
46
+ def _get_site_name(self) -> str:
47
+ site_id = self._server.site_id
48
+ site = self._server.sites.get_by_id(site_id)
49
+ return site.content_url
50
+
51
+ def _get(
52
+ self,
53
+ url: str,
54
+ page_token: Optional[str] = None,
55
+ ) -> Dict:
56
+ if page_token:
57
+ url += f"?page_token={page_token}"
58
+
59
+ logger.debug(f"Calling REST API: {url}")
60
+ response = requests.get(
61
+ url,
62
+ headers=self.headers,
63
+ timeout=self.timeout_sec,
64
+ )
65
+ response.raise_for_status()
66
+ return response.json()
67
+
68
+ def _call(
69
+ self,
70
+ path: str,
71
+ target: str,
72
+ ) -> SerializedAsset:
73
+ base = self._server.server_address.strip("/")
74
+ url = f"{base}/{path}/{target}"
75
+
76
+ next_page_token = None
77
+ data = []
78
+
79
+ while True:
80
+ response = self._get(url, next_page_token)
81
+ data += response[target]
82
+ next_page_token = response.get("next_page_token")
83
+ if not next_page_token:
84
+ break
85
+
86
+ return data
87
+
88
+ def _compute_metric_url(self, data: SerializedAsset) -> None:
89
+ site = self._get_site_name()
90
+ base_url = self._server.server_address.strip("/")
91
+ for row in data:
92
+ row["metadata"]["url"] = _METRICS_URL.format(
93
+ base=base_url,
94
+ site=site,
95
+ definition_id=row["metadata"]["id"],
96
+ )
97
+
98
+ def _fetch_metrics(self, definitions: SerializedAsset) -> SerializedAsset:
99
+ metrics = []
100
+ for definition in definitions:
101
+ definition_id = definition["metadata"]["id"]
102
+ path = f"{_PULSE_API}/definitions/{definition_id}"
103
+ metrics += self._call(path=path, target="metrics")
104
+
105
+ # for some reason, the REST API sometimes sends the same metric twice
106
+ return deduplicate("id", metrics)
107
+
108
+ def fetch(
109
+ self,
110
+ asset: TableauRevampAsset,
111
+ ) -> SerializedAsset:
112
+ if asset == TableauRevampAsset.SUBSCRIPTION:
113
+ # https://help.tableau.com/current/api/rest_api/en-us/REST/rest_api_ref_pulse.htm#PulseSubscriptionService_ListSubscriptions
114
+ data = self._call(path=_PULSE_API, target="subscriptions")
115
+
116
+ elif asset == TableauRevampAsset.METRIC_DEFINITION:
117
+ # https://help.tableau.com/current/api/rest_api/en-us/REST/rest_api_ref_pulse.htm#MetricQueryService_ListDefinitions
118
+ data = self._call(path=_PULSE_API, target="definitions")
119
+ self._compute_metric_url(data)
120
+
121
+ elif asset == TableauRevampAsset.METRIC:
122
+ # https://help.tableau.com/current/api/rest_api/en-us/REST/rest_api_ref_pulse.htm#MetricQueryService_ListMetrics
123
+ definitions = self._call(path=_PULSE_API, target="definitions")
124
+ data = self._fetch_metrics(definitions)
125
+ else:
126
+ raise AssertionError(f"Unsupported asset {asset} for REST API")
127
+
128
+ return _pick(data, asset)