castor-extractor 0.19.4__py3-none-any.whl → 0.19.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +13 -0
- castor_extractor/quality/soda/client/pagination.py +1 -1
- castor_extractor/utils/__init__.py +1 -0
- castor_extractor/utils/client/__init__.py +1 -1
- castor_extractor/utils/client/api/__init__.py +1 -1
- castor_extractor/utils/client/api/client.py +33 -7
- castor_extractor/utils/client/api/pagination.py +23 -6
- castor_extractor/utils/pager/__init__.py +0 -1
- castor_extractor/utils/salesforce/client.py +45 -50
- castor_extractor/utils/salesforce/client_test.py +2 -2
- castor_extractor/utils/salesforce/pagination.py +33 -0
- castor_extractor/visualization/metabase/client/api/client.py +30 -11
- castor_extractor/visualization/salesforce_reporting/client/rest.py +4 -3
- castor_extractor/visualization/sigma/client/client.py +2 -1
- castor_extractor/visualization/tableau_revamp/assets.py +8 -0
- castor_extractor/visualization/tableau_revamp/client/client.py +6 -1
- castor_extractor/warehouse/databricks/api_client.py +239 -0
- castor_extractor/warehouse/databricks/api_client_test.py +15 -0
- castor_extractor/warehouse/databricks/client.py +37 -489
- castor_extractor/warehouse/databricks/client_test.py +1 -99
- castor_extractor/warehouse/databricks/endpoints.py +28 -0
- castor_extractor/warehouse/databricks/lineage.py +141 -0
- castor_extractor/warehouse/databricks/lineage_test.py +34 -0
- castor_extractor/warehouse/databricks/pagination.py +22 -0
- castor_extractor/warehouse/databricks/sql_client.py +90 -0
- castor_extractor/warehouse/databricks/utils.py +44 -1
- castor_extractor/warehouse/databricks/utils_test.py +58 -1
- castor_extractor/warehouse/mysql/client.py +0 -3
- castor_extractor/warehouse/salesforce/client.py +12 -59
- castor_extractor/warehouse/salesforce/pagination.py +34 -0
- castor_extractor/warehouse/sqlserver/client.py +0 -2
- {castor_extractor-0.19.4.dist-info → castor_extractor-0.19.7.dist-info}/METADATA +14 -1
- {castor_extractor-0.19.4.dist-info → castor_extractor-0.19.7.dist-info}/RECORD +36 -31
- castor_extractor/utils/client/api_deprecated.py +0 -89
- castor_extractor/utils/client/api_deprecated_test.py +0 -18
- castor_extractor/utils/pager/pager_on_token.py +0 -52
- castor_extractor/utils/pager/pager_on_token_test.py +0 -73
- {castor_extractor-0.19.4.dist-info → castor_extractor-0.19.7.dist-info}/LICENCE +0 -0
- {castor_extractor-0.19.4.dist-info → castor_extractor-0.19.7.dist-info}/WHEEL +0 -0
- {castor_extractor-0.19.4.dist-info → castor_extractor-0.19.7.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED
|
@@ -1,5 +1,18 @@
|
|
|
1
|
+
|
|
1
2
|
# Changelog
|
|
2
3
|
|
|
4
|
+
## 0.19.7 - 2024-09-05
|
|
5
|
+
|
|
6
|
+
* Metabase: Handle compatibility with older version
|
|
7
|
+
|
|
8
|
+
## 0.19.6 - 2024-09-03
|
|
9
|
+
|
|
10
|
+
* Metabase: Adding error handler on API call
|
|
11
|
+
|
|
12
|
+
## 0.19.5 - 2024-09-02
|
|
13
|
+
|
|
14
|
+
* Databricks/Salesforce: Remove deprecated client dependencies
|
|
15
|
+
|
|
3
16
|
## 0.19.4 - 2024-08-29
|
|
4
17
|
|
|
5
18
|
* Tableau Pulse: extract Metrics and Subscriptions
|
|
@@ -15,7 +15,7 @@ class SodaCloudPagination(PaginationModel):
|
|
|
15
15
|
|
|
16
16
|
def next_page_payload(self) -> dict:
|
|
17
17
|
current_page = (
|
|
18
|
-
self.current_page_payload["page"]
|
|
18
|
+
self.current_page_payload[self.fetch_by.value]["page"]
|
|
19
19
|
if self.current_page_payload
|
|
20
20
|
else _CLOUD_FIRST_PAGE
|
|
21
21
|
)
|
|
@@ -5,6 +5,7 @@ from .api import (
|
|
|
5
5
|
BasicAuth,
|
|
6
6
|
BearerAuth,
|
|
7
7
|
CustomAuth,
|
|
8
|
+
FetchNextPageBy,
|
|
8
9
|
PaginationModel,
|
|
9
10
|
RequestSafeMode,
|
|
10
11
|
ResponseJson,
|
|
@@ -12,7 +13,6 @@ from .api import (
|
|
|
12
13
|
fetch_all_pages,
|
|
13
14
|
handle_response,
|
|
14
15
|
)
|
|
15
|
-
from .api_deprecated import APIClientDeprecated
|
|
16
16
|
from .postgres import PostgresClient
|
|
17
17
|
from .query import ExtractionQuery
|
|
18
18
|
from .uri import uri_encode
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from .auth import Auth, BasicAuth, BearerAuth, CustomAuth
|
|
2
2
|
from .client import APIClient
|
|
3
|
-
from .pagination import PaginationModel, fetch_all_pages
|
|
3
|
+
from .pagination import FetchNextPageBy, PaginationModel, fetch_all_pages
|
|
4
4
|
from .safe_request import RequestSafeMode, ResponseJson, handle_response
|
|
5
5
|
from .utils import build_url
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from http import HTTPStatus
|
|
3
|
-
from typing import Dict, Literal, Optional
|
|
3
|
+
from typing import Dict, Literal, Optional, Tuple
|
|
4
4
|
|
|
5
5
|
import requests
|
|
6
6
|
from requests import Response
|
|
@@ -21,6 +21,25 @@ DEFAULT_TIMEOUT = 60
|
|
|
21
21
|
RETRY_ON_EXPIRED_TOKEN = 1
|
|
22
22
|
|
|
23
23
|
|
|
24
|
+
def _generate_payloads(
|
|
25
|
+
method: HttpMethod,
|
|
26
|
+
params: Optional[dict],
|
|
27
|
+
data: Optional[dict],
|
|
28
|
+
pagination_params: Optional[dict],
|
|
29
|
+
) -> Tuple[dict, dict]:
|
|
30
|
+
_pagination_params = pagination_params or {}
|
|
31
|
+
params = params or {}
|
|
32
|
+
data = data or {}
|
|
33
|
+
|
|
34
|
+
if method == "GET":
|
|
35
|
+
params = {**params, **_pagination_params}
|
|
36
|
+
elif method == "POST":
|
|
37
|
+
data = {**data, **_pagination_params}
|
|
38
|
+
else:
|
|
39
|
+
raise ValueError(f"Method {method} is not yet supported")
|
|
40
|
+
return data, params
|
|
41
|
+
|
|
42
|
+
|
|
24
43
|
class APIClient:
|
|
25
44
|
"""
|
|
26
45
|
Interface to easily query REST-API with GET and POST requests
|
|
@@ -59,22 +78,27 @@ class APIClient:
|
|
|
59
78
|
*,
|
|
60
79
|
headers: Headers = None,
|
|
61
80
|
params: Optional[dict] = None,
|
|
81
|
+
data: Optional[dict] = None,
|
|
62
82
|
pagination_params: Optional[dict] = None,
|
|
63
83
|
) -> Response:
|
|
64
84
|
headers = headers or {}
|
|
65
|
-
|
|
66
|
-
params =
|
|
85
|
+
|
|
86
|
+
data, params = _generate_payloads(
|
|
87
|
+
method=method,
|
|
88
|
+
params=params,
|
|
89
|
+
data=data,
|
|
90
|
+
pagination_params=pagination_params,
|
|
91
|
+
)
|
|
67
92
|
|
|
68
93
|
url = build_url(self._host, endpoint)
|
|
69
|
-
combined_params = {**params, **_pagination_params}
|
|
70
94
|
|
|
71
95
|
return requests.request(
|
|
72
96
|
method=method,
|
|
73
97
|
url=url,
|
|
74
98
|
auth=self._auth,
|
|
75
99
|
headers={**self.base_headers, **headers},
|
|
76
|
-
params=
|
|
77
|
-
json=
|
|
100
|
+
params=params,
|
|
101
|
+
json=data,
|
|
78
102
|
timeout=self._timeout,
|
|
79
103
|
)
|
|
80
104
|
|
|
@@ -88,12 +112,14 @@ class APIClient:
|
|
|
88
112
|
*,
|
|
89
113
|
headers: Headers = None,
|
|
90
114
|
params: Optional[dict] = None,
|
|
115
|
+
data: Optional[dict] = None,
|
|
91
116
|
pagination_params: Optional[dict] = None,
|
|
92
117
|
):
|
|
93
118
|
response = self._call(
|
|
94
119
|
method="GET",
|
|
95
120
|
endpoint=endpoint,
|
|
96
121
|
params=params,
|
|
122
|
+
data=data,
|
|
97
123
|
pagination_params=pagination_params,
|
|
98
124
|
headers=headers,
|
|
99
125
|
)
|
|
@@ -117,7 +143,7 @@ class APIClient:
|
|
|
117
143
|
response = self._call(
|
|
118
144
|
method="POST",
|
|
119
145
|
endpoint=endpoint,
|
|
120
|
-
|
|
146
|
+
data=data,
|
|
121
147
|
pagination_params=pagination_params,
|
|
122
148
|
headers=headers,
|
|
123
149
|
)
|
|
@@ -1,14 +1,28 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from abc import abstractmethod
|
|
3
|
+
from enum import Enum
|
|
3
4
|
from functools import partial
|
|
4
5
|
from time import sleep
|
|
5
|
-
from typing import Callable, Iterator, Optional, Type
|
|
6
|
+
from typing import Callable, Iterator, Optional, Type, Union
|
|
6
7
|
|
|
7
8
|
from pydantic import BaseModel
|
|
8
9
|
|
|
9
10
|
logger = logging.getLogger(__name__)
|
|
10
11
|
|
|
11
12
|
|
|
13
|
+
class FetchNextPageBy(Enum):
|
|
14
|
+
"""
|
|
15
|
+
Enum to pick which APIClient._call() argument we want
|
|
16
|
+
to use for calling the next page in the pagination.
|
|
17
|
+
Supported arguments are :
|
|
18
|
+
- params (PAYLOAD)
|
|
19
|
+
- endpoint (URL)
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
PAYLOAD = "pagination_params"
|
|
23
|
+
URL = "endpoint"
|
|
24
|
+
|
|
25
|
+
|
|
12
26
|
class PaginationModel(BaseModel):
|
|
13
27
|
"""
|
|
14
28
|
Base abstract class defining a pagination model
|
|
@@ -18,6 +32,7 @@ class PaginationModel(BaseModel):
|
|
|
18
32
|
`fetch_all_pages` method
|
|
19
33
|
"""
|
|
20
34
|
|
|
35
|
+
fetch_by: FetchNextPageBy = FetchNextPageBy.PAYLOAD
|
|
21
36
|
current_page_payload: Optional["dict"] = None
|
|
22
37
|
|
|
23
38
|
@abstractmethod
|
|
@@ -26,7 +41,7 @@ class PaginationModel(BaseModel):
|
|
|
26
41
|
pass
|
|
27
42
|
|
|
28
43
|
@abstractmethod
|
|
29
|
-
def next_page_payload(self) -> dict:
|
|
44
|
+
def next_page_payload(self) -> Optional[Union[dict, str]]:
|
|
30
45
|
"""Payload enabling to generate the request for the next page"""
|
|
31
46
|
pass
|
|
32
47
|
|
|
@@ -35,6 +50,9 @@ class PaginationModel(BaseModel):
|
|
|
35
50
|
"""List of results of the current page"""
|
|
36
51
|
pass
|
|
37
52
|
|
|
53
|
+
def next_page_parameters(self) -> dict:
|
|
54
|
+
return {self.fetch_by.value: self.next_page_payload()}
|
|
55
|
+
|
|
38
56
|
|
|
39
57
|
def fetch_all_pages(
|
|
40
58
|
request: Callable,
|
|
@@ -51,13 +69,12 @@ def fetch_all_pages(
|
|
|
51
69
|
while not paginated_response.is_last():
|
|
52
70
|
logger.info(f"Fetching page number {page_number}")
|
|
53
71
|
yield from paginated_response.page_results()
|
|
54
|
-
|
|
55
|
-
new_request = partial(request,
|
|
72
|
+
next_page_parameters = paginated_response.next_page_parameters()
|
|
73
|
+
new_request = partial(request, **next_page_parameters)
|
|
56
74
|
if rate_limit:
|
|
57
75
|
sleep(rate_limit)
|
|
58
|
-
|
|
59
76
|
paginated_response = pagination_model(
|
|
60
|
-
current_page_payload=
|
|
77
|
+
current_page_payload=next_page_parameters, **new_request()
|
|
61
78
|
)
|
|
62
79
|
page_number += 1
|
|
63
80
|
|
|
@@ -1,16 +1,40 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
2
|
+
from functools import partial
|
|
3
|
+
from typing import Iterator, Optional
|
|
4
|
+
|
|
5
|
+
import requests
|
|
6
|
+
|
|
7
|
+
from ...utils import (
|
|
8
|
+
APIClient,
|
|
9
|
+
BearerAuth,
|
|
10
|
+
build_url,
|
|
11
|
+
fetch_all_pages,
|
|
12
|
+
handle_response,
|
|
13
|
+
)
|
|
7
14
|
from .constants import DEFAULT_API_VERSION, DEFAULT_PAGINATION_LIMIT
|
|
8
15
|
from .credentials import SalesforceCredentials
|
|
16
|
+
from .pagination import SalesforcePagination
|
|
9
17
|
|
|
10
18
|
logger = logging.getLogger(__name__)
|
|
11
19
|
|
|
20
|
+
SALESFORCE_TIMEOUT_S = 120
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class SalesforceAuth(BearerAuth):
|
|
24
|
+
_AUTH_ENDPOINT = "services/oauth2/token"
|
|
25
|
+
|
|
26
|
+
def __init__(self, credentials: SalesforceCredentials):
|
|
27
|
+
self._host = credentials.base_url
|
|
28
|
+
self._token_payload = credentials.token_request_payload()
|
|
29
|
+
|
|
30
|
+
def fetch_token(self) -> Optional[str]:
|
|
31
|
+
url = build_url(self._host, self._AUTH_ENDPOINT)
|
|
32
|
+
response = requests.post(url, "POST", params=self._token_payload)
|
|
33
|
+
handled_response = handle_response(response)
|
|
34
|
+
return handled_response["access_token"]
|
|
12
35
|
|
|
13
|
-
|
|
36
|
+
|
|
37
|
+
class SalesforceBaseClient(APIClient):
|
|
14
38
|
"""
|
|
15
39
|
Salesforce API client.
|
|
16
40
|
https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/intro_rest.htm
|
|
@@ -22,45 +46,24 @@ class SalesforceBaseClient(APIClientDeprecated):
|
|
|
22
46
|
PATH_TPL = "services/data/v{version}/{suffix}"
|
|
23
47
|
|
|
24
48
|
def __init__(self, credentials: SalesforceCredentials):
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
def _access_token(self, credentials: SalesforceCredentials) -> str:
|
|
29
|
-
url = self.build_url(self._host, "services/oauth2/token")
|
|
30
|
-
response = self._call(
|
|
31
|
-
url, "POST", params=credentials.token_request_payload()
|
|
49
|
+
auth = SalesforceAuth(credentials)
|
|
50
|
+
super().__init__(
|
|
51
|
+
host=credentials.base_url, auth=auth, timeout=SALESFORCE_TIMEOUT_S
|
|
32
52
|
)
|
|
33
|
-
return response["access_token"]
|
|
34
53
|
|
|
35
|
-
def
|
|
54
|
+
def _endpoint(self, suffix: str) -> str:
|
|
36
55
|
path = self.PATH_TPL.format(version=self.api_version, suffix=suffix)
|
|
37
|
-
return
|
|
56
|
+
return path
|
|
38
57
|
|
|
39
58
|
@property
|
|
40
|
-
def
|
|
59
|
+
def query_endpoint(self) -> str:
|
|
41
60
|
"""Returns the query API url"""
|
|
42
|
-
return self.
|
|
61
|
+
return self._endpoint("query")
|
|
43
62
|
|
|
44
63
|
@property
|
|
45
|
-
def
|
|
64
|
+
def tooling_endpoint(self) -> str:
|
|
46
65
|
"""Returns the tooling API url"""
|
|
47
|
-
return self.
|
|
48
|
-
|
|
49
|
-
@staticmethod
|
|
50
|
-
def _query_processor(response: Response) -> Tuple[dict, Optional[str]]:
|
|
51
|
-
results = response.json()
|
|
52
|
-
return results["records"], results.get("nextRecordsUrl")
|
|
53
|
-
|
|
54
|
-
def _has_reached_pagination_limit(self, page_number: int) -> bool:
|
|
55
|
-
return page_number > self.pagination_limit
|
|
56
|
-
|
|
57
|
-
def _query_first_page(self, query: str) -> Tuple[Iterator[dict], str]:
|
|
58
|
-
url = self.query_url
|
|
59
|
-
logger.info("querying page 0")
|
|
60
|
-
records, next_page_url = self._call(
|
|
61
|
-
url, params={"q": query}, processor=self._query_processor
|
|
62
|
-
)
|
|
63
|
-
return records, next_page_url
|
|
66
|
+
return self._endpoint("tooling/query")
|
|
64
67
|
|
|
65
68
|
def _query_all(self, query: str) -> Iterator[dict]:
|
|
66
69
|
"""
|
|
@@ -68,17 +71,9 @@ class SalesforceBaseClient(APIClientDeprecated):
|
|
|
68
71
|
|
|
69
72
|
more: https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/dome_query.htm
|
|
70
73
|
"""
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
):
|
|
78
|
-
logger.info(f"querying page {page_count}")
|
|
79
|
-
url = self.build_url(self._host, next_page_path)
|
|
80
|
-
records, next_page_path = self._call(
|
|
81
|
-
url, processor=self._query_processor
|
|
82
|
-
)
|
|
83
|
-
yield from records
|
|
84
|
-
page_count += 1
|
|
74
|
+
request = partial(
|
|
75
|
+
self._get,
|
|
76
|
+
endpoint=self.query_endpoint,
|
|
77
|
+
params={"q": query},
|
|
78
|
+
)
|
|
79
|
+
yield from fetch_all_pages(request, SalesforcePagination)
|
|
@@ -17,5 +17,5 @@ def test_SalesforceBaseClient__urls(mock_call):
|
|
|
17
17
|
)
|
|
18
18
|
client = SalesforceBaseClient(credentials)
|
|
19
19
|
|
|
20
|
-
assert client.
|
|
21
|
-
assert client.
|
|
20
|
+
assert client.query_endpoint == "services/data/v59.0/query"
|
|
21
|
+
assert client.tooling_endpoint == "services/data/v59.0/tooling/query"
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from pydantic import ConfigDict
|
|
4
|
+
from pydantic.alias_generators import to_camel
|
|
5
|
+
|
|
6
|
+
from ...utils import (
|
|
7
|
+
FetchNextPageBy,
|
|
8
|
+
PaginationModel,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
LIMIT_RECORDS_PER_PAGE = 2000
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class SalesforcePagination(PaginationModel):
|
|
15
|
+
model_config = ConfigDict(
|
|
16
|
+
alias_generator=to_camel,
|
|
17
|
+
populate_by_name=True,
|
|
18
|
+
from_attributes=True,
|
|
19
|
+
)
|
|
20
|
+
fetch_by: FetchNextPageBy = FetchNextPageBy.URL
|
|
21
|
+
records: list
|
|
22
|
+
next_records_url: Optional[str] = None
|
|
23
|
+
|
|
24
|
+
def is_last(self) -> bool:
|
|
25
|
+
no_next_page = not self.next_records_url
|
|
26
|
+
page_incomplete = len(self.records) < LIMIT_RECORDS_PER_PAGE
|
|
27
|
+
return no_next_page or page_incomplete
|
|
28
|
+
|
|
29
|
+
def next_page_payload(self) -> Optional[str]:
|
|
30
|
+
return self.next_records_url
|
|
31
|
+
|
|
32
|
+
def page_results(self) -> list:
|
|
33
|
+
return self.records
|
|
@@ -1,10 +1,16 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from http import HTTPStatus
|
|
3
|
+
from typing import Any, Dict, Iterator, List, Optional, cast
|
|
3
4
|
|
|
4
5
|
import requests
|
|
5
|
-
from requests import HTTPError
|
|
6
|
-
|
|
7
|
-
from .....utils import
|
|
6
|
+
from requests import HTTPError
|
|
7
|
+
|
|
8
|
+
from .....utils import (
|
|
9
|
+
JsonType,
|
|
10
|
+
RequestSafeMode,
|
|
11
|
+
SerializedAsset,
|
|
12
|
+
handle_response,
|
|
13
|
+
)
|
|
8
14
|
from ...assets import EXPORTED_FIELDS, MetabaseAsset
|
|
9
15
|
from ...errors import MetabaseLoginError, SuperuserCredentialsRequired
|
|
10
16
|
from ...types import IdsType
|
|
@@ -13,6 +19,17 @@ from .credentials import MetabaseApiCredentials
|
|
|
13
19
|
|
|
14
20
|
logger = logging.getLogger(__name__)
|
|
15
21
|
|
|
22
|
+
# Safe mode
|
|
23
|
+
VOLUME_IGNORED = 5
|
|
24
|
+
IGNORED_ERROR_CODES = (
|
|
25
|
+
HTTPStatus.BAD_REQUEST,
|
|
26
|
+
HTTPStatus.NOT_FOUND,
|
|
27
|
+
)
|
|
28
|
+
METABASE_SAFE_MODE = RequestSafeMode(
|
|
29
|
+
max_errors=VOLUME_IGNORED,
|
|
30
|
+
status_codes=IGNORED_ERROR_CODES,
|
|
31
|
+
)
|
|
32
|
+
|
|
16
33
|
URL_TEMPLATE = "{base_url}/api/{endpoint}"
|
|
17
34
|
|
|
18
35
|
ROOT_KEY = "root"
|
|
@@ -31,12 +48,14 @@ class ApiClient:
|
|
|
31
48
|
def __init__(
|
|
32
49
|
self,
|
|
33
50
|
credentials: MetabaseApiCredentials,
|
|
51
|
+
safe_mode: Optional[RequestSafeMode] = None,
|
|
34
52
|
):
|
|
35
53
|
self.base_url = credentials.base_url
|
|
36
54
|
|
|
37
55
|
self._credentials = credentials
|
|
38
56
|
self._session = requests.Session()
|
|
39
57
|
self._session_id = self._login()
|
|
58
|
+
self.safe_mode = safe_mode or METABASE_SAFE_MODE
|
|
40
59
|
self._check_permissions() # verify that the given user is superuser
|
|
41
60
|
|
|
42
61
|
@staticmethod
|
|
@@ -57,8 +76,8 @@ class ApiClient:
|
|
|
57
76
|
}
|
|
58
77
|
|
|
59
78
|
@staticmethod
|
|
60
|
-
def _answer(response:
|
|
61
|
-
answer = response
|
|
79
|
+
def _answer(response: Any):
|
|
80
|
+
answer = response
|
|
62
81
|
if isinstance(answer, Dict) and DATA_KEY in answer:
|
|
63
82
|
# v0.41 of Metabase introduced embedded data for certain calls
|
|
64
83
|
# {'data': [{ }, ...] , 'total': 15, 'limit': None, 'offset': None}"
|
|
@@ -69,7 +88,7 @@ class ApiClient:
|
|
|
69
88
|
url = self._url(endpoint)
|
|
70
89
|
headers = self._headers()
|
|
71
90
|
response = self._session.get(url=url, headers=headers)
|
|
72
|
-
response
|
|
91
|
+
response = handle_response(response, safe_mode=self.safe_mode)
|
|
73
92
|
return self._answer(response)
|
|
74
93
|
|
|
75
94
|
def _check_permissions(self) -> None:
|
|
@@ -110,16 +129,16 @@ class ApiClient:
|
|
|
110
129
|
return ids
|
|
111
130
|
|
|
112
131
|
def _dashboards(self) -> Iterator[dict]:
|
|
113
|
-
"""
|
|
114
|
-
GET /api/dashboard is deprecated
|
|
115
|
-
https://github.com/metabase/metabase/pull/35235
|
|
116
|
-
"""
|
|
117
132
|
collection_ids = self._fetch_ids(MetabaseAsset.COLLECTION)
|
|
118
133
|
for _id in collection_ids:
|
|
119
134
|
collection = self._call(f"collection/{_id}/items?models=dashboard")
|
|
120
135
|
if not collection:
|
|
121
136
|
continue
|
|
122
137
|
for dashboard in cast(SerializedAsset, collection):
|
|
138
|
+
if dashboard.get("model") != "dashboard":
|
|
139
|
+
# This is to maintain compatibility with older versions
|
|
140
|
+
# where ?models=dashboard has no effects
|
|
141
|
+
continue
|
|
123
142
|
dashboard_id = dashboard.get("id")
|
|
124
143
|
if not dashboard_id:
|
|
125
144
|
continue
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from typing import Dict, Iterator, List, Optional
|
|
3
3
|
|
|
4
|
+
from ....utils import build_url
|
|
4
5
|
from ....utils.salesforce import SalesforceBaseClient
|
|
5
6
|
from ..assets import SalesforceReportingAsset
|
|
6
7
|
from .soql import queries
|
|
@@ -28,15 +29,15 @@ class SalesforceReportingClient(SalesforceBaseClient):
|
|
|
28
29
|
|
|
29
30
|
if asset_type == SalesforceReportingAsset.DASHBOARDS:
|
|
30
31
|
path = f"lightning/r/Dashboard/{asset['Id']}/view"
|
|
31
|
-
return
|
|
32
|
+
return build_url(self._host, path)
|
|
32
33
|
|
|
33
34
|
if asset_type == SalesforceReportingAsset.FOLDERS:
|
|
34
35
|
path = asset["attributes"]["url"].lstrip("/")
|
|
35
|
-
return
|
|
36
|
+
return build_url(self._host, path)
|
|
36
37
|
|
|
37
38
|
if asset_type == SalesforceReportingAsset.REPORTS:
|
|
38
39
|
path = f"lightning/r/Report/{asset['Id']}/view"
|
|
39
|
-
return
|
|
40
|
+
return build_url(self._host, path)
|
|
40
41
|
|
|
41
42
|
return None
|
|
42
43
|
|
|
@@ -33,12 +33,13 @@ _SIGMA_HEADERS = {
|
|
|
33
33
|
"Content-Type": _CONTENT_TYPE,
|
|
34
34
|
}
|
|
35
35
|
|
|
36
|
-
_VOLUME_IGNORED =
|
|
36
|
+
_VOLUME_IGNORED = 10_000
|
|
37
37
|
_IGNORED_ERROR_CODES = (
|
|
38
38
|
HTTPStatus.BAD_REQUEST,
|
|
39
39
|
HTTPStatus.BAD_GATEWAY,
|
|
40
40
|
HTTPStatus.INTERNAL_SERVER_ERROR,
|
|
41
41
|
HTTPStatus.CONFLICT,
|
|
42
|
+
HTTPStatus.NOT_FOUND,
|
|
42
43
|
)
|
|
43
44
|
SIGMA_SAFE_MODE = RequestSafeMode(
|
|
44
45
|
max_errors=_VOLUME_IGNORED,
|
|
@@ -19,3 +19,11 @@ class TableauRevampAsset(ExternalAsset):
|
|
|
19
19
|
USAGE = "usage"
|
|
20
20
|
USER = "users"
|
|
21
21
|
WORKBOOK = "workbooks"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# assets that are only available for clients using Tableau Pulse
|
|
25
|
+
TABLEAU_PULSE_ASSETS = (
|
|
26
|
+
TableauRevampAsset.METRIC,
|
|
27
|
+
TableauRevampAsset.METRIC_DEFINITION,
|
|
28
|
+
TableauRevampAsset.SUBSCRIPTION,
|
|
29
|
+
)
|
|
@@ -3,7 +3,7 @@ import logging
|
|
|
3
3
|
import tableauserverclient as TSC # type: ignore
|
|
4
4
|
|
|
5
5
|
from ....utils import SerializedAsset
|
|
6
|
-
from ..assets import TableauRevampAsset
|
|
6
|
+
from ..assets import TABLEAU_PULSE_ASSETS, TableauRevampAsset
|
|
7
7
|
from ..constants import DEFAULT_TIMEOUT_SECONDS
|
|
8
8
|
from .client_metadata_api import TableauClientMetadataApi
|
|
9
9
|
from .client_rest_api import TableauClientRestApi
|
|
@@ -111,9 +111,11 @@ class TableauRevampClient:
|
|
|
111
111
|
self,
|
|
112
112
|
credentials: TableauRevampCredentials,
|
|
113
113
|
timeout_sec: int = DEFAULT_TIMEOUT_SECONDS,
|
|
114
|
+
with_pulse: bool = False,
|
|
114
115
|
):
|
|
115
116
|
self._credentials = credentials
|
|
116
117
|
self._server = _server(credentials.server_url, timeout_sec)
|
|
118
|
+
self._with_pulse = with_pulse
|
|
117
119
|
|
|
118
120
|
self._client_metadata = TableauClientMetadataApi(server=self._server)
|
|
119
121
|
self._client_rest = TableauClientRestApi(server=self._server)
|
|
@@ -192,6 +194,9 @@ class TableauRevampClient:
|
|
|
192
194
|
"""
|
|
193
195
|
Extract the given Tableau Asset
|
|
194
196
|
"""
|
|
197
|
+
if asset in TABLEAU_PULSE_ASSETS and not self._with_pulse:
|
|
198
|
+
logger.info(f"Skipping asset {asset} - Tableau Pulse de-activated")
|
|
199
|
+
return []
|
|
195
200
|
|
|
196
201
|
logger.info(f"Extracting {asset.name}...")
|
|
197
202
|
|