castor-extractor 0.17.4__py3-none-any.whl → 0.18.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of castor-extractor might be problematic.
- CHANGELOG.md +28 -0
- DockerfileUsage.md +21 -0
- castor_extractor/commands/extract_domo.py +2 -10
- castor_extractor/commands/extract_looker.py +2 -13
- castor_extractor/commands/extract_metabase_api.py +5 -10
- castor_extractor/commands/extract_metabase_db.py +6 -16
- castor_extractor/commands/extract_mode.py +2 -13
- castor_extractor/commands/extract_powerbi.py +2 -8
- castor_extractor/commands/extract_qlik.py +2 -7
- castor_extractor/commands/extract_salesforce.py +3 -12
- castor_extractor/commands/extract_salesforce_reporting.py +2 -10
- castor_extractor/commands/extract_sigma.py +2 -7
- castor_extractor/utils/__init__.py +3 -1
- castor_extractor/utils/argument_parser.py +7 -0
- castor_extractor/utils/argument_parser_test.py +25 -0
- castor_extractor/utils/collection.py +8 -0
- castor_extractor/utils/safe_request.py +57 -0
- castor_extractor/utils/safe_request_test.py +77 -0
- castor_extractor/utils/salesforce/__init__.py +1 -2
- castor_extractor/utils/salesforce/constants.py +0 -11
- castor_extractor/utils/salesforce/credentials.py +22 -45
- castor_extractor/visualization/domo/__init__.py +1 -1
- castor_extractor/visualization/domo/client/__init__.py +1 -1
- castor_extractor/visualization/domo/client/client.py +37 -52
- castor_extractor/visualization/domo/client/credentials.py +14 -27
- castor_extractor/visualization/domo/extract.py +5 -26
- castor_extractor/visualization/looker/__init__.py +6 -1
- castor_extractor/visualization/looker/api/__init__.py +2 -1
- castor_extractor/visualization/looker/api/client.py +6 -4
- castor_extractor/visualization/looker/api/client_test.py +5 -3
- castor_extractor/visualization/looker/api/credentials.py +33 -0
- castor_extractor/visualization/looker/api/extraction_parameters.py +38 -0
- castor_extractor/visualization/looker/api/sdk.py +2 -28
- castor_extractor/visualization/looker/constant.py +2 -27
- castor_extractor/visualization/looker/constants.py +17 -0
- castor_extractor/visualization/looker/extract.py +29 -29
- castor_extractor/visualization/metabase/__init__.py +6 -1
- castor_extractor/visualization/metabase/client/__init__.py +2 -2
- castor_extractor/visualization/metabase/client/api/__init__.py +1 -0
- castor_extractor/visualization/metabase/client/api/client.py +8 -14
- castor_extractor/visualization/metabase/client/api/credentials.py +13 -40
- castor_extractor/visualization/metabase/client/db/__init__.py +1 -0
- castor_extractor/visualization/metabase/client/db/client.py +13 -34
- castor_extractor/visualization/metabase/client/db/credentials.py +19 -73
- castor_extractor/visualization/metabase/errors.py +5 -3
- castor_extractor/visualization/metabase/extract.py +3 -3
- castor_extractor/visualization/mode/__init__.py +1 -1
- castor_extractor/visualization/mode/client/__init__.py +1 -0
- castor_extractor/visualization/mode/client/client.py +9 -12
- castor_extractor/visualization/mode/client/client_test.py +3 -3
- castor_extractor/visualization/mode/client/credentials.py +18 -51
- castor_extractor/visualization/mode/extract.py +6 -3
- castor_extractor/visualization/powerbi/__init__.py +1 -1
- castor_extractor/visualization/powerbi/client/__init__.py +2 -1
- castor_extractor/visualization/powerbi/client/credentials.py +17 -9
- castor_extractor/visualization/powerbi/client/credentials_test.py +12 -4
- castor_extractor/visualization/powerbi/client/rest.py +2 -2
- castor_extractor/visualization/powerbi/client/rest_test.py +2 -2
- castor_extractor/visualization/powerbi/extract.py +5 -16
- castor_extractor/visualization/qlik/__init__.py +5 -1
- castor_extractor/visualization/qlik/client/__init__.py +1 -0
- castor_extractor/visualization/qlik/client/engine/__init__.py +1 -0
- castor_extractor/visualization/qlik/client/engine/client.py +5 -6
- castor_extractor/visualization/qlik/client/engine/credentials.py +26 -0
- castor_extractor/visualization/qlik/client/master.py +5 -11
- castor_extractor/visualization/qlik/client/rest.py +4 -4
- castor_extractor/visualization/qlik/client/rest_test.py +6 -2
- castor_extractor/visualization/qlik/extract.py +6 -13
- castor_extractor/visualization/salesforce_reporting/extract.py +6 -20
- castor_extractor/visualization/sigma/__init__.py +1 -1
- castor_extractor/visualization/sigma/client/__init__.py +1 -1
- castor_extractor/visualization/sigma/client/client.py +5 -4
- castor_extractor/visualization/sigma/client/credentials.py +12 -28
- castor_extractor/visualization/sigma/extract.py +5 -18
- castor_extractor/visualization/tableau_revamp/client/credentials.py +40 -87
- castor_extractor/warehouse/databricks/client.py +3 -0
- castor_extractor/warehouse/redshift/queries/column.sql +0 -5
- castor_extractor/warehouse/salesforce/extract.py +2 -2
- castor_extractor/warehouse/salesforce/format.py +5 -3
- castor_extractor/warehouse/snowflake/queries/column.sql +0 -1
- castor_extractor/warehouse/synapse/queries/column.sql +0 -1
- {castor_extractor-0.17.4.dist-info → castor_extractor-0.18.5.dist-info}/METADATA +9 -9
- {castor_extractor-0.17.4.dist-info → castor_extractor-0.18.5.dist-info}/RECORD +86 -83
- castor_extractor/visualization/domo/client/client_test.py +0 -60
- castor_extractor/visualization/domo/constants.py +0 -6
- castor_extractor/visualization/looker/env.py +0 -48
- castor_extractor/visualization/looker/parameters.py +0 -78
- castor_extractor/visualization/qlik/constants.py +0 -3
- castor_extractor/visualization/sigma/constants.py +0 -4
- {castor_extractor-0.17.4.dist-info → castor_extractor-0.18.5.dist-info}/LICENCE +0 -0
- {castor_extractor-0.17.4.dist-info → castor_extractor-0.18.5.dist-info}/WHEEL +0 -0
- {castor_extractor-0.17.4.dist-info → castor_extractor-0.18.5.dist-info}/entry_points.txt +0 -0
castor_extractor/visualization/powerbi/client/rest_test.py
```diff
@@ -5,7 +5,7 @@ import pytest
 from requests import HTTPError
 
 from .constants import GET, POST, Assertions, Keys, QueryParams, Urls
-from .credentials import
+from .credentials import PowerbiCredentials
 from .rest import Client, msal
 
 FAKE_TENANT_ID = "IamFake"
@@ -14,7 +14,7 @@ FAKE_SECRET = "MeThree"
 
 
 def _client() -> Client:
-    creds =
+    creds = PowerbiCredentials(
         tenant_id=FAKE_TENANT_ID,
         client_id=FAKE_CLIENT_ID,
         secret=FAKE_SECRET,
```
castor_extractor/visualization/powerbi/extract.py
```diff
@@ -1,4 +1,4 @@
-from typing import Iterable, List,
+from typing import Iterable, List, Tuple, Union
 
 from ...utils import (
     OUTPUT_DIR,
@@ -10,7 +10,7 @@ from ...utils import (
     write_summary,
 )
 from .assets import METADATA_ASSETS, PowerBiAsset
-from .client import Client,
+from .client import Client, PowerbiCredentials
 
 
 def iterate_all_data(
@@ -24,24 +24,13 @@ def iterate_all_data(
         yield asset, deep_serialize(data)
 
 
-def extract_all(
-    tenant_id: str,
-    client_id: str,
-    secret: str,
-    scopes: Optional[List[str]] = None,
-    output_directory: Optional[str] = None,
-) -> None:
+def extract_all(**kwargs) -> None:
     """
     Extract data from PowerBI REST API
     Store the output files locally under the given output_directory
     """
-    _output_directory =
-    creds =
-        tenant_id=tenant_id,
-        client_id=client_id,
-        secret=secret,
-        scopes=scopes,
-    )
+    _output_directory = kwargs.get("output") or from_env(OUTPUT_DIR)
+    creds = PowerbiCredentials(**kwargs)
     client = Client(creds)
     ts = current_timestamp()
 
```
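The PowerBI entry point now accepts keyword arguments and builds `PowerbiCredentials` itself. A minimal usage sketch, assuming the keyword names shown in the hunk above (`tenant_id`, `client_id`, `secret`, `output`) and that `extract_all` is re-exported from the `powerbi` package; the import path is an assumption based on the file layout:

```python
# Hypothetical call of the reworked entry point; all values are placeholders.
from castor_extractor.visualization import powerbi  # import path assumed

powerbi.extract_all(
    tenant_id="00000000-0000-0000-0000-000000000000",
    client_id="my-app-id",
    secret="my-app-secret",
    output="/tmp/castor",  # optional; otherwise resolved via from_env(OUTPUT_DIR)
)
```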
castor_extractor/visualization/qlik/client/engine/client.py
```diff
@@ -2,6 +2,7 @@ import logging
 
 from .....utils import SafeMode, safe_mode
 from .constants import MEASURES_SESSION_PARAMS, JsonRpcMethod
+from .credentials import QlikCredentials
 from .error import AccessDeniedError, AppSizeExceededError
 from .json_rpc import JsonRpcClient
 from .websocket import open_websocket
@@ -49,10 +50,8 @@ class EngineApiClient:
     get measures using JsonRpcClient and websocket connection.
     """
 
-    def __init__(self,
-        self.
-        self.api_key = api_key
-
+    def __init__(self, credentials: QlikCredentials):
+        self.credentials = credentials
         self._safe_mode = SafeMode(
             exceptions=(AccessDeniedError, AppSizeExceededError),
             max_errors=float("inf"),
@@ -70,8 +69,8 @@ class EngineApiClient:
 
         with open_websocket(
             app_id=app_id,
-            server_url=self.
-            api_key=self.api_key,
+            server_url=self.credentials.base_url,
+            api_key=self.credentials.api_key,
         ) as websocket:
             json_rpc_client = JsonRpcClient(websocket=websocket)
             return _call(json_rpc_client, app_id)
```
castor_extractor/visualization/qlik/client/engine/credentials.py (new file)
```diff
@@ -0,0 +1,26 @@
+from pydantic import Field, SecretStr, field_validator
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+from .....utils import validate_baseurl
+
+QLIK_ENV_PREFIX = "CASTOR_QLIK_"
+
+
+class QlikCredentials(BaseSettings):
+    """
+    Qlik's credentials to connect to the API
+    """
+
+    model_config = SettingsConfigDict(
+        env_prefix=QLIK_ENV_PREFIX,
+        extra="ignore",
+        populate_by_name=True,
+    )
+
+    api_key: str = Field(repr=False)
+    base_url: str
+
+    @field_validator("base_url", mode="before")
+    @classmethod
+    def _check_base_url(cls, base_url: str) -> str:
+        return validate_baseurl(base_url)
```
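Because `QlikCredentials` is a `pydantic_settings.BaseSettings` subclass with `env_prefix="CASTOR_QLIK_"`, its fields can be supplied either as keyword arguments or through prefixed environment variables. A minimal sketch of both paths; the values are placeholders and the import path is assumed from the package layout in the file list above:

```python
import os

# Assumed import path, based on the file layout shown above.
from castor_extractor.visualization.qlik.client.engine import QlikCredentials

# 1) Explicit keyword arguments
creds = QlikCredentials(
    api_key="dummy-key",
    base_url="https://my-tenant.example.com",  # checked by validate_baseurl
)

# 2) Environment variables, resolved through the CASTOR_QLIK_ prefix
os.environ["CASTOR_QLIK_API_KEY"] = "dummy-key"
os.environ["CASTOR_QLIK_BASE_URL"] = "https://my-tenant.example.com"
creds = QlikCredentials()
```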
castor_extractor/visualization/qlik/client/master.py
```diff
@@ -4,7 +4,7 @@ from tqdm import tqdm  # type: ignore
 
 from ..assets import QlikAsset
 from .constants import APP_EXTERNAL_ID_KEY, SCOPED_ASSETS
-from .engine import EngineApiClient
+from .engine import EngineApiClient, QlikCredentials
 from .rest import RestApiClient
 
 ListedData = List[dict]
@@ -53,25 +53,19 @@ class QlikMasterClient:
 
     def __init__(
         self,
-
-        api_key: str,
+        credentials: QlikCredentials,
         except_http_error_statuses: Optional[List[int]] = None,
         display_progress: bool = True,
     ):
-        self._server_url =
-        self._api_key = api_key
+        self._server_url = credentials.base_url
        self.display_progress = display_progress
 
         self.rest_api_client = RestApiClient(
-
-            api_key=self._api_key,
+            credentials=credentials,
             except_http_error_statuses=except_http_error_statuses,
         )
 
-        self.engine_api_client = EngineApiClient(
-            server_url=self._server_url,
-            api_key=self._api_key,
-        )
+        self.engine_api_client = EngineApiClient(credentials=credentials)
 
     def _fetch_lineage(self, apps: ListedData) -> ListedData:
         callback = self.rest_api_client.data_lineage
```
castor_extractor/visualization/qlik/client/rest.py
```diff
@@ -16,6 +16,7 @@ from .constants import (
     RETRY_COUNTS,
     RETRY_STATUSES,
 )
+from .engine import QlikCredentials
 
 logger = logging.getLogger(__name__)
 
@@ -60,12 +61,11 @@ class RestApiClient:
 
     def __init__(
         self,
-
-        api_key: str,
+        credentials: QlikCredentials,
         except_http_error_statuses: Optional[List[int]] = None,
     ):
-        self._server_url =
-        self._api_key = api_key
+        self._server_url = credentials.base_url
+        self._api_key = credentials.api_key
         self._session = _session()
         self._except_http_error_statuses = except_http_error_statuses or []
         self._authenticate()
```
castor_extractor/visualization/qlik/client/rest_test.py
```diff
@@ -1,6 +1,7 @@
 from typing import Optional
 from unittest.mock import call, patch
 
+from .engine import QlikCredentials
 from .rest import RestApiClient
 
 
@@ -22,8 +23,11 @@ def _check_called_once(
 def test_rest_api_client_pager():
     dummy_server_url = "https://clic.kom"
     dummy_api_key = "i-am-the-key-dont-let-others-know-about"
-
-
+    credentials = QlikCredentials(
+        base_url=dummy_server_url,
+        api_key=dummy_api_key,
+    )
+    client = RestApiClient(credentials=credentials)
 
     first_page_url = "https://clic.kom/assets"
 
```
castor_extractor/visualization/qlik/extract.py
```diff
@@ -7,13 +7,11 @@ from ...utils import (
     deep_serialize,
     from_env,
     get_output_filename,
-    validate_baseurl,
     write_json,
     write_summary,
 )
 from .assets import QlikAsset
-from .client import QlikClient
-from .constants import API_KEY, BASE_URL
+from .client import QlikClient, QlikCredentials
 
 logger = logging.getLogger(__name__)
 
@@ -49,23 +47,18 @@ def iterate_all_data(
 
 
 def extract_all(
-
-    api_key: Optional[str] = None,
-    output_directory: Optional[str] = None,
-    except_http_error_statuses: Optional[List[int]] = None,
+    except_http_error_statuses: Optional[List[int]] = None, **kwargs
 ) -> None:
     """
     Extract data from Qlik REST API
     Store the output files locally under the given output_directory
     """
 
-
-
-    _api_key = api_key or from_env(API_KEY)
+    credentials = QlikCredentials(**kwargs)
+    _output_directory = kwargs.get("output") or from_env(OUTPUT_DIR)
 
     client = QlikClient(
-
-        api_key=_api_key,
+        credentials=credentials,
         except_http_error_statuses=except_http_error_statuses,
     )
 
@@ -75,4 +68,4 @@ def extract_all(
         filename = get_output_filename(key.name.lower(), _output_directory, ts)
         write_json(filename, data)
 
-    write_summary(_output_directory, ts, base_url=
+    write_summary(_output_directory, ts, base_url=credentials.base_url)
```
castor_extractor/visualization/salesforce_reporting/extract.py
```diff
@@ -1,5 +1,5 @@
 import logging
-from typing import Iterable,
+from typing import Iterable, Tuple, Union
 
 from ...utils import (
     OUTPUT_DIR,
@@ -28,29 +28,15 @@ def iterate_all_data(
         yield asset.name.lower(), deep_serialize(data)
 
 
-def extract_all(
-    username: str,
-    password: str,
-    client_id: str,
-    client_secret: str,
-    security_token: str,
-    base_url: str,
-    output_directory: Optional[str] = None,
-) -> None:
+def extract_all(**kwargs) -> None:
     """
     Extract data from Salesforce REST API
     Store the output files locally under the given output_directory
     """
-    _output_directory =
-
-
-
-        client_id=client_id,
-        client_secret=client_secret,
-        security_token=security_token,
-        base_url=base_url,
-    )
-    client = SalesforceReportingClient(credentials=creds)
+    _output_directory = kwargs.get("output") or from_env(OUTPUT_DIR)
+    credentials = SalesforceCredentials(**kwargs)
+
+    client = SalesforceReportingClient(credentials=credentials)
     ts = current_timestamp()
 
     for key, data in iterate_all_data(client):
```
castor_extractor/visualization/sigma/client/__init__.py
```diff
@@ -1,2 +1,2 @@
 from .client import SigmaClient
-from .credentials import
+from .credentials import SigmaCredentials
```
castor_extractor/visualization/sigma/client/client.py
```diff
@@ -5,7 +5,7 @@ from urllib.parse import urljoin
 import requests
 
 from ..assets import SigmaAsset
-from .credentials import
+from .credentials import SigmaCredentials
 from .endpoints import EndpointFactory
 from .pagination import Pagination
 
@@ -29,6 +29,7 @@ class SigmaClient:
         self.host = credentials.host
         self.client_id = credentials.client_id
         self.api_token = credentials.api_token
+        self.grant_type = credentials.grant_type
         self.headers: Optional[Dict[str, str]] = None
 
     def _get_token(self) -> Dict[str, str]:
@@ -37,9 +38,9 @@ class SigmaClient:
         token_response = requests.post(  # noqa: S113
             token_api_path,
             data={
-
-
-
+                "grant_type": self.grant_type,
+                "client_id": self.client_id,
+                "client_secret": self.api_token,
             },
         )
         if token_response.status_code != requests.codes.OK:
```
castor_extractor/visualization/sigma/client/credentials.py
```diff
@@ -1,35 +1,19 @@
-from
-from
+from pydantic import Field, SecretStr
+from pydantic_settings import BaseSettings, SettingsConfigDict
 
+CASTOR_ENV_PREFIX = "CASTOR_SIGMA_"
 
-class CredentialsKey(Enum):
-    """Value enum object for the credentials"""
 
-
-    CLIENT_ID = "client_id"
-    HOST = "host"
-    GRANT_TYPE = "grant_type"
-    API_TOKEN = "api_token"  # noqa: S105
-
-
-CLIENT_ALLOWED_KEYS = (
-    CredentialsKey.HOST.value,
-    CredentialsKey.CLIENT_ID.value,
-    CredentialsKey.API_TOKEN.value,
-)
-
-
-@dataclass
-class SigmaCredentials:
+class SigmaCredentials(BaseSettings):
     """Class to handle Sigma rest API permissions"""
 
-
+    model_config = SettingsConfigDict(
+        env_prefix=CASTOR_ENV_PREFIX,
+        extra="ignore",
+        populate_by_name=True,
+    )
+
+    api_token: str = Field(repr=False)
     client_id: str
     host: str
-
-    @classmethod
-    def from_secret(cls, secret: dict) -> "SigmaCredentials":
-        credentials = {
-            k: v for k, v in secret.items() if k in CLIENT_ALLOWED_KEYS
-        }
-        return cls(**credentials)
+    grant_type: str = "client_credentials"
```
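`SigmaCredentials` follows the same settings pattern, with `grant_type` defaulting to `"client_credentials"`, the value `SigmaClient._get_token` now posts to the token endpoint. A hedged sketch with placeholder values and an assumed import path:

```python
# Assumed import path; values are placeholders.
from castor_extractor.visualization.sigma.client import SigmaCredentials

creds = SigmaCredentials(
    host="https://api.sigmacomputing.com",
    client_id="my-client-id",
    api_token="my-client-secret",
)
assert creds.grant_type == "client_credentials"  # default, can be overridden

# Equivalent via environment variables, thanks to the CASTOR_SIGMA_ prefix:
# CASTOR_SIGMA_HOST, CASTOR_SIGMA_CLIENT_ID, CASTOR_SIGMA_API_TOKEN.
```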
castor_extractor/visualization/sigma/extract.py
```diff
@@ -1,5 +1,5 @@
 import logging
-from typing import Iterable, Iterator,
+from typing import Iterable, Iterator, Tuple, Union
 
 from ...utils import (
     OUTPUT_DIR,
@@ -12,7 +12,6 @@ from ...utils import (
 )
 from .assets import SigmaAsset
 from .client import SigmaClient, SigmaCredentials
-from .constants import API_TOKEN, CLIENT_ID, HOST
 
 logger = logging.getLogger(__name__)
 
@@ -51,27 +50,15 @@ def iterate_all_data(
     yield SigmaAsset.LINEAGES, list(deep_serialize(lineages))
 
 
-def extract_all(
-    host: Optional[str] = None,
-    client_id: Optional[str] = None,
-    api_token: Optional[str] = None,
-    output_directory: Optional[str] = None,
-) -> None:
+def extract_all(**kwargs) -> None:
     """
     Extract data from Sigma API
     Store the output files locally under the given output_directory
     """
 
-    _output_directory =
-    _client_id = client_id or from_env(CLIENT_ID)
-    _host = host or from_env(HOST)
-    _api_token = api_token or from_env(API_TOKEN)
+    _output_directory = kwargs.get("output") or from_env(OUTPUT_DIR)
 
-    credentials = SigmaCredentials(
-        host=_host,
-        client_id=_client_id,
-        api_token=_api_token,
-    )
+    credentials = SigmaCredentials(**kwargs)
     client = SigmaClient(credentials=credentials)
 
     ts = current_timestamp()
@@ -80,4 +67,4 @@ def extract_all(
         filename = get_output_filename(key.name.lower(), _output_directory, ts)
         write_json(filename, data)
 
-    write_summary(_output_directory, ts, host=
+    write_summary(_output_directory, ts, host=credentials.host)
```
castor_extractor/visualization/tableau_revamp/client/credentials.py
```diff
@@ -1,104 +1,57 @@
-from
-from typing import Dict, Literal, Optional, overload
+from typing import Optional
 
-from
-
-_AUTH_ERROR_MSG = "Need either user and password or token_name and token"
+from pydantic import field_validator, model_validator
+from pydantic_settings import BaseSettings, SettingsConfigDict
 
 # To specify the default site on Tableau Server, you can use an empty string
 # https://tableau.github.io/server-client-python/docs/api-ref#authentication
 _DEFAULT_SERVER_SITE_ID = ""
 
-
-
-
-
-TABLEAU_USER = "user"
-TABLEAU_PASSWORD = "password"  # noqa: S105
-TABLEAU_TOKEN_NAME = "token_name"  # noqa: S105
-TABLEAU_TOKEN = "token"  # noqa: S105
-TABLEAU_SITE_ID = "site_id"
-TABLEAU_SERVER_URL = "server_url"
-
-
-CREDENTIALS_ENV: Dict[CredentialsKey, str] = {
-    CredentialsKey.TABLEAU_USER: "CASTOR_TABLEAU_USER",
-    CredentialsKey.TABLEAU_PASSWORD: "CASTOR_TABLEAU_PASSWORD",
-    CredentialsKey.TABLEAU_TOKEN_NAME: "CASTOR_TABLEAU_TOKEN_NAME",
-    CredentialsKey.TABLEAU_TOKEN: "CASTOR_TABLEAU_TOKEN",
-    CredentialsKey.TABLEAU_SITE_ID: "CASTOR_TABLEAU_SITE_ID",
-    CredentialsKey.TABLEAU_SERVER_URL: "CASTOR_TABLEAU_SERVER_URL",
-}
-
-
-@overload
-def get_value(key: CredentialsKey, kwargs: dict) -> Optional[str]: ...
+# In Castor APP, site_id is mandatory: users can't let this field empty
+# In that case, we encourage users to write "Default" instead
+_DEFAULT_SITE_ID_USER_INPUT = "default"
 
 
-
-def get_value(
-    key: CredentialsKey, kwargs: dict, optional: Literal[True]
-) -> Optional[str]: ...
+TABLEAU_ENV_PREFIX = "CASTOR_TABLEAU_"
 
 
-
-def get_value(
-    key: CredentialsKey, kwargs: dict, optional: Literal[False]
-) -> str: ...
-
-
-def get_value(
-    key: CredentialsKey,
-    kwargs: dict,
-    optional: bool = True,
-) -> Optional[str]:
+class TableauRevampCredentials(BaseSettings):
     """
-
-    - from kwargs in priority
-    - from ENV otherwise
-    Raises an error if not found (unless optional)
+    Tableau's credentials to connect to both APIs (REST and GRAPHQL)
     """
 
-
-
-
-
-
+    model_config = SettingsConfigDict(
+        env_prefix=TABLEAU_ENV_PREFIX,
+        extra="ignore",
+        populate_by_name=True,
+    )
 
+    server_url: str
+    site_id: str = ""
 
-
-
-
-
-
-    def __init__(
-        self,
-        *,
-        server_url: str,
-        site_id: Optional[str],
-        user: Optional[str],
-        password: Optional[str],
-        token_name: Optional[str],
-        token: Optional[str],
-    ):
-        self.user = user
-        self.site_id = site_id or _DEFAULT_SERVER_SITE_ID
-        self.server_url = server_url
-        self.password = password
-        self.token_name = token_name
-        self.token = token
+    password: Optional[str] = None
+    token: Optional[str] = None
+    token_name: Optional[str] = None
+    user: Optional[str] = None
 
+    @field_validator("site_id", mode="before")
     @classmethod
-    def
-
-
-
-
-
-
-
-
-
-
-
+    def _check_site_id(cls, site_id: Optional[str]) -> str:
+        if not site_id or site_id.lower() == _DEFAULT_SITE_ID_USER_INPUT:
+            return _DEFAULT_SERVER_SITE_ID
+        return site_id
+
+    @model_validator(mode="after")
+    def _check_user_xor_pat_login(self) -> "TableauRevampCredentials":
+        """
+        Checks that credentials are correctly input, it means either:
+        - User and password are filled
+        - Token and Token name are filled
+        """
+        user_login = self.password and self.user
+        pat_login = self.token_name and self.token
+        if not user_login and not pat_login:
+            raise ValueError("Either token or user identification is required")
+        if user_login and pat_login:
+            raise ValueError("Can't have both token and user identification")
+        return self
```
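The new model validator enforces exactly one authentication mode: user/password or token_name/token. A hedged illustration of the behaviour shown in the hunk above, with placeholder values, an assumed import path, and assuming no `CASTOR_TABLEAU_*` variables are set in the environment:

```python
from pydantic import ValidationError

# Assumed import path, based on the file layout shown above.
from castor_extractor.visualization.tableau_revamp.client.credentials import (
    TableauRevampCredentials,
)

# Personal access token login: token_name + token
creds = TableauRevampCredentials(
    server_url="https://tableau.example.com",
    site_id="Default",      # normalised to "" (the default site) by _check_site_id
    token_name="extractor",
    token="my-pat-secret",
)
assert creds.site_id == ""

# Providing neither user/password nor token_name/token fails validation
try:
    TableauRevampCredentials(server_url="https://tableau.example.com")
except ValidationError as err:
    print(err)  # wraps "Either token or user identification is required"
```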
castor_extractor/warehouse/databricks/client.py
```diff
@@ -28,6 +28,7 @@ from .utils import build_path, tag_label
 
 logger = logging.getLogger(__name__)
 
+_DATABRICKS_CLIENT_TIMEOUT = 60
 _MAX_NUMBER_OF_LINEAGE_ERRORS = 1000
 _MAX_THREADS = 10
 _RETRY_ATTEMPTS = 3
@@ -98,6 +99,8 @@ class DatabricksClient(APIClient):
         self._db_blocked = db_blocked
         self._has_table_tags = has_table_tags
         self._has_column_tags = has_column_tags
+
+        self._timeout = _DATABRICKS_CLIENT_TIMEOUT
         self.formatter = DatabricksFormatter()
 
     def execute_sql(
```
castor_extractor/warehouse/redshift/queries/column.sql
```diff
@@ -28,7 +28,6 @@ information_tables AS (
     i.table_id || '.' || c.column_name AS column_id,
     c.data_type,
     c.ordinal_position,
-    c.column_default,
     c.is_nullable,
     c.character_maximum_length,
     c.character_octet_length,
@@ -59,7 +58,6 @@ raw_tables AS (
     a.attname AS column_name,
     c.oid::TEXT || '.' || a.attname AS column_id,
     a.attnum AS ordinal_position,
-    ad.adsrc AS column_default,
     CASE
         WHEN t.typname = 'bpchar' THEN 'char'
         ELSE t.typname
@@ -90,7 +88,6 @@ tables AS (
     COALESCE(i.data_type, r.data_type) AS data_type,
     COALESCE(i.ordinal_position, r.ordinal_position) AS ordinal_position,
     COALESCE(i.is_nullable, r.is_nullable) AS is_nullable,
-    COALESCE(i.column_default, r.column_default) AS column_default,
     i.character_maximum_length::INT AS character_maximum_length,
     i.character_octet_length::INT AS character_octet_length,
     i.numeric_precision::INT AS numeric_precision,
@@ -117,7 +114,6 @@ views_late_binding AS (
     c.data_type,
     c.ordinal_position,
     'YES' AS is_nullable,
-    NULL::TEXT AS column_default,
     NULL::INT AS character_maximum_length,
     NULL::INT AS character_octet_length,
     NULL::INT AS numeric_precision,
@@ -162,7 +158,6 @@ external_columns AS (
     c.external_type AS data_type,
     MIN(c.columnnum) AS ordinal_position,
     CASE c.is_nullable WHEN 'false' THEN 'NO' ELSE 'YES' END AS is_nullable,
-    NULL AS column_default,
     NULL AS character_maximum_length,
     NULL AS character_octet_length,
     NULL AS numeric_precision,
```