castor-extractor 0.17.4__py3-none-any.whl → 0.18.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +28 -0
- DockerfileUsage.md +21 -0
- castor_extractor/commands/extract_domo.py +2 -10
- castor_extractor/commands/extract_looker.py +2 -13
- castor_extractor/commands/extract_metabase_api.py +5 -10
- castor_extractor/commands/extract_metabase_db.py +6 -16
- castor_extractor/commands/extract_mode.py +2 -13
- castor_extractor/commands/extract_powerbi.py +2 -8
- castor_extractor/commands/extract_qlik.py +2 -7
- castor_extractor/commands/extract_salesforce.py +3 -12
- castor_extractor/commands/extract_salesforce_reporting.py +2 -10
- castor_extractor/commands/extract_sigma.py +2 -7
- castor_extractor/utils/__init__.py +3 -1
- castor_extractor/utils/argument_parser.py +7 -0
- castor_extractor/utils/argument_parser_test.py +25 -0
- castor_extractor/utils/collection.py +8 -0
- castor_extractor/utils/safe_request.py +57 -0
- castor_extractor/utils/safe_request_test.py +77 -0
- castor_extractor/utils/salesforce/__init__.py +1 -2
- castor_extractor/utils/salesforce/constants.py +0 -11
- castor_extractor/utils/salesforce/credentials.py +22 -45
- castor_extractor/visualization/domo/__init__.py +1 -1
- castor_extractor/visualization/domo/client/__init__.py +1 -1
- castor_extractor/visualization/domo/client/client.py +37 -52
- castor_extractor/visualization/domo/client/credentials.py +14 -27
- castor_extractor/visualization/domo/extract.py +5 -26
- castor_extractor/visualization/looker/__init__.py +6 -1
- castor_extractor/visualization/looker/api/__init__.py +2 -1
- castor_extractor/visualization/looker/api/client.py +6 -4
- castor_extractor/visualization/looker/api/client_test.py +5 -3
- castor_extractor/visualization/looker/api/credentials.py +33 -0
- castor_extractor/visualization/looker/api/extraction_parameters.py +38 -0
- castor_extractor/visualization/looker/api/sdk.py +2 -28
- castor_extractor/visualization/looker/constant.py +2 -27
- castor_extractor/visualization/looker/constants.py +17 -0
- castor_extractor/visualization/looker/extract.py +29 -29
- castor_extractor/visualization/metabase/__init__.py +6 -1
- castor_extractor/visualization/metabase/client/__init__.py +2 -2
- castor_extractor/visualization/metabase/client/api/__init__.py +1 -0
- castor_extractor/visualization/metabase/client/api/client.py +8 -14
- castor_extractor/visualization/metabase/client/api/credentials.py +13 -40
- castor_extractor/visualization/metabase/client/db/__init__.py +1 -0
- castor_extractor/visualization/metabase/client/db/client.py +13 -34
- castor_extractor/visualization/metabase/client/db/credentials.py +19 -73
- castor_extractor/visualization/metabase/errors.py +5 -3
- castor_extractor/visualization/metabase/extract.py +3 -3
- castor_extractor/visualization/mode/__init__.py +1 -1
- castor_extractor/visualization/mode/client/__init__.py +1 -0
- castor_extractor/visualization/mode/client/client.py +9 -12
- castor_extractor/visualization/mode/client/client_test.py +3 -3
- castor_extractor/visualization/mode/client/credentials.py +18 -51
- castor_extractor/visualization/mode/extract.py +6 -3
- castor_extractor/visualization/powerbi/__init__.py +1 -1
- castor_extractor/visualization/powerbi/client/__init__.py +2 -1
- castor_extractor/visualization/powerbi/client/credentials.py +17 -9
- castor_extractor/visualization/powerbi/client/credentials_test.py +12 -4
- castor_extractor/visualization/powerbi/client/rest.py +2 -2
- castor_extractor/visualization/powerbi/client/rest_test.py +2 -2
- castor_extractor/visualization/powerbi/extract.py +5 -16
- castor_extractor/visualization/qlik/__init__.py +5 -1
- castor_extractor/visualization/qlik/client/__init__.py +1 -0
- castor_extractor/visualization/qlik/client/engine/__init__.py +1 -0
- castor_extractor/visualization/qlik/client/engine/client.py +5 -6
- castor_extractor/visualization/qlik/client/engine/credentials.py +26 -0
- castor_extractor/visualization/qlik/client/master.py +5 -11
- castor_extractor/visualization/qlik/client/rest.py +4 -4
- castor_extractor/visualization/qlik/client/rest_test.py +6 -2
- castor_extractor/visualization/qlik/extract.py +6 -13
- castor_extractor/visualization/salesforce_reporting/extract.py +6 -20
- castor_extractor/visualization/sigma/__init__.py +1 -1
- castor_extractor/visualization/sigma/client/__init__.py +1 -1
- castor_extractor/visualization/sigma/client/client.py +5 -4
- castor_extractor/visualization/sigma/client/credentials.py +12 -28
- castor_extractor/visualization/sigma/extract.py +5 -18
- castor_extractor/visualization/tableau_revamp/client/credentials.py +40 -87
- castor_extractor/warehouse/databricks/client.py +3 -0
- castor_extractor/warehouse/redshift/queries/column.sql +0 -5
- castor_extractor/warehouse/salesforce/extract.py +2 -2
- castor_extractor/warehouse/salesforce/format.py +5 -3
- castor_extractor/warehouse/snowflake/queries/column.sql +0 -1
- castor_extractor/warehouse/synapse/queries/column.sql +0 -1
- {castor_extractor-0.17.4.dist-info → castor_extractor-0.18.5.dist-info}/METADATA +9 -9
- {castor_extractor-0.17.4.dist-info → castor_extractor-0.18.5.dist-info}/RECORD +86 -83
- castor_extractor/visualization/domo/client/client_test.py +0 -60
- castor_extractor/visualization/domo/constants.py +0 -6
- castor_extractor/visualization/looker/env.py +0 -48
- castor_extractor/visualization/looker/parameters.py +0 -78
- castor_extractor/visualization/qlik/constants.py +0 -3
- castor_extractor/visualization/sigma/constants.py +0 -4
- {castor_extractor-0.17.4.dist-info → castor_extractor-0.18.5.dist-info}/LICENCE +0 -0
- {castor_extractor-0.17.4.dist-info → castor_extractor-0.18.5.dist-info}/WHEEL +0 -0
- {castor_extractor-0.17.4.dist-info → castor_extractor-0.18.5.dist-info}/entry_points.txt +0 -0
|
@@ -1,11 +1,21 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from datetime import datetime, timedelta
|
|
3
3
|
from http import HTTPStatus
|
|
4
|
-
from typing import Iterator, List, Optional, Set
|
|
4
|
+
from typing import Iterator, List, Optional, Set
|
|
5
5
|
|
|
6
6
|
import requests
|
|
7
7
|
|
|
8
|
-
from ....utils import
|
|
8
|
+
from ....utils import (
|
|
9
|
+
RequestSafeMode,
|
|
10
|
+
ResponseJson,
|
|
11
|
+
at_midnight,
|
|
12
|
+
current_date,
|
|
13
|
+
empty_iterator,
|
|
14
|
+
handle_response,
|
|
15
|
+
past_date,
|
|
16
|
+
retry,
|
|
17
|
+
timestamp_ms,
|
|
18
|
+
)
|
|
9
19
|
from ..assets import DomoAsset
|
|
10
20
|
from .credentials import DomoCredentials
|
|
11
21
|
from .endpoints import Endpoint, EndpointFactory
|
|
@@ -17,11 +27,17 @@ DOMO_PUBLIC_URL = "https://api.domo.com"
|
|
|
17
27
|
DEFAULT_TIMEOUT = 120
|
|
18
28
|
TOKEN_EXPIRATION_SECONDS = timedelta(seconds=3000) # auth token lasts 1 hour
|
|
19
29
|
|
|
30
|
+
|
|
31
|
+
# Safe Mode
|
|
32
|
+
VOLUME_IGNORED = 10
|
|
20
33
|
IGNORED_ERROR_CODES = (
|
|
21
34
|
HTTPStatus.BAD_REQUEST,
|
|
22
35
|
HTTPStatus.NOT_FOUND,
|
|
23
36
|
)
|
|
24
|
-
|
|
37
|
+
DOMO_SAFE_MODE = RequestSafeMode(
|
|
38
|
+
max_errors=VOLUME_IGNORED,
|
|
39
|
+
status_codes=IGNORED_ERROR_CODES,
|
|
40
|
+
)
|
|
25
41
|
|
|
26
42
|
_RETRY_EXCEPTIONS = [
|
|
27
43
|
requests.exceptions.ConnectTimeout,
|
|
@@ -33,40 +49,17 @@ _RETRY_BASE_MS = 10 * 60 * 1000 # 10 minutes
|
|
|
33
49
|
logger = logging.getLogger(__name__)
|
|
34
50
|
|
|
35
51
|
|
|
36
|
-
def _handle_response(response: requests.Response) -> requests.Response:
|
|
37
|
-
response.raise_for_status()
|
|
38
|
-
return response
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
def _ignore_or_raise(
|
|
42
|
-
error: requests.RequestException,
|
|
43
|
-
ignore_error_codes: Optional[Tuple[int, ...]],
|
|
44
|
-
) -> dict:
|
|
45
|
-
"""
|
|
46
|
-
Raises the error unless the response status code is in the ignored error
|
|
47
|
-
codes list.
|
|
48
|
-
"""
|
|
49
|
-
if not ignore_error_codes:
|
|
50
|
-
raise error
|
|
51
|
-
|
|
52
|
-
response = error.response
|
|
53
|
-
if response is None:
|
|
54
|
-
raise error
|
|
55
|
-
|
|
56
|
-
if response.status_code in ignore_error_codes:
|
|
57
|
-
logger.warning(error)
|
|
58
|
-
return {}
|
|
59
|
-
|
|
60
|
-
raise error
|
|
61
|
-
|
|
62
|
-
|
|
63
52
|
class DomoClient:
|
|
64
53
|
"""
|
|
65
54
|
Connect to Domo API and fetch main assets.
|
|
66
55
|
https://developer.domo.com/portal/8ba9aedad3679-ap-is#platform-oauth-apis
|
|
67
56
|
"""
|
|
68
57
|
|
|
69
|
-
def __init__(
|
|
58
|
+
def __init__(
|
|
59
|
+
self,
|
|
60
|
+
credentials: DomoCredentials,
|
|
61
|
+
safe_mode: Optional[RequestSafeMode] = None,
|
|
62
|
+
):
|
|
70
63
|
self._authentication = credentials.authentication
|
|
71
64
|
self._bearer_headers: Optional[dict] = None
|
|
72
65
|
self._session = requests.session()
|
|
@@ -76,6 +69,7 @@ class DomoClient:
|
|
|
76
69
|
self._timeout = DEFAULT_TIMEOUT
|
|
77
70
|
self.base_url = credentials.base_url
|
|
78
71
|
self.cloud_id = credentials.cloud_id
|
|
72
|
+
self.safe_mode = safe_mode or DOMO_SAFE_MODE
|
|
79
73
|
|
|
80
74
|
def _token_expired(self) -> bool:
|
|
81
75
|
token_lifetime = datetime.now() - self._token_creation_time
|
|
@@ -95,7 +89,8 @@ class DomoClient:
|
|
|
95
89
|
auth=basic_authentication,
|
|
96
90
|
timeout=self._timeout,
|
|
97
91
|
)
|
|
98
|
-
|
|
92
|
+
response.raise_for_status()
|
|
93
|
+
result = response.json()
|
|
99
94
|
|
|
100
95
|
bearer_token = result["access_token"]
|
|
101
96
|
self._bearer_headers = {"authorization": f"Bearer {bearer_token}"}
|
|
@@ -113,7 +108,7 @@ class DomoClient:
|
|
|
113
108
|
endpoint: Endpoint,
|
|
114
109
|
params: Optional[dict] = None,
|
|
115
110
|
asset_id: Optional[str] = None,
|
|
116
|
-
) ->
|
|
111
|
+
) -> ResponseJson:
|
|
117
112
|
params = params if params else {}
|
|
118
113
|
is_private = endpoint.is_private
|
|
119
114
|
headers = self._private_headers if is_private else self._bearer_auth()
|
|
@@ -125,27 +120,16 @@ class DomoClient:
|
|
|
125
120
|
timeout=self._timeout,
|
|
126
121
|
)
|
|
127
122
|
|
|
128
|
-
|
|
129
|
-
logger.warning(
|
|
130
|
-
ERROR_TPL.format(
|
|
131
|
-
status_code=response.status_code,
|
|
132
|
-
reason=response.reason,
|
|
133
|
-
)
|
|
134
|
-
)
|
|
135
|
-
return response
|
|
123
|
+
return handle_response(response, self.safe_mode)
|
|
136
124
|
|
|
137
125
|
def _get_element(
|
|
138
126
|
self,
|
|
139
127
|
endpoint: Endpoint,
|
|
140
128
|
params: Optional[dict] = None,
|
|
141
129
|
asset_id: Optional[str] = None,
|
|
142
|
-
ignore_error_codes: Optional[Tuple[int, ...]] = None,
|
|
143
130
|
) -> dict:
|
|
144
131
|
"""Used when the response only contains one element"""
|
|
145
|
-
|
|
146
|
-
return self._get(endpoint, params, asset_id).json()
|
|
147
|
-
except requests.RequestException as error:
|
|
148
|
-
return _ignore_or_raise(error, ignore_error_codes)
|
|
132
|
+
return self._get(endpoint, params, asset_id)
|
|
149
133
|
|
|
150
134
|
def _get_many(
|
|
151
135
|
self,
|
|
@@ -154,7 +138,7 @@ class DomoClient:
|
|
|
154
138
|
asset_id: Optional[str] = None,
|
|
155
139
|
) -> List[dict]:
|
|
156
140
|
"""Used when the response contains multiple elements"""
|
|
157
|
-
return self._get(endpoint, params, asset_id)
|
|
141
|
+
return self._get(endpoint, params, asset_id)
|
|
158
142
|
|
|
159
143
|
def _get_paginated(self, endpoint: Endpoint) -> List[dict]:
|
|
160
144
|
"""Used when the response is paginated and need iterations"""
|
|
@@ -172,10 +156,7 @@ class DomoClient:
|
|
|
172
156
|
|
|
173
157
|
def _datasources(self, page_id: str) -> RawData:
|
|
174
158
|
endpoint = self._endpoint_factory.page_content(page_id)
|
|
175
|
-
page_content = self._get_element(
|
|
176
|
-
endpoint,
|
|
177
|
-
ignore_error_codes=IGNORED_ERROR_CODES,
|
|
178
|
-
)
|
|
159
|
+
page_content = self._get_element(endpoint)
|
|
179
160
|
processed: set[str] = set()
|
|
180
161
|
for card in page_content.get("cards", []):
|
|
181
162
|
for datasource in card["datasources"]:
|
|
@@ -196,7 +177,7 @@ class DomoClient:
|
|
|
196
177
|
) -> Iterator[dict]:
|
|
197
178
|
"""Recursively fetch pages while building the folder architecture"""
|
|
198
179
|
if not page_tree:
|
|
199
|
-
return
|
|
180
|
+
return empty_iterator()
|
|
200
181
|
|
|
201
182
|
for page in page_tree:
|
|
202
183
|
page_id = page.get("id")
|
|
@@ -211,6 +192,10 @@ class DomoClient:
|
|
|
211
192
|
self._endpoint_factory.pages,
|
|
212
193
|
asset_id=page_id,
|
|
213
194
|
)
|
|
195
|
+
|
|
196
|
+
if not detail:
|
|
197
|
+
continue
|
|
198
|
+
|
|
214
199
|
datasources = self._datasources(page_id)
|
|
215
200
|
yield {
|
|
216
201
|
**detail,
|
|
@@ -1,39 +1,26 @@
|
|
|
1
|
-
from
|
|
2
|
-
from enum import Enum
|
|
3
|
-
from typing import Dict, Optional, Tuple
|
|
1
|
+
from typing import Dict, Optional
|
|
4
2
|
|
|
3
|
+
from pydantic import Field, SecretStr
|
|
4
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
5
5
|
from requests.auth import HTTPBasicAuth
|
|
6
6
|
|
|
7
|
+
DOMO_ENV_PREFIX = "CASTOR_DOMO_"
|
|
7
8
|
|
|
8
|
-
class CredentialsKey(Enum):
|
|
9
|
-
"""Value enum object for the credentials"""
|
|
10
9
|
|
|
11
|
-
|
|
12
|
-
BASE_URL = "base_url"
|
|
13
|
-
CLIENT_ID = "client_id"
|
|
14
|
-
CLOUD_ID = "cloud_id"
|
|
15
|
-
DEVELOPER_TOKEN = "developer_token" # noqa: S105
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
CLIENT_ALLOWED_KEYS: Tuple[str, ...] = tuple(c.value for c in CredentialsKey)
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
@dataclass
|
|
22
|
-
class DomoCredentials:
|
|
10
|
+
class DomoCredentials(BaseSettings):
|
|
23
11
|
"""Class to handle Domo rest API permissions"""
|
|
24
12
|
|
|
25
|
-
|
|
13
|
+
model_config = SettingsConfigDict(
|
|
14
|
+
env_prefix=DOMO_ENV_PREFIX,
|
|
15
|
+
extra="ignore",
|
|
16
|
+
populate_by_name=True,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
api_token: str = Field(repr=False)
|
|
26
20
|
base_url: str
|
|
27
21
|
client_id: str
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
@classmethod
|
|
32
|
-
def from_secret(cls, secret: dict) -> "DomoCredentials":
|
|
33
|
-
credentials = {
|
|
34
|
-
k: v for k, v in secret.items() if k in CLIENT_ALLOWED_KEYS
|
|
35
|
-
}
|
|
36
|
-
return cls(**credentials)
|
|
22
|
+
cloud_id: Optional[str] = Field(validation_alias="CLOUD_ID", default=None)
|
|
23
|
+
developer_token: str = Field(repr=False)
|
|
37
24
|
|
|
38
25
|
@property
|
|
39
26
|
def authentication(self) -> HTTPBasicAuth:
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import Iterable, Iterator,
|
|
2
|
+
from typing import Iterable, Iterator, Tuple, Union
|
|
3
3
|
|
|
4
4
|
from ...utils import (
|
|
5
5
|
OUTPUT_DIR,
|
|
@@ -12,7 +12,6 @@ from ...utils import (
|
|
|
12
12
|
)
|
|
13
13
|
from .assets import DomoAsset
|
|
14
14
|
from .client import DomoClient, DomoCredentials
|
|
15
|
-
from .constants import API_TOKEN, BASE_URL, CLIENT_ID, CLOUD_ID, DEVELOPER_TOKEN
|
|
16
15
|
|
|
17
16
|
logger = logging.getLogger(__name__)
|
|
18
17
|
|
|
@@ -43,33 +42,13 @@ def iterate_all_data(
|
|
|
43
42
|
yield DomoAsset.DATAFLOWS, list(deep_serialize(dataflows))
|
|
44
43
|
|
|
45
44
|
|
|
46
|
-
def extract_all(
|
|
47
|
-
api_token: Optional[str] = None,
|
|
48
|
-
base_url: Optional[str] = None,
|
|
49
|
-
client_id: Optional[str] = None,
|
|
50
|
-
cloud_id: Optional[str] = None,
|
|
51
|
-
developer_token: Optional[str] = None,
|
|
52
|
-
output_directory: Optional[str] = None,
|
|
53
|
-
) -> None:
|
|
45
|
+
def extract_all(**kwargs) -> None:
|
|
54
46
|
"""
|
|
55
47
|
Extract data from Domo API
|
|
56
48
|
Store the output files locally under the given output_directory
|
|
57
49
|
"""
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
_client_id = client_id or from_env(CLIENT_ID)
|
|
61
|
-
_base_url = base_url or from_env(BASE_URL)
|
|
62
|
-
_api_token = api_token or from_env(API_TOKEN)
|
|
63
|
-
_developer_token = developer_token or from_env(DEVELOPER_TOKEN)
|
|
64
|
-
_cloud_id = cloud_id or from_env(CLOUD_ID)
|
|
65
|
-
|
|
66
|
-
credentials = DomoCredentials(
|
|
67
|
-
base_url=_base_url,
|
|
68
|
-
client_id=_client_id,
|
|
69
|
-
api_token=_api_token,
|
|
70
|
-
developer_token=_developer_token,
|
|
71
|
-
cloud_id=_cloud_id,
|
|
72
|
-
)
|
|
50
|
+
_output_directory = kwargs.get("output") or from_env(OUTPUT_DIR)
|
|
51
|
+
credentials = DomoCredentials(**kwargs)
|
|
73
52
|
client = DomoClient(credentials=credentials)
|
|
74
53
|
|
|
75
54
|
ts = current_timestamp()
|
|
@@ -78,4 +57,4 @@ def extract_all(
|
|
|
78
57
|
filename = get_output_filename(key.name.lower(), _output_directory, ts)
|
|
79
58
|
write_json(filename, data)
|
|
80
59
|
|
|
81
|
-
write_summary(_output_directory, ts, base_url=
|
|
60
|
+
write_summary(_output_directory, ts, base_url=credentials.base_url)
|
|
@@ -22,7 +22,7 @@ from looker_sdk.sdk.api40.models import (
|
|
|
22
22
|
from looker_sdk.sdk.constants import sdk_version
|
|
23
23
|
|
|
24
24
|
from ....utils import Pager, PagerLogger, SafeMode, past_date, safe_mode
|
|
25
|
-
from ..
|
|
25
|
+
from ..constants import DEFAULT_LOOKER_PAGE_SIZE
|
|
26
26
|
from ..fields import format_fields
|
|
27
27
|
from .constants import (
|
|
28
28
|
CONNECTION_FIELDS,
|
|
@@ -39,7 +39,8 @@ from .constants import (
|
|
|
39
39
|
USER_FIELDS,
|
|
40
40
|
USERS_ATTRIBUTES_FIELDS,
|
|
41
41
|
)
|
|
42
|
-
from .
|
|
42
|
+
from .credentials import LookerCredentials
|
|
43
|
+
from .sdk import CastorApiSettings, has_admin_permissions
|
|
43
44
|
|
|
44
45
|
logger = logging.getLogger(__name__)
|
|
45
46
|
|
|
@@ -78,9 +79,10 @@ class ApiClient:
|
|
|
78
79
|
|
|
79
80
|
def __init__(
|
|
80
81
|
self,
|
|
81
|
-
credentials:
|
|
82
|
+
credentials: LookerCredentials,
|
|
82
83
|
on_api_call: OnApiCall = lambda: None,
|
|
83
84
|
safe_mode: Optional[SafeMode] = None,
|
|
85
|
+
page_size: int = DEFAULT_LOOKER_PAGE_SIZE,
|
|
84
86
|
):
|
|
85
87
|
settings = CastorApiSettings(
|
|
86
88
|
credentials=credentials, sdk_version=sdk_version
|
|
@@ -92,7 +94,7 @@ class ApiClient:
|
|
|
92
94
|
self._sdk = sdk
|
|
93
95
|
self._on_api_call = on_api_call
|
|
94
96
|
self._logger = ApiPagerLogger(on_api_call)
|
|
95
|
-
self.per_page = page_size
|
|
97
|
+
self.per_page = page_size
|
|
96
98
|
self._safe_mode = safe_mode
|
|
97
99
|
|
|
98
100
|
def folders(self) -> List[Folder]:
|
|
@@ -2,9 +2,11 @@ import datetime
|
|
|
2
2
|
from unittest.mock import patch
|
|
3
3
|
|
|
4
4
|
import pytest
|
|
5
|
-
from castor_extractor.visualization.looker import ( # type: ignore
|
|
5
|
+
from castor_extractor.visualization.looker.api.client import ( # type: ignore
|
|
6
6
|
ApiClient,
|
|
7
|
-
|
|
7
|
+
)
|
|
8
|
+
from castor_extractor.visualization.looker.api.credentials import ( # type: ignore
|
|
9
|
+
LookerCredentials,
|
|
8
10
|
)
|
|
9
11
|
from dateutil.utils import today
|
|
10
12
|
from freezegun import freeze_time
|
|
@@ -13,7 +15,7 @@ from .client import _mondays
|
|
|
13
15
|
|
|
14
16
|
|
|
15
17
|
def _credentials():
|
|
16
|
-
return
|
|
18
|
+
return LookerCredentials( # noqa: S106
|
|
17
19
|
base_url="base_url",
|
|
18
20
|
client_id="client_id",
|
|
19
21
|
client_secret="secret",
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from looker_sdk.rtl.api_settings import SettingsConfig
|
|
2
|
+
from pydantic import Field, SecretStr
|
|
3
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
4
|
+
|
|
5
|
+
from ..constants import DEFAULT_LOOKER_TIMEOUT_SECOND, LOOKER_ENV_PREFIX
|
|
6
|
+
|
|
7
|
+
KEY_LOOKER_TIMEOUT_SECOND = f"{LOOKER_ENV_PREFIX}TIMEOUT_SECOND"
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class LookerCredentials(BaseSettings):
|
|
11
|
+
"""ValueObject for the credentials"""
|
|
12
|
+
|
|
13
|
+
model_config = SettingsConfigDict(
|
|
14
|
+
env_prefix=LOOKER_ENV_PREFIX,
|
|
15
|
+
extra="ignore",
|
|
16
|
+
populate_by_name=True,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
base_url: str
|
|
20
|
+
client_id: str
|
|
21
|
+
client_secret: str = Field(repr=False)
|
|
22
|
+
timeout: int = Field(
|
|
23
|
+
validation_alias=KEY_LOOKER_TIMEOUT_SECOND,
|
|
24
|
+
default=DEFAULT_LOOKER_TIMEOUT_SECOND,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
def to_settings_config(self) -> SettingsConfig:
|
|
28
|
+
return SettingsConfig(
|
|
29
|
+
base_url=self.base_url,
|
|
30
|
+
client_id=self.client_id,
|
|
31
|
+
client_secret=self.client_secret,
|
|
32
|
+
timeout=str(self.timeout),
|
|
33
|
+
)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from pydantic import Field, field_validator
|
|
2
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
3
|
+
|
|
4
|
+
from ..constants import (
|
|
5
|
+
DEFAULT_LOOKER_PAGE_SIZE,
|
|
6
|
+
DEFAULT_LOOKER_THREAD_POOL_SIZE,
|
|
7
|
+
LOOKER_ENV_PREFIX,
|
|
8
|
+
MAX_THREAD_POOL_SIZE,
|
|
9
|
+
MIN_THREAD_POOL_SIZE,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ExtractionParameters(BaseSettings):
|
|
14
|
+
"""
|
|
15
|
+
Class holding all the parameters needed for the extraction of
|
|
16
|
+
Looker metadata
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
model_config = SettingsConfigDict(
|
|
20
|
+
env_prefix=LOOKER_ENV_PREFIX,
|
|
21
|
+
extra="ignore",
|
|
22
|
+
populate_by_name=True,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
is_safe_mode: bool = False
|
|
26
|
+
log_to_stdout: bool
|
|
27
|
+
output_directory: str
|
|
28
|
+
search_per_folder: bool
|
|
29
|
+
page_size: int = Field(default=DEFAULT_LOOKER_PAGE_SIZE)
|
|
30
|
+
thread_pool_size: int = Field(default=DEFAULT_LOOKER_THREAD_POOL_SIZE)
|
|
31
|
+
|
|
32
|
+
@field_validator("thread_pool_size", mode="before")
|
|
33
|
+
@classmethod
|
|
34
|
+
def _check_thread_pool_size(cls, thread_pool_size: int) -> int:
|
|
35
|
+
thread_pool_size = thread_pool_size or DEFAULT_LOOKER_THREAD_POOL_SIZE
|
|
36
|
+
if MIN_THREAD_POOL_SIZE <= thread_pool_size <= MAX_THREAD_POOL_SIZE:
|
|
37
|
+
return thread_pool_size
|
|
38
|
+
raise ValueError("Thread pool size must be between 1 and 200 inclusive")
|
|
@@ -8,33 +8,7 @@ from looker_sdk.rtl import transport
|
|
|
8
8
|
from looker_sdk.rtl.api_settings import ApiSettings, SettingsConfig
|
|
9
9
|
from looker_sdk.sdk.api40 import methods as methods40
|
|
10
10
|
|
|
11
|
-
from
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class Credentials:
|
|
15
|
-
"""ValueObject for the credentials"""
|
|
16
|
-
|
|
17
|
-
def __init__(
|
|
18
|
-
self,
|
|
19
|
-
*,
|
|
20
|
-
base_url: str,
|
|
21
|
-
client_id: str,
|
|
22
|
-
client_secret: str,
|
|
23
|
-
timeout: Optional[int] = None,
|
|
24
|
-
**_kwargs,
|
|
25
|
-
):
|
|
26
|
-
self.base_url = base_url
|
|
27
|
-
self.client_id = client_id
|
|
28
|
-
self.client_secret = client_secret
|
|
29
|
-
self.timeout: int = timeout or timeout_second()
|
|
30
|
-
|
|
31
|
-
def to_settings_config(self) -> SettingsConfig:
|
|
32
|
-
return SettingsConfig(
|
|
33
|
-
base_url=self.base_url,
|
|
34
|
-
client_id=self.client_id,
|
|
35
|
-
client_secret=self.client_secret,
|
|
36
|
-
timeout=str(self.timeout),
|
|
37
|
-
)
|
|
11
|
+
from .credentials import LookerCredentials
|
|
38
12
|
|
|
39
13
|
|
|
40
14
|
def has_admin_permissions(sdk_: methods40.Looker40SDK) -> bool:
|
|
@@ -54,7 +28,7 @@ class CastorApiSettings(ApiSettings):
|
|
|
54
28
|
"""SDK settings with initialisation using a credential object instead of a path to a .ini file"""
|
|
55
29
|
|
|
56
30
|
def __init__(
|
|
57
|
-
self, credentials:
|
|
31
|
+
self, credentials: LookerCredentials, sdk_version: Optional[str] = ""
|
|
58
32
|
):
|
|
59
33
|
"""Configure using a config dict"""
|
|
60
34
|
self.config = credentials.to_settings_config()
|
|
@@ -1,33 +1,8 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
from looker_sdk.error import SDKError # type: ignore
|
|
6
|
-
|
|
7
|
-
DEFAULT_LOOKER_TIMEOUT_SECOND = 120
|
|
8
|
-
KEY_LOOKER_TIMEOUT_SECOND = "CASTOR_LOOKER_TIMEOUT_SECOND"
|
|
9
|
-
|
|
10
|
-
"""
|
|
11
|
-
Number of items per page when requesting Looker API
|
|
12
|
-
"""
|
|
13
|
-
DEFAULT_LOOKER_PAGE_SIZE = 500
|
|
14
|
-
KEY_LOOKER_PAGE_SIZE = "CASTOR_LOOKER_PAGE_SIZE"
|
|
15
|
-
|
|
16
|
-
"""
|
|
17
|
-
Maximum concurrent threads to run when fetching
|
|
2
|
+
Safe mode parameters
|
|
18
3
|
"""
|
|
19
|
-
DEFAULT_LOOKER_THREAD_POOL_SIZE = 20
|
|
20
|
-
KEY_LOOKER_THREAD_POOL_SIZE = "CASTOR_LOOKER_THREAD_POOL_SIZE"
|
|
21
4
|
|
|
22
|
-
|
|
23
|
-
BASE_URL = "CASTOR_LOOKER_BASE_URL"
|
|
24
|
-
CLIENT_ID = "CASTOR_LOOKER_CLIENT_ID"
|
|
25
|
-
CLIENT_SECRET = "CASTOR_LOOKER_CLIENT_SECRET" # noqa: S105
|
|
26
|
-
SEARCH_PER_FOLDER = "CASTOR_LOOKER_SEARCH_PER_FOLDER"
|
|
27
|
-
LOG_TO_STDOUT = "CASTOR_LOOKER_LOG_TO_STDOUT"
|
|
5
|
+
from looker_sdk.error import SDKError
|
|
28
6
|
|
|
29
|
-
"""
|
|
30
|
-
Safe mode parameters
|
|
31
|
-
"""
|
|
32
7
|
SAFE_MODE_MAX_ERRORS = 3
|
|
33
8
|
SAFE_MODE_EXCEPTIONS = (SDKError,)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Request timeout in seconds for Looker API
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
LOOKER_ENV_PREFIX = "CASTOR_LOOKER_"
|
|
6
|
+
|
|
7
|
+
DEFAULT_LOOKER_TIMEOUT_SECOND = 120
|
|
8
|
+
"""
|
|
9
|
+
Number of items per page when requesting Looker API
|
|
10
|
+
"""
|
|
11
|
+
DEFAULT_LOOKER_PAGE_SIZE = 500
|
|
12
|
+
"""
|
|
13
|
+
Maximum concurrent threads to run when fetching
|
|
14
|
+
"""
|
|
15
|
+
DEFAULT_LOOKER_THREAD_POOL_SIZE = 20
|
|
16
|
+
MIN_THREAD_POOL_SIZE = 1
|
|
17
|
+
MAX_THREAD_POOL_SIZE = 200
|
|
@@ -13,10 +13,14 @@ from ...utils import (
|
|
|
13
13
|
write_json,
|
|
14
14
|
write_summary,
|
|
15
15
|
)
|
|
16
|
-
from .api import
|
|
16
|
+
from .api import (
|
|
17
|
+
ApiClient,
|
|
18
|
+
ExtractionParameters,
|
|
19
|
+
LookerCredentials,
|
|
20
|
+
lookml_explore_names,
|
|
21
|
+
)
|
|
17
22
|
from .assets import LookerAsset
|
|
18
23
|
from .multithreading import MultithreadingFetcher
|
|
19
|
-
from .parameters import get_parameters
|
|
20
24
|
|
|
21
25
|
logger = logging.getLogger(__name__)
|
|
22
26
|
|
|
@@ -30,25 +34,23 @@ def _extract_explores_by_name(
|
|
|
30
34
|
yield deep_serialize(explore) # type: ignore
|
|
31
35
|
|
|
32
36
|
|
|
33
|
-
def _safe_mode(
|
|
34
|
-
|
|
37
|
+
def _safe_mode(
|
|
38
|
+
extraction_parameters: ExtractionParameters,
|
|
39
|
+
) -> Optional[SafeMode]:
|
|
40
|
+
if extraction_parameters.is_safe_mode:
|
|
41
|
+
return None
|
|
42
|
+
add_logging_file_handler(extraction_parameters.output_directory)
|
|
35
43
|
return SafeMode((Exception,), float("inf"))
|
|
36
44
|
|
|
37
45
|
|
|
38
46
|
def _client(
|
|
39
|
-
|
|
40
|
-
client_id: str,
|
|
41
|
-
client_secret: str,
|
|
42
|
-
timeout: Optional[int],
|
|
47
|
+
credentials: LookerCredentials,
|
|
43
48
|
safe_mode: Optional[SafeMode],
|
|
49
|
+
page_size: int,
|
|
44
50
|
) -> ApiClient:
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
client_id=client_id,
|
|
48
|
-
client_secret=client_secret,
|
|
49
|
-
timeout=timeout,
|
|
51
|
+
return ApiClient(
|
|
52
|
+
credentials=credentials, safe_mode=safe_mode, page_size=page_size
|
|
50
53
|
)
|
|
51
|
-
return ApiClient(credentials=credentials, safe_mode=safe_mode)
|
|
52
54
|
|
|
53
55
|
|
|
54
56
|
def iterate_all_data(
|
|
@@ -56,7 +58,7 @@ def iterate_all_data(
|
|
|
56
58
|
search_per_folder: bool,
|
|
57
59
|
thread_pool_size: int,
|
|
58
60
|
log_to_stdout: bool,
|
|
59
|
-
) -> Union[StreamableList,
|
|
61
|
+
) -> Iterable[Union[StreamableList, Tuple[LookerAsset, list]]]:
|
|
60
62
|
"""Iterate over the extracted Data From looker"""
|
|
61
63
|
|
|
62
64
|
logger.info("Extracting users from Looker API")
|
|
@@ -124,33 +126,31 @@ def extract_all(**kwargs) -> None:
|
|
|
124
126
|
Extract Data From looker and store it locally in files under the
|
|
125
127
|
output_directory
|
|
126
128
|
"""
|
|
127
|
-
|
|
128
|
-
output_directory =
|
|
129
|
-
|
|
129
|
+
extraction_parameters = ExtractionParameters(**kwargs)
|
|
130
|
+
output_directory = extraction_parameters.output_directory
|
|
131
|
+
|
|
132
|
+
credentials = LookerCredentials(**kwargs)
|
|
130
133
|
|
|
131
|
-
if
|
|
134
|
+
if extraction_parameters.log_to_stdout:
|
|
132
135
|
set_stream_handler_to_stdout()
|
|
133
136
|
|
|
134
|
-
|
|
135
|
-
safe_mode = _safe_mode(output_directory) if is_safe_mode else None
|
|
137
|
+
safe_mode = _safe_mode(extraction_parameters)
|
|
136
138
|
client = _client(
|
|
137
|
-
|
|
138
|
-
client_id=parameters.client_id,
|
|
139
|
-
client_secret=parameters.client_secret,
|
|
140
|
-
timeout=parameters.timeout,
|
|
139
|
+
credentials=credentials,
|
|
141
140
|
safe_mode=safe_mode,
|
|
141
|
+
page_size=extraction_parameters.page_size,
|
|
142
142
|
)
|
|
143
143
|
|
|
144
144
|
ts = current_timestamp()
|
|
145
145
|
|
|
146
146
|
data = iterate_all_data(
|
|
147
147
|
client=client,
|
|
148
|
-
search_per_folder=
|
|
149
|
-
thread_pool_size=
|
|
150
|
-
log_to_stdout=
|
|
148
|
+
search_per_folder=extraction_parameters.search_per_folder,
|
|
149
|
+
thread_pool_size=extraction_parameters.thread_pool_size,
|
|
150
|
+
log_to_stdout=extraction_parameters.log_to_stdout,
|
|
151
151
|
)
|
|
152
152
|
for asset, data in data:
|
|
153
153
|
filename = get_output_filename(asset.value, output_directory, ts)
|
|
154
154
|
write_json(filename, data)
|
|
155
155
|
|
|
156
|
-
write_summary(output_directory, ts, base_url=base_url)
|
|
156
|
+
write_summary(output_directory, ts, base_url=credentials.base_url)
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
from .api import ApiClient
|
|
2
|
-
from .db import DbClient
|
|
1
|
+
from .api import ApiClient, MetabaseApiCredentials
|
|
2
|
+
from .db import DbClient, MetabaseDbCredentials
|