castor-extractor 0.17.4__py3-none-any.whl → 0.18.5__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of castor-extractor might be problematic. Click here for more details.

Files changed (92)
  1. CHANGELOG.md +28 -0
  2. DockerfileUsage.md +21 -0
  3. castor_extractor/commands/extract_domo.py +2 -10
  4. castor_extractor/commands/extract_looker.py +2 -13
  5. castor_extractor/commands/extract_metabase_api.py +5 -10
  6. castor_extractor/commands/extract_metabase_db.py +6 -16
  7. castor_extractor/commands/extract_mode.py +2 -13
  8. castor_extractor/commands/extract_powerbi.py +2 -8
  9. castor_extractor/commands/extract_qlik.py +2 -7
  10. castor_extractor/commands/extract_salesforce.py +3 -12
  11. castor_extractor/commands/extract_salesforce_reporting.py +2 -10
  12. castor_extractor/commands/extract_sigma.py +2 -7
  13. castor_extractor/utils/__init__.py +3 -1
  14. castor_extractor/utils/argument_parser.py +7 -0
  15. castor_extractor/utils/argument_parser_test.py +25 -0
  16. castor_extractor/utils/collection.py +8 -0
  17. castor_extractor/utils/safe_request.py +57 -0
  18. castor_extractor/utils/safe_request_test.py +77 -0
  19. castor_extractor/utils/salesforce/__init__.py +1 -2
  20. castor_extractor/utils/salesforce/constants.py +0 -11
  21. castor_extractor/utils/salesforce/credentials.py +22 -45
  22. castor_extractor/visualization/domo/__init__.py +1 -1
  23. castor_extractor/visualization/domo/client/__init__.py +1 -1
  24. castor_extractor/visualization/domo/client/client.py +37 -52
  25. castor_extractor/visualization/domo/client/credentials.py +14 -27
  26. castor_extractor/visualization/domo/extract.py +5 -26
  27. castor_extractor/visualization/looker/__init__.py +6 -1
  28. castor_extractor/visualization/looker/api/__init__.py +2 -1
  29. castor_extractor/visualization/looker/api/client.py +6 -4
  30. castor_extractor/visualization/looker/api/client_test.py +5 -3
  31. castor_extractor/visualization/looker/api/credentials.py +33 -0
  32. castor_extractor/visualization/looker/api/extraction_parameters.py +38 -0
  33. castor_extractor/visualization/looker/api/sdk.py +2 -28
  34. castor_extractor/visualization/looker/constant.py +2 -27
  35. castor_extractor/visualization/looker/constants.py +17 -0
  36. castor_extractor/visualization/looker/extract.py +29 -29
  37. castor_extractor/visualization/metabase/__init__.py +6 -1
  38. castor_extractor/visualization/metabase/client/__init__.py +2 -2
  39. castor_extractor/visualization/metabase/client/api/__init__.py +1 -0
  40. castor_extractor/visualization/metabase/client/api/client.py +8 -14
  41. castor_extractor/visualization/metabase/client/api/credentials.py +13 -40
  42. castor_extractor/visualization/metabase/client/db/__init__.py +1 -0
  43. castor_extractor/visualization/metabase/client/db/client.py +13 -34
  44. castor_extractor/visualization/metabase/client/db/credentials.py +19 -73
  45. castor_extractor/visualization/metabase/errors.py +5 -3
  46. castor_extractor/visualization/metabase/extract.py +3 -3
  47. castor_extractor/visualization/mode/__init__.py +1 -1
  48. castor_extractor/visualization/mode/client/__init__.py +1 -0
  49. castor_extractor/visualization/mode/client/client.py +9 -12
  50. castor_extractor/visualization/mode/client/client_test.py +3 -3
  51. castor_extractor/visualization/mode/client/credentials.py +18 -51
  52. castor_extractor/visualization/mode/extract.py +6 -3
  53. castor_extractor/visualization/powerbi/__init__.py +1 -1
  54. castor_extractor/visualization/powerbi/client/__init__.py +2 -1
  55. castor_extractor/visualization/powerbi/client/credentials.py +17 -9
  56. castor_extractor/visualization/powerbi/client/credentials_test.py +12 -4
  57. castor_extractor/visualization/powerbi/client/rest.py +2 -2
  58. castor_extractor/visualization/powerbi/client/rest_test.py +2 -2
  59. castor_extractor/visualization/powerbi/extract.py +5 -16
  60. castor_extractor/visualization/qlik/__init__.py +5 -1
  61. castor_extractor/visualization/qlik/client/__init__.py +1 -0
  62. castor_extractor/visualization/qlik/client/engine/__init__.py +1 -0
  63. castor_extractor/visualization/qlik/client/engine/client.py +5 -6
  64. castor_extractor/visualization/qlik/client/engine/credentials.py +26 -0
  65. castor_extractor/visualization/qlik/client/master.py +5 -11
  66. castor_extractor/visualization/qlik/client/rest.py +4 -4
  67. castor_extractor/visualization/qlik/client/rest_test.py +6 -2
  68. castor_extractor/visualization/qlik/extract.py +6 -13
  69. castor_extractor/visualization/salesforce_reporting/extract.py +6 -20
  70. castor_extractor/visualization/sigma/__init__.py +1 -1
  71. castor_extractor/visualization/sigma/client/__init__.py +1 -1
  72. castor_extractor/visualization/sigma/client/client.py +5 -4
  73. castor_extractor/visualization/sigma/client/credentials.py +12 -28
  74. castor_extractor/visualization/sigma/extract.py +5 -18
  75. castor_extractor/visualization/tableau_revamp/client/credentials.py +40 -87
  76. castor_extractor/warehouse/databricks/client.py +3 -0
  77. castor_extractor/warehouse/redshift/queries/column.sql +0 -5
  78. castor_extractor/warehouse/salesforce/extract.py +2 -2
  79. castor_extractor/warehouse/salesforce/format.py +5 -3
  80. castor_extractor/warehouse/snowflake/queries/column.sql +0 -1
  81. castor_extractor/warehouse/synapse/queries/column.sql +0 -1
  82. {castor_extractor-0.17.4.dist-info → castor_extractor-0.18.5.dist-info}/METADATA +9 -9
  83. {castor_extractor-0.17.4.dist-info → castor_extractor-0.18.5.dist-info}/RECORD +86 -83
  84. castor_extractor/visualization/domo/client/client_test.py +0 -60
  85. castor_extractor/visualization/domo/constants.py +0 -6
  86. castor_extractor/visualization/looker/env.py +0 -48
  87. castor_extractor/visualization/looker/parameters.py +0 -78
  88. castor_extractor/visualization/qlik/constants.py +0 -3
  89. castor_extractor/visualization/sigma/constants.py +0 -4
  90. {castor_extractor-0.17.4.dist-info → castor_extractor-0.18.5.dist-info}/LICENCE +0 -0
  91. {castor_extractor-0.17.4.dist-info → castor_extractor-0.18.5.dist-info}/WHEEL +0 -0
  92. {castor_extractor-0.17.4.dist-info → castor_extractor-0.18.5.dist-info}/entry_points.txt +0 -0
@@ -1,11 +1,21 @@
1
1
  import logging
2
2
  from datetime import datetime, timedelta
3
3
  from http import HTTPStatus
4
- from typing import Iterator, List, Optional, Set, Tuple
4
+ from typing import Iterator, List, Optional, Set
5
5
 
6
6
  import requests
7
7
 
8
- from ....utils import at_midnight, current_date, past_date, retry, timestamp_ms
8
+ from ....utils import (
9
+ RequestSafeMode,
10
+ ResponseJson,
11
+ at_midnight,
12
+ current_date,
13
+ empty_iterator,
14
+ handle_response,
15
+ past_date,
16
+ retry,
17
+ timestamp_ms,
18
+ )
9
19
  from ..assets import DomoAsset
10
20
  from .credentials import DomoCredentials
11
21
  from .endpoints import Endpoint, EndpointFactory
@@ -17,11 +27,17 @@ DOMO_PUBLIC_URL = "https://api.domo.com"
17
27
  DEFAULT_TIMEOUT = 120
18
28
  TOKEN_EXPIRATION_SECONDS = timedelta(seconds=3000) # auth token lasts 1 hour
19
29
 
30
+
31
+ # Safe Mode
32
+ VOLUME_IGNORED = 10
20
33
  IGNORED_ERROR_CODES = (
21
34
  HTTPStatus.BAD_REQUEST,
22
35
  HTTPStatus.NOT_FOUND,
23
36
  )
24
- ERROR_TPL = "Request failed with status code {status_code} and reason {reason}"
37
+ DOMO_SAFE_MODE = RequestSafeMode(
38
+ max_errors=VOLUME_IGNORED,
39
+ status_codes=IGNORED_ERROR_CODES,
40
+ )
25
41
 
26
42
  _RETRY_EXCEPTIONS = [
27
43
  requests.exceptions.ConnectTimeout,
@@ -33,40 +49,17 @@ _RETRY_BASE_MS = 10 * 60 * 1000 # 10 minutes
33
49
  logger = logging.getLogger(__name__)
34
50
 
35
51
 
36
- def _handle_response(response: requests.Response) -> requests.Response:
37
- response.raise_for_status()
38
- return response
39
-
40
-
41
- def _ignore_or_raise(
42
- error: requests.RequestException,
43
- ignore_error_codes: Optional[Tuple[int, ...]],
44
- ) -> dict:
45
- """
46
- Raises the error unless the response status code is in the ignored error
47
- codes list.
48
- """
49
- if not ignore_error_codes:
50
- raise error
51
-
52
- response = error.response
53
- if response is None:
54
- raise error
55
-
56
- if response.status_code in ignore_error_codes:
57
- logger.warning(error)
58
- return {}
59
-
60
- raise error
61
-
62
-
63
52
  class DomoClient:
64
53
  """
65
54
  Connect to Domo API and fetch main assets.
66
55
  https://developer.domo.com/portal/8ba9aedad3679-ap-is#platform-oauth-apis
67
56
  """
68
57
 
69
- def __init__(self, credentials: DomoCredentials):
58
+ def __init__(
59
+ self,
60
+ credentials: DomoCredentials,
61
+ safe_mode: Optional[RequestSafeMode] = None,
62
+ ):
70
63
  self._authentication = credentials.authentication
71
64
  self._bearer_headers: Optional[dict] = None
72
65
  self._session = requests.session()
@@ -76,6 +69,7 @@ class DomoClient:
76
69
  self._timeout = DEFAULT_TIMEOUT
77
70
  self.base_url = credentials.base_url
78
71
  self.cloud_id = credentials.cloud_id
72
+ self.safe_mode = safe_mode or DOMO_SAFE_MODE
79
73
 
80
74
  def _token_expired(self) -> bool:
81
75
  token_lifetime = datetime.now() - self._token_creation_time
@@ -95,7 +89,8 @@ class DomoClient:
95
89
  auth=basic_authentication,
96
90
  timeout=self._timeout,
97
91
  )
98
- result = _handle_response(response).json()
92
+ response.raise_for_status()
93
+ result = response.json()
99
94
 
100
95
  bearer_token = result["access_token"]
101
96
  self._bearer_headers = {"authorization": f"Bearer {bearer_token}"}
@@ -113,7 +108,7 @@ class DomoClient:
113
108
  endpoint: Endpoint,
114
109
  params: Optional[dict] = None,
115
110
  asset_id: Optional[str] = None,
116
- ) -> requests.Response:
111
+ ) -> ResponseJson:
117
112
  params = params if params else {}
118
113
  is_private = endpoint.is_private
119
114
  headers = self._private_headers if is_private else self._bearer_auth()
@@ -125,27 +120,16 @@ class DomoClient:
125
120
  timeout=self._timeout,
126
121
  )
127
122
 
128
- if response.status_code != HTTPStatus.OK:
129
- logger.warning(
130
- ERROR_TPL.format(
131
- status_code=response.status_code,
132
- reason=response.reason,
133
- )
134
- )
135
- return response
123
+ return handle_response(response, self.safe_mode)
136
124
 
137
125
  def _get_element(
138
126
  self,
139
127
  endpoint: Endpoint,
140
128
  params: Optional[dict] = None,
141
129
  asset_id: Optional[str] = None,
142
- ignore_error_codes: Optional[Tuple[int, ...]] = None,
143
130
  ) -> dict:
144
131
  """Used when the response only contains one element"""
145
- try:
146
- return self._get(endpoint, params, asset_id).json()
147
- except requests.RequestException as error:
148
- return _ignore_or_raise(error, ignore_error_codes)
132
+ return self._get(endpoint, params, asset_id)
149
133
 
150
134
  def _get_many(
151
135
  self,
@@ -154,7 +138,7 @@ class DomoClient:
154
138
  asset_id: Optional[str] = None,
155
139
  ) -> List[dict]:
156
140
  """Used when the response contains multiple elements"""
157
- return self._get(endpoint, params, asset_id).json()
141
+ return self._get(endpoint, params, asset_id)
158
142
 
159
143
  def _get_paginated(self, endpoint: Endpoint) -> List[dict]:
160
144
  """Used when the response is paginated and need iterations"""
@@ -172,10 +156,7 @@ class DomoClient:
172
156
 
173
157
  def _datasources(self, page_id: str) -> RawData:
174
158
  endpoint = self._endpoint_factory.page_content(page_id)
175
- page_content = self._get_element(
176
- endpoint,
177
- ignore_error_codes=IGNORED_ERROR_CODES,
178
- )
159
+ page_content = self._get_element(endpoint)
179
160
  processed: set[str] = set()
180
161
  for card in page_content.get("cards", []):
181
162
  for datasource in card["datasources"]:
@@ -196,7 +177,7 @@ class DomoClient:
196
177
  ) -> Iterator[dict]:
197
178
  """Recursively fetch pages while building the folder architecture"""
198
179
  if not page_tree:
199
- return []
180
+ return empty_iterator()
200
181
 
201
182
  for page in page_tree:
202
183
  page_id = page.get("id")
@@ -211,6 +192,10 @@ class DomoClient:
211
192
  self._endpoint_factory.pages,
212
193
  asset_id=page_id,
213
194
  )
195
+
196
+ if not detail:
197
+ continue
198
+
214
199
  datasources = self._datasources(page_id)
215
200
  yield {
216
201
  **detail,
@@ -1,39 +1,26 @@
1
- from dataclasses import dataclass
2
- from enum import Enum
3
- from typing import Dict, Optional, Tuple
1
+ from typing import Dict, Optional
4
2
 
3
+ from pydantic import Field, SecretStr
4
+ from pydantic_settings import BaseSettings, SettingsConfigDict
5
5
  from requests.auth import HTTPBasicAuth
6
6
 
7
+ DOMO_ENV_PREFIX = "CASTOR_DOMO_"
7
8
 
8
- class CredentialsKey(Enum):
9
- """Value enum object for the credentials"""
10
9
 
11
- API_TOKEN = "api_token" # noqa: S105
12
- BASE_URL = "base_url"
13
- CLIENT_ID = "client_id"
14
- CLOUD_ID = "cloud_id"
15
- DEVELOPER_TOKEN = "developer_token" # noqa: S105
16
-
17
-
18
- CLIENT_ALLOWED_KEYS: Tuple[str, ...] = tuple(c.value for c in CredentialsKey)
19
-
20
-
21
- @dataclass
22
- class DomoCredentials:
10
+ class DomoCredentials(BaseSettings):
23
11
  """Class to handle Domo rest API permissions"""
24
12
 
25
- api_token: str
13
+ model_config = SettingsConfigDict(
14
+ env_prefix=DOMO_ENV_PREFIX,
15
+ extra="ignore",
16
+ populate_by_name=True,
17
+ )
18
+
19
+ api_token: str = Field(repr=False)
26
20
  base_url: str
27
21
  client_id: str
28
- developer_token: str
29
- cloud_id: Optional[str] = None
30
-
31
- @classmethod
32
- def from_secret(cls, secret: dict) -> "DomoCredentials":
33
- credentials = {
34
- k: v for k, v in secret.items() if k in CLIENT_ALLOWED_KEYS
35
- }
36
- return cls(**credentials)
22
+ cloud_id: Optional[str] = Field(validation_alias="CLOUD_ID", default=None)
23
+ developer_token: str = Field(repr=False)
37
24
 
38
25
  @property
39
26
  def authentication(self) -> HTTPBasicAuth:
@@ -1,5 +1,5 @@
1
1
  import logging
2
- from typing import Iterable, Iterator, Optional, Tuple, Union
2
+ from typing import Iterable, Iterator, Tuple, Union
3
3
 
4
4
  from ...utils import (
5
5
  OUTPUT_DIR,
@@ -12,7 +12,6 @@ from ...utils import (
12
12
  )
13
13
  from .assets import DomoAsset
14
14
  from .client import DomoClient, DomoCredentials
15
- from .constants import API_TOKEN, BASE_URL, CLIENT_ID, CLOUD_ID, DEVELOPER_TOKEN
16
15
 
17
16
  logger = logging.getLogger(__name__)
18
17
 
@@ -43,33 +42,13 @@ def iterate_all_data(
43
42
  yield DomoAsset.DATAFLOWS, list(deep_serialize(dataflows))
44
43
 
45
44
 
46
- def extract_all(
47
- api_token: Optional[str] = None,
48
- base_url: Optional[str] = None,
49
- client_id: Optional[str] = None,
50
- cloud_id: Optional[str] = None,
51
- developer_token: Optional[str] = None,
52
- output_directory: Optional[str] = None,
53
- ) -> None:
45
+ def extract_all(**kwargs) -> None:
54
46
  """
55
47
  Extract data from Domo API
56
48
  Store the output files locally under the given output_directory
57
49
  """
58
-
59
- _output_directory = output_directory or from_env(OUTPUT_DIR)
60
- _client_id = client_id or from_env(CLIENT_ID)
61
- _base_url = base_url or from_env(BASE_URL)
62
- _api_token = api_token or from_env(API_TOKEN)
63
- _developer_token = developer_token or from_env(DEVELOPER_TOKEN)
64
- _cloud_id = cloud_id or from_env(CLOUD_ID)
65
-
66
- credentials = DomoCredentials(
67
- base_url=_base_url,
68
- client_id=_client_id,
69
- api_token=_api_token,
70
- developer_token=_developer_token,
71
- cloud_id=_cloud_id,
72
- )
50
+ _output_directory = kwargs.get("output") or from_env(OUTPUT_DIR)
51
+ credentials = DomoCredentials(**kwargs)
73
52
  client = DomoClient(credentials=credentials)
74
53
 
75
54
  ts = current_timestamp()
@@ -78,4 +57,4 @@ def extract_all(
78
57
  filename = get_output_filename(key.name.lower(), _output_directory, ts)
79
58
  write_json(filename, data)
80
59
 
81
- write_summary(_output_directory, ts, base_url=_base_url)
60
+ write_summary(_output_directory, ts, base_url=credentials.base_url)
@@ -1,3 +1,8 @@
1
- from .api import ApiClient, Credentials, lookml_explore_names
1
+ from .api import (
2
+ ApiClient,
3
+ ExtractionParameters,
4
+ LookerCredentials,
5
+ lookml_explore_names,
6
+ )
2
7
  from .assets import LookerAsset
3
8
  from .extract import extract_all, iterate_all_data
@@ -1,3 +1,4 @@
1
1
  from .client import ApiClient
2
- from .sdk import Credentials
2
+ from .credentials import LookerCredentials
3
+ from .extraction_parameters import ExtractionParameters
3
4
  from .utils import lookml_explore_names
@@ -22,7 +22,7 @@ from looker_sdk.sdk.api40.models import (
22
22
  from looker_sdk.sdk.constants import sdk_version
23
23
 
24
24
  from ....utils import Pager, PagerLogger, SafeMode, past_date, safe_mode
25
- from ..env import page_size
25
+ from ..constants import DEFAULT_LOOKER_PAGE_SIZE
26
26
  from ..fields import format_fields
27
27
  from .constants import (
28
28
  CONNECTION_FIELDS,
@@ -39,7 +39,8 @@ from .constants import (
39
39
  USER_FIELDS,
40
40
  USERS_ATTRIBUTES_FIELDS,
41
41
  )
42
- from .sdk import CastorApiSettings, Credentials, has_admin_permissions
42
+ from .credentials import LookerCredentials
43
+ from .sdk import CastorApiSettings, has_admin_permissions
43
44
 
44
45
  logger = logging.getLogger(__name__)
45
46
 
@@ -78,9 +79,10 @@ class ApiClient:
78
79
 
79
80
  def __init__(
80
81
  self,
81
- credentials: Credentials,
82
+ credentials: LookerCredentials,
82
83
  on_api_call: OnApiCall = lambda: None,
83
84
  safe_mode: Optional[SafeMode] = None,
85
+ page_size: int = DEFAULT_LOOKER_PAGE_SIZE,
84
86
  ):
85
87
  settings = CastorApiSettings(
86
88
  credentials=credentials, sdk_version=sdk_version
@@ -92,7 +94,7 @@ class ApiClient:
92
94
  self._sdk = sdk
93
95
  self._on_api_call = on_api_call
94
96
  self._logger = ApiPagerLogger(on_api_call)
95
- self.per_page = page_size()
97
+ self.per_page = page_size
96
98
  self._safe_mode = safe_mode
97
99
 
98
100
  def folders(self) -> List[Folder]:
@@ -2,9 +2,11 @@ import datetime
2
2
  from unittest.mock import patch
3
3
 
4
4
  import pytest
5
- from castor_extractor.visualization.looker import ( # type: ignore
5
+ from castor_extractor.visualization.looker.api.client import ( # type: ignore
6
6
  ApiClient,
7
- Credentials,
7
+ )
8
+ from castor_extractor.visualization.looker.api.credentials import ( # type: ignore
9
+ LookerCredentials,
8
10
  )
9
11
  from dateutil.utils import today
10
12
  from freezegun import freeze_time
@@ -13,7 +15,7 @@ from .client import _mondays
13
15
 
14
16
 
15
17
  def _credentials():
16
- return Credentials( # noqa: S106
18
+ return LookerCredentials( # noqa: S106
17
19
  base_url="base_url",
18
20
  client_id="client_id",
19
21
  client_secret="secret",
@@ -0,0 +1,33 @@
1
+ from looker_sdk.rtl.api_settings import SettingsConfig
2
+ from pydantic import Field, SecretStr
3
+ from pydantic_settings import BaseSettings, SettingsConfigDict
4
+
5
+ from ..constants import DEFAULT_LOOKER_TIMEOUT_SECOND, LOOKER_ENV_PREFIX
6
+
7
+ KEY_LOOKER_TIMEOUT_SECOND = f"{LOOKER_ENV_PREFIX}TIMEOUT_SECOND"
8
+
9
+
10
+ class LookerCredentials(BaseSettings):
11
+ """ValueObject for the credentials"""
12
+
13
+ model_config = SettingsConfigDict(
14
+ env_prefix=LOOKER_ENV_PREFIX,
15
+ extra="ignore",
16
+ populate_by_name=True,
17
+ )
18
+
19
+ base_url: str
20
+ client_id: str
21
+ client_secret: str = Field(repr=False)
22
+ timeout: int = Field(
23
+ validation_alias=KEY_LOOKER_TIMEOUT_SECOND,
24
+ default=DEFAULT_LOOKER_TIMEOUT_SECOND,
25
+ )
26
+
27
+ def to_settings_config(self) -> SettingsConfig:
28
+ return SettingsConfig(
29
+ base_url=self.base_url,
30
+ client_id=self.client_id,
31
+ client_secret=self.client_secret,
32
+ timeout=str(self.timeout),
33
+ )
@@ -0,0 +1,38 @@
1
+ from pydantic import Field, field_validator
2
+ from pydantic_settings import BaseSettings, SettingsConfigDict
3
+
4
+ from ..constants import (
5
+ DEFAULT_LOOKER_PAGE_SIZE,
6
+ DEFAULT_LOOKER_THREAD_POOL_SIZE,
7
+ LOOKER_ENV_PREFIX,
8
+ MAX_THREAD_POOL_SIZE,
9
+ MIN_THREAD_POOL_SIZE,
10
+ )
11
+
12
+
13
+ class ExtractionParameters(BaseSettings):
14
+ """
15
+ Class holding all the parameters needed for the extraction of
16
+ Looker metadata
17
+ """
18
+
19
+ model_config = SettingsConfigDict(
20
+ env_prefix=LOOKER_ENV_PREFIX,
21
+ extra="ignore",
22
+ populate_by_name=True,
23
+ )
24
+
25
+ is_safe_mode: bool = False
26
+ log_to_stdout: bool
27
+ output_directory: str
28
+ search_per_folder: bool
29
+ page_size: int = Field(default=DEFAULT_LOOKER_PAGE_SIZE)
30
+ thread_pool_size: int = Field(default=DEFAULT_LOOKER_THREAD_POOL_SIZE)
31
+
32
+ @field_validator("thread_pool_size", mode="before")
33
+ @classmethod
34
+ def _check_thread_pool_size(cls, thread_pool_size: int) -> int:
35
+ thread_pool_size = thread_pool_size or DEFAULT_LOOKER_THREAD_POOL_SIZE
36
+ if MIN_THREAD_POOL_SIZE <= thread_pool_size <= MAX_THREAD_POOL_SIZE:
37
+ return thread_pool_size
38
+ raise ValueError("Thread pool size must be between 1 and 200 inclusive")
@@ -8,33 +8,7 @@ from looker_sdk.rtl import transport
8
8
  from looker_sdk.rtl.api_settings import ApiSettings, SettingsConfig
9
9
  from looker_sdk.sdk.api40 import methods as methods40
10
10
 
11
- from ..env import timeout_second
12
-
13
-
14
- class Credentials:
15
- """ValueObject for the credentials"""
16
-
17
- def __init__(
18
- self,
19
- *,
20
- base_url: str,
21
- client_id: str,
22
- client_secret: str,
23
- timeout: Optional[int] = None,
24
- **_kwargs,
25
- ):
26
- self.base_url = base_url
27
- self.client_id = client_id
28
- self.client_secret = client_secret
29
- self.timeout: int = timeout or timeout_second()
30
-
31
- def to_settings_config(self) -> SettingsConfig:
32
- return SettingsConfig(
33
- base_url=self.base_url,
34
- client_id=self.client_id,
35
- client_secret=self.client_secret,
36
- timeout=str(self.timeout),
37
- )
11
+ from .credentials import LookerCredentials
38
12
 
39
13
 
40
14
  def has_admin_permissions(sdk_: methods40.Looker40SDK) -> bool:
@@ -54,7 +28,7 @@ class CastorApiSettings(ApiSettings):
54
28
  """SDK settings with initialisation using a credential object instead of a path to a .ini file"""
55
29
 
56
30
  def __init__(
57
- self, credentials: Credentials, sdk_version: Optional[str] = ""
31
+ self, credentials: LookerCredentials, sdk_version: Optional[str] = ""
58
32
  ):
59
33
  """Configure using a config dict"""
60
34
  self.config = credentials.to_settings_config()
@@ -1,33 +1,8 @@
1
1
  """
2
- Request timeout in seconds for Looker API
3
- """
4
-
5
- from looker_sdk.error import SDKError # type: ignore
6
-
7
- DEFAULT_LOOKER_TIMEOUT_SECOND = 120
8
- KEY_LOOKER_TIMEOUT_SECOND = "CASTOR_LOOKER_TIMEOUT_SECOND"
9
-
10
- """
11
- Number of items per page when requesting Looker API
12
- """
13
- DEFAULT_LOOKER_PAGE_SIZE = 500
14
- KEY_LOOKER_PAGE_SIZE = "CASTOR_LOOKER_PAGE_SIZE"
15
-
16
- """
17
- Maximum concurrent threads to run when fetching
2
+ Safe mode parameters
18
3
  """
19
- DEFAULT_LOOKER_THREAD_POOL_SIZE = 20
20
- KEY_LOOKER_THREAD_POOL_SIZE = "CASTOR_LOOKER_THREAD_POOL_SIZE"
21
4
 
22
- # env variables
23
- BASE_URL = "CASTOR_LOOKER_BASE_URL"
24
- CLIENT_ID = "CASTOR_LOOKER_CLIENT_ID"
25
- CLIENT_SECRET = "CASTOR_LOOKER_CLIENT_SECRET" # noqa: S105
26
- SEARCH_PER_FOLDER = "CASTOR_LOOKER_SEARCH_PER_FOLDER"
27
- LOG_TO_STDOUT = "CASTOR_LOOKER_LOG_TO_STDOUT"
5
+ from looker_sdk.error import SDKError
28
6
 
29
- """
30
- Safe mode parameters
31
- """
32
7
  SAFE_MODE_MAX_ERRORS = 3
33
8
  SAFE_MODE_EXCEPTIONS = (SDKError,)
@@ -0,0 +1,17 @@
1
+ """
2
+ Request timeout in seconds for Looker API
3
+ """
4
+
5
+ LOOKER_ENV_PREFIX = "CASTOR_LOOKER_"
6
+
7
+ DEFAULT_LOOKER_TIMEOUT_SECOND = 120
8
+ """
9
+ Number of items per page when requesting Looker API
10
+ """
11
+ DEFAULT_LOOKER_PAGE_SIZE = 500
12
+ """
13
+ Maximum concurrent threads to run when fetching
14
+ """
15
+ DEFAULT_LOOKER_THREAD_POOL_SIZE = 20
16
+ MIN_THREAD_POOL_SIZE = 1
17
+ MAX_THREAD_POOL_SIZE = 200
@@ -13,10 +13,14 @@ from ...utils import (
13
13
  write_json,
14
14
  write_summary,
15
15
  )
16
- from .api import ApiClient, Credentials, lookml_explore_names
16
+ from .api import (
17
+ ApiClient,
18
+ ExtractionParameters,
19
+ LookerCredentials,
20
+ lookml_explore_names,
21
+ )
17
22
  from .assets import LookerAsset
18
23
  from .multithreading import MultithreadingFetcher
19
- from .parameters import get_parameters
20
24
 
21
25
  logger = logging.getLogger(__name__)
22
26
 
@@ -30,25 +34,23 @@ def _extract_explores_by_name(
30
34
  yield deep_serialize(explore) # type: ignore
31
35
 
32
36
 
33
- def _safe_mode(directory: str) -> SafeMode:
34
- add_logging_file_handler(directory)
37
+ def _safe_mode(
38
+ extraction_parameters: ExtractionParameters,
39
+ ) -> Optional[SafeMode]:
40
+ if extraction_parameters.is_safe_mode:
41
+ return None
42
+ add_logging_file_handler(extraction_parameters.output_directory)
35
43
  return SafeMode((Exception,), float("inf"))
36
44
 
37
45
 
38
46
  def _client(
39
- base_url: str,
40
- client_id: str,
41
- client_secret: str,
42
- timeout: Optional[int],
47
+ credentials: LookerCredentials,
43
48
  safe_mode: Optional[SafeMode],
49
+ page_size: int,
44
50
  ) -> ApiClient:
45
- credentials = Credentials(
46
- base_url=base_url,
47
- client_id=client_id,
48
- client_secret=client_secret,
49
- timeout=timeout,
51
+ return ApiClient(
52
+ credentials=credentials, safe_mode=safe_mode, page_size=page_size
50
53
  )
51
- return ApiClient(credentials=credentials, safe_mode=safe_mode)
52
54
 
53
55
 
54
56
  def iterate_all_data(
@@ -56,7 +58,7 @@ def iterate_all_data(
56
58
  search_per_folder: bool,
57
59
  thread_pool_size: int,
58
60
  log_to_stdout: bool,
59
- ) -> Union[StreamableList, Iterable[Tuple[LookerAsset, list]]]:
61
+ ) -> Iterable[Union[StreamableList, Tuple[LookerAsset, list]]]:
60
62
  """Iterate over the extracted Data From looker"""
61
63
 
62
64
  logger.info("Extracting users from Looker API")
@@ -124,33 +126,31 @@ def extract_all(**kwargs) -> None:
124
126
  Extract Data From looker and store it locally in files under the
125
127
  output_directory
126
128
  """
127
- parameters = get_parameters(**kwargs)
128
- output_directory = parameters.output_directory
129
- base_url = parameters.base_url
129
+ extraction_parameters = ExtractionParameters(**kwargs)
130
+ output_directory = extraction_parameters.output_directory
131
+
132
+ credentials = LookerCredentials(**kwargs)
130
133
 
131
- if parameters.log_to_stdout:
134
+ if extraction_parameters.log_to_stdout:
132
135
  set_stream_handler_to_stdout()
133
136
 
134
- is_safe_mode = parameters.is_safe_mode
135
- safe_mode = _safe_mode(output_directory) if is_safe_mode else None
137
+ safe_mode = _safe_mode(extraction_parameters)
136
138
  client = _client(
137
- base_url=base_url,
138
- client_id=parameters.client_id,
139
- client_secret=parameters.client_secret,
140
- timeout=parameters.timeout,
139
+ credentials=credentials,
141
140
  safe_mode=safe_mode,
141
+ page_size=extraction_parameters.page_size,
142
142
  )
143
143
 
144
144
  ts = current_timestamp()
145
145
 
146
146
  data = iterate_all_data(
147
147
  client=client,
148
- search_per_folder=parameters.search_per_folder,
149
- thread_pool_size=parameters.thread_pool_size,
150
- log_to_stdout=parameters.log_to_stdout,
148
+ search_per_folder=extraction_parameters.search_per_folder,
149
+ thread_pool_size=extraction_parameters.thread_pool_size,
150
+ log_to_stdout=extraction_parameters.log_to_stdout,
151
151
  )
152
152
  for asset, data in data:
153
153
  filename = get_output_filename(asset.value, output_directory, ts)
154
154
  write_json(filename, data)
155
155
 
156
- write_summary(output_directory, ts, base_url=base_url)
156
+ write_summary(output_directory, ts, base_url=credentials.base_url)
@@ -1,3 +1,8 @@
1
1
  from .assets import MetabaseAsset
2
- from .client import ApiClient, DbClient
2
+ from .client import (
3
+ ApiClient,
4
+ DbClient,
5
+ MetabaseApiCredentials,
6
+ MetabaseDbCredentials,
7
+ )
3
8
  from .extract import extract_all, iterate_all_data
@@ -1,2 +1,2 @@
1
- from .api import ApiClient
2
- from .db import DbClient
1
+ from .api import ApiClient, MetabaseApiCredentials
2
+ from .db import DbClient, MetabaseDbCredentials
@@ -1 +1,2 @@
1
1
  from .client import ApiClient
2
+ from .credentials import MetabaseApiCredentials