castor-extractor 0.17.4__py3-none-any.whl → 0.18.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of castor-extractor might be problematic. Click here for more details.

Files changed (92)
  1. CHANGELOG.md +28 -0
  2. DockerfileUsage.md +21 -0
  3. castor_extractor/commands/extract_domo.py +2 -10
  4. castor_extractor/commands/extract_looker.py +2 -13
  5. castor_extractor/commands/extract_metabase_api.py +5 -10
  6. castor_extractor/commands/extract_metabase_db.py +6 -16
  7. castor_extractor/commands/extract_mode.py +2 -13
  8. castor_extractor/commands/extract_powerbi.py +2 -8
  9. castor_extractor/commands/extract_qlik.py +2 -7
  10. castor_extractor/commands/extract_salesforce.py +3 -12
  11. castor_extractor/commands/extract_salesforce_reporting.py +2 -10
  12. castor_extractor/commands/extract_sigma.py +2 -7
  13. castor_extractor/utils/__init__.py +3 -1
  14. castor_extractor/utils/argument_parser.py +7 -0
  15. castor_extractor/utils/argument_parser_test.py +25 -0
  16. castor_extractor/utils/collection.py +8 -0
  17. castor_extractor/utils/safe_request.py +57 -0
  18. castor_extractor/utils/safe_request_test.py +77 -0
  19. castor_extractor/utils/salesforce/__init__.py +1 -2
  20. castor_extractor/utils/salesforce/constants.py +0 -11
  21. castor_extractor/utils/salesforce/credentials.py +22 -45
  22. castor_extractor/visualization/domo/__init__.py +1 -1
  23. castor_extractor/visualization/domo/client/__init__.py +1 -1
  24. castor_extractor/visualization/domo/client/client.py +37 -52
  25. castor_extractor/visualization/domo/client/credentials.py +14 -27
  26. castor_extractor/visualization/domo/extract.py +5 -26
  27. castor_extractor/visualization/looker/__init__.py +6 -1
  28. castor_extractor/visualization/looker/api/__init__.py +2 -1
  29. castor_extractor/visualization/looker/api/client.py +6 -4
  30. castor_extractor/visualization/looker/api/client_test.py +5 -3
  31. castor_extractor/visualization/looker/api/credentials.py +33 -0
  32. castor_extractor/visualization/looker/api/extraction_parameters.py +38 -0
  33. castor_extractor/visualization/looker/api/sdk.py +2 -28
  34. castor_extractor/visualization/looker/constant.py +2 -27
  35. castor_extractor/visualization/looker/constants.py +17 -0
  36. castor_extractor/visualization/looker/extract.py +29 -29
  37. castor_extractor/visualization/metabase/__init__.py +6 -1
  38. castor_extractor/visualization/metabase/client/__init__.py +2 -2
  39. castor_extractor/visualization/metabase/client/api/__init__.py +1 -0
  40. castor_extractor/visualization/metabase/client/api/client.py +8 -14
  41. castor_extractor/visualization/metabase/client/api/credentials.py +13 -40
  42. castor_extractor/visualization/metabase/client/db/__init__.py +1 -0
  43. castor_extractor/visualization/metabase/client/db/client.py +13 -34
  44. castor_extractor/visualization/metabase/client/db/credentials.py +19 -73
  45. castor_extractor/visualization/metabase/errors.py +5 -3
  46. castor_extractor/visualization/metabase/extract.py +3 -3
  47. castor_extractor/visualization/mode/__init__.py +1 -1
  48. castor_extractor/visualization/mode/client/__init__.py +1 -0
  49. castor_extractor/visualization/mode/client/client.py +9 -12
  50. castor_extractor/visualization/mode/client/client_test.py +3 -3
  51. castor_extractor/visualization/mode/client/credentials.py +18 -51
  52. castor_extractor/visualization/mode/extract.py +6 -3
  53. castor_extractor/visualization/powerbi/__init__.py +1 -1
  54. castor_extractor/visualization/powerbi/client/__init__.py +2 -1
  55. castor_extractor/visualization/powerbi/client/credentials.py +17 -9
  56. castor_extractor/visualization/powerbi/client/credentials_test.py +12 -4
  57. castor_extractor/visualization/powerbi/client/rest.py +2 -2
  58. castor_extractor/visualization/powerbi/client/rest_test.py +2 -2
  59. castor_extractor/visualization/powerbi/extract.py +5 -16
  60. castor_extractor/visualization/qlik/__init__.py +5 -1
  61. castor_extractor/visualization/qlik/client/__init__.py +1 -0
  62. castor_extractor/visualization/qlik/client/engine/__init__.py +1 -0
  63. castor_extractor/visualization/qlik/client/engine/client.py +5 -6
  64. castor_extractor/visualization/qlik/client/engine/credentials.py +26 -0
  65. castor_extractor/visualization/qlik/client/master.py +5 -11
  66. castor_extractor/visualization/qlik/client/rest.py +4 -4
  67. castor_extractor/visualization/qlik/client/rest_test.py +6 -2
  68. castor_extractor/visualization/qlik/extract.py +6 -13
  69. castor_extractor/visualization/salesforce_reporting/extract.py +6 -20
  70. castor_extractor/visualization/sigma/__init__.py +1 -1
  71. castor_extractor/visualization/sigma/client/__init__.py +1 -1
  72. castor_extractor/visualization/sigma/client/client.py +5 -4
  73. castor_extractor/visualization/sigma/client/credentials.py +12 -28
  74. castor_extractor/visualization/sigma/extract.py +5 -18
  75. castor_extractor/visualization/tableau_revamp/client/credentials.py +40 -87
  76. castor_extractor/warehouse/databricks/client.py +3 -0
  77. castor_extractor/warehouse/redshift/queries/column.sql +0 -5
  78. castor_extractor/warehouse/salesforce/extract.py +2 -2
  79. castor_extractor/warehouse/salesforce/format.py +5 -3
  80. castor_extractor/warehouse/snowflake/queries/column.sql +0 -1
  81. castor_extractor/warehouse/synapse/queries/column.sql +0 -1
  82. {castor_extractor-0.17.4.dist-info → castor_extractor-0.18.5.dist-info}/METADATA +9 -9
  83. {castor_extractor-0.17.4.dist-info → castor_extractor-0.18.5.dist-info}/RECORD +86 -83
  84. castor_extractor/visualization/domo/client/client_test.py +0 -60
  85. castor_extractor/visualization/domo/constants.py +0 -6
  86. castor_extractor/visualization/looker/env.py +0 -48
  87. castor_extractor/visualization/looker/parameters.py +0 -78
  88. castor_extractor/visualization/qlik/constants.py +0 -3
  89. castor_extractor/visualization/sigma/constants.py +0 -4
  90. {castor_extractor-0.17.4.dist-info → castor_extractor-0.18.5.dist-info}/LICENCE +0 -0
  91. {castor_extractor-0.17.4.dist-info → castor_extractor-0.18.5.dist-info}/WHEEL +0 -0
  92. {castor_extractor-0.17.4.dist-info → castor_extractor-0.18.5.dist-info}/entry_points.txt +0 -0
@@ -5,7 +5,7 @@ import pytest
5
5
  from requests import HTTPError
6
6
 
7
7
  from .constants import GET, POST, Assertions, Keys, QueryParams, Urls
8
- from .credentials import Credentials
8
+ from .credentials import PowerbiCredentials
9
9
  from .rest import Client, msal
10
10
 
11
11
  FAKE_TENANT_ID = "IamFake"
@@ -14,7 +14,7 @@ FAKE_SECRET = "MeThree"
14
14
 
15
15
 
16
16
  def _client() -> Client:
17
- creds = Credentials(
17
+ creds = PowerbiCredentials(
18
18
  tenant_id=FAKE_TENANT_ID,
19
19
  client_id=FAKE_CLIENT_ID,
20
20
  secret=FAKE_SECRET,
@@ -1,4 +1,4 @@
1
- from typing import Iterable, List, Optional, Tuple, Union
1
+ from typing import Iterable, List, Tuple, Union
2
2
 
3
3
  from ...utils import (
4
4
  OUTPUT_DIR,
@@ -10,7 +10,7 @@ from ...utils import (
10
10
  write_summary,
11
11
  )
12
12
  from .assets import METADATA_ASSETS, PowerBiAsset
13
- from .client import Client, Credentials
13
+ from .client import Client, PowerbiCredentials
14
14
 
15
15
 
16
16
  def iterate_all_data(
@@ -24,24 +24,13 @@ def iterate_all_data(
24
24
  yield asset, deep_serialize(data)
25
25
 
26
26
 
27
- def extract_all(
28
- tenant_id: str,
29
- client_id: str,
30
- secret: str,
31
- scopes: Optional[List[str]] = None,
32
- output_directory: Optional[str] = None,
33
- ) -> None:
27
+ def extract_all(**kwargs) -> None:
34
28
  """
35
29
  Extract data from PowerBI REST API
36
30
  Store the output files locally under the given output_directory
37
31
  """
38
- _output_directory = output_directory or from_env(OUTPUT_DIR)
39
- creds = Credentials(
40
- tenant_id=tenant_id,
41
- client_id=client_id,
42
- secret=secret,
43
- scopes=scopes,
44
- )
32
+ _output_directory = kwargs.get("output") or from_env(OUTPUT_DIR)
33
+ creds = PowerbiCredentials(**kwargs)
45
34
  client = Client(creds)
46
35
  ts = current_timestamp()
47
36
 
@@ -1,3 +1,7 @@
1
1
  from .assets import QlikAsset
2
- from .client import APP_EXTERNAL_ID_KEY as QLIK_APP_EXTERNAL_ID_KEY, QlikClient
2
+ from .client import (
3
+ APP_EXTERNAL_ID_KEY as QLIK_APP_EXTERNAL_ID_KEY,
4
+ QlikClient,
5
+ QlikCredentials,
6
+ )
3
7
  from .extract import extract_all
@@ -1,2 +1,3 @@
1
1
  from .constants import APP_EXTERNAL_ID_KEY
2
+ from .engine import QlikCredentials
2
3
  from .master import QlikMasterClient as QlikClient
@@ -1 +1,2 @@
1
1
  from .client import EngineApiClient
2
+ from .credentials import QlikCredentials
@@ -2,6 +2,7 @@ import logging
2
2
 
3
3
  from .....utils import SafeMode, safe_mode
4
4
  from .constants import MEASURES_SESSION_PARAMS, JsonRpcMethod
5
+ from .credentials import QlikCredentials
5
6
  from .error import AccessDeniedError, AppSizeExceededError
6
7
  from .json_rpc import JsonRpcClient
7
8
  from .websocket import open_websocket
@@ -49,10 +50,8 @@ class EngineApiClient:
49
50
  get measures using JsonRpcClient and websocket connection.
50
51
  """
51
52
 
52
- def __init__(self, server_url: str, api_key: str):
53
- self.server_url = server_url
54
- self.api_key = api_key
55
-
53
+ def __init__(self, credentials: QlikCredentials):
54
+ self.credentials = credentials
56
55
  self._safe_mode = SafeMode(
57
56
  exceptions=(AccessDeniedError, AppSizeExceededError),
58
57
  max_errors=float("inf"),
@@ -70,8 +69,8 @@ class EngineApiClient:
70
69
 
71
70
  with open_websocket(
72
71
  app_id=app_id,
73
- server_url=self.server_url,
74
- api_key=self.api_key,
72
+ server_url=self.credentials.base_url,
73
+ api_key=self.credentials.api_key,
75
74
  ) as websocket:
76
75
  json_rpc_client = JsonRpcClient(websocket=websocket)
77
76
  return _call(json_rpc_client, app_id)
@@ -0,0 +1,26 @@
1
+ from pydantic import Field, SecretStr, field_validator
2
+ from pydantic_settings import BaseSettings, SettingsConfigDict
3
+
4
+ from .....utils import validate_baseurl
5
+
6
+ QLIK_ENV_PREFIX = "CASTOR_QLIK_"
7
+
8
+
9
+ class QlikCredentials(BaseSettings):
10
+ """
11
+ Qlik's credentials to connect to the API
12
+ """
13
+
14
+ model_config = SettingsConfigDict(
15
+ env_prefix=QLIK_ENV_PREFIX,
16
+ extra="ignore",
17
+ populate_by_name=True,
18
+ )
19
+
20
+ api_key: str = Field(repr=False)
21
+ base_url: str
22
+
23
+ @field_validator("base_url", mode="before")
24
+ @classmethod
25
+ def _check_base_url(cls, base_url: str) -> str:
26
+ return validate_baseurl(base_url)
@@ -4,7 +4,7 @@ from tqdm import tqdm # type: ignore
4
4
 
5
5
  from ..assets import QlikAsset
6
6
  from .constants import APP_EXTERNAL_ID_KEY, SCOPED_ASSETS
7
- from .engine import EngineApiClient
7
+ from .engine import EngineApiClient, QlikCredentials
8
8
  from .rest import RestApiClient
9
9
 
10
10
  ListedData = List[dict]
@@ -53,25 +53,19 @@ class QlikMasterClient:
53
53
 
54
54
  def __init__(
55
55
  self,
56
- server_url: str,
57
- api_key: str,
56
+ credentials: QlikCredentials,
58
57
  except_http_error_statuses: Optional[List[int]] = None,
59
58
  display_progress: bool = True,
60
59
  ):
61
- self._server_url = server_url
62
- self._api_key = api_key
60
+ self._server_url = credentials.base_url
63
61
  self.display_progress = display_progress
64
62
 
65
63
  self.rest_api_client = RestApiClient(
66
- server_url=self._server_url,
67
- api_key=self._api_key,
64
+ credentials=credentials,
68
65
  except_http_error_statuses=except_http_error_statuses,
69
66
  )
70
67
 
71
- self.engine_api_client = EngineApiClient(
72
- server_url=self._server_url,
73
- api_key=self._api_key,
74
- )
68
+ self.engine_api_client = EngineApiClient(credentials=credentials)
75
69
 
76
70
  def _fetch_lineage(self, apps: ListedData) -> ListedData:
77
71
  callback = self.rest_api_client.data_lineage
@@ -16,6 +16,7 @@ from .constants import (
16
16
  RETRY_COUNTS,
17
17
  RETRY_STATUSES,
18
18
  )
19
+ from .engine import QlikCredentials
19
20
 
20
21
  logger = logging.getLogger(__name__)
21
22
 
@@ -60,12 +61,11 @@ class RestApiClient:
60
61
 
61
62
  def __init__(
62
63
  self,
63
- server_url: str,
64
- api_key: str,
64
+ credentials: QlikCredentials,
65
65
  except_http_error_statuses: Optional[List[int]] = None,
66
66
  ):
67
- self._server_url = server_url
68
- self._api_key = api_key
67
+ self._server_url = credentials.base_url
68
+ self._api_key = credentials.api_key
69
69
  self._session = _session()
70
70
  self._except_http_error_statuses = except_http_error_statuses or []
71
71
  self._authenticate()
@@ -1,6 +1,7 @@
1
1
  from typing import Optional
2
2
  from unittest.mock import call, patch
3
3
 
4
+ from .engine import QlikCredentials
4
5
  from .rest import RestApiClient
5
6
 
6
7
 
@@ -22,8 +23,11 @@ def _check_called_once(
22
23
  def test_rest_api_client_pager():
23
24
  dummy_server_url = "https://clic.kom"
24
25
  dummy_api_key = "i-am-the-key-dont-let-others-know-about"
25
-
26
- client = RestApiClient(server_url=dummy_server_url, api_key=dummy_api_key)
26
+ credentials = QlikCredentials(
27
+ base_url=dummy_server_url,
28
+ api_key=dummy_api_key,
29
+ )
30
+ client = RestApiClient(credentials=credentials)
27
31
 
28
32
  first_page_url = "https://clic.kom/assets"
29
33
 
@@ -7,13 +7,11 @@ from ...utils import (
7
7
  deep_serialize,
8
8
  from_env,
9
9
  get_output_filename,
10
- validate_baseurl,
11
10
  write_json,
12
11
  write_summary,
13
12
  )
14
13
  from .assets import QlikAsset
15
- from .client import QlikClient
16
- from .constants import API_KEY, BASE_URL
14
+ from .client import QlikClient, QlikCredentials
17
15
 
18
16
  logger = logging.getLogger(__name__)
19
17
 
@@ -49,23 +47,18 @@ def iterate_all_data(
49
47
 
50
48
 
51
49
  def extract_all(
52
- base_url: Optional[str] = None,
53
- api_key: Optional[str] = None,
54
- output_directory: Optional[str] = None,
55
- except_http_error_statuses: Optional[List[int]] = None,
50
+ except_http_error_statuses: Optional[List[int]] = None, **kwargs
56
51
  ) -> None:
57
52
  """
58
53
  Extract data from Qlik REST API
59
54
  Store the output files locally under the given output_directory
60
55
  """
61
56
 
62
- _output_directory = output_directory or from_env(OUTPUT_DIR)
63
- _base_url = validate_baseurl(base_url or from_env(BASE_URL))
64
- _api_key = api_key or from_env(API_KEY)
57
+ credentials = QlikCredentials(**kwargs)
58
+ _output_directory = kwargs.get("output") or from_env(OUTPUT_DIR)
65
59
 
66
60
  client = QlikClient(
67
- server_url=_base_url,
68
- api_key=_api_key,
61
+ credentials=credentials,
69
62
  except_http_error_statuses=except_http_error_statuses,
70
63
  )
71
64
 
@@ -75,4 +68,4 @@ def extract_all(
75
68
  filename = get_output_filename(key.name.lower(), _output_directory, ts)
76
69
  write_json(filename, data)
77
70
 
78
- write_summary(_output_directory, ts, base_url=_base_url)
71
+ write_summary(_output_directory, ts, base_url=credentials.base_url)
@@ -1,5 +1,5 @@
1
1
  import logging
2
- from typing import Iterable, Optional, Tuple, Union
2
+ from typing import Iterable, Tuple, Union
3
3
 
4
4
  from ...utils import (
5
5
  OUTPUT_DIR,
@@ -28,29 +28,15 @@ def iterate_all_data(
28
28
  yield asset.name.lower(), deep_serialize(data)
29
29
 
30
30
 
31
- def extract_all(
32
- username: str,
33
- password: str,
34
- client_id: str,
35
- client_secret: str,
36
- security_token: str,
37
- base_url: str,
38
- output_directory: Optional[str] = None,
39
- ) -> None:
31
+ def extract_all(**kwargs) -> None:
40
32
  """
41
33
  Extract data from Salesforce REST API
42
34
  Store the output files locally under the given output_directory
43
35
  """
44
- _output_directory = output_directory or from_env(OUTPUT_DIR)
45
- creds = SalesforceCredentials(
46
- username=username,
47
- password=password,
48
- client_id=client_id,
49
- client_secret=client_secret,
50
- security_token=security_token,
51
- base_url=base_url,
52
- )
53
- client = SalesforceReportingClient(credentials=creds)
36
+ _output_directory = kwargs.get("output") or from_env(OUTPUT_DIR)
37
+ credentials = SalesforceCredentials(**kwargs)
38
+
39
+ client = SalesforceReportingClient(credentials=credentials)
54
40
  ts = current_timestamp()
55
41
 
56
42
  for key, data in iterate_all_data(client):
@@ -1,3 +1,3 @@
1
1
  from .assets import SigmaAsset
2
- from .client import CredentialsKey, SigmaClient, SigmaCredentials
2
+ from .client import SigmaClient, SigmaCredentials
3
3
  from .extract import extract_all
@@ -1,2 +1,2 @@
1
1
  from .client import SigmaClient
2
- from .credentials import CredentialsKey, SigmaCredentials
2
+ from .credentials import SigmaCredentials
@@ -5,7 +5,7 @@ from urllib.parse import urljoin
5
5
  import requests
6
6
 
7
7
  from ..assets import SigmaAsset
8
- from .credentials import CredentialsKey, SigmaCredentials
8
+ from .credentials import SigmaCredentials
9
9
  from .endpoints import EndpointFactory
10
10
  from .pagination import Pagination
11
11
 
@@ -29,6 +29,7 @@ class SigmaClient:
29
29
  self.host = credentials.host
30
30
  self.client_id = credentials.client_id
31
31
  self.api_token = credentials.api_token
32
+ self.grant_type = credentials.grant_type
32
33
  self.headers: Optional[Dict[str, str]] = None
33
34
 
34
35
  def _get_token(self) -> Dict[str, str]:
@@ -37,9 +38,9 @@ class SigmaClient:
37
38
  token_response = requests.post( # noqa: S113
38
39
  token_api_path,
39
40
  data={
40
- CredentialsKey.GRANT_TYPE.value: "client_credentials",
41
- CredentialsKey.CLIENT_ID.value: self.client_id,
42
- CredentialsKey.CLIENT_SECRET.value: self.api_token,
41
+ "grant_type": self.grant_type,
42
+ "client_id": self.client_id,
43
+ "client_secret": self.api_token,
43
44
  },
44
45
  )
45
46
  if token_response.status_code != requests.codes.OK:
@@ -1,35 +1,19 @@
1
- from dataclasses import dataclass
2
- from enum import Enum
1
+ from pydantic import Field, SecretStr
2
+ from pydantic_settings import BaseSettings, SettingsConfigDict
3
3
 
4
+ CASTOR_ENV_PREFIX = "CASTOR_SIGMA_"
4
5
 
5
- class CredentialsKey(Enum):
6
- """Value enum object for the credentials"""
7
6
 
8
- CLIENT_SECRET = "client_secret" # noqa: S105
9
- CLIENT_ID = "client_id"
10
- HOST = "host"
11
- GRANT_TYPE = "grant_type"
12
- API_TOKEN = "api_token" # noqa: S105
13
-
14
-
15
- CLIENT_ALLOWED_KEYS = (
16
- CredentialsKey.HOST.value,
17
- CredentialsKey.CLIENT_ID.value,
18
- CredentialsKey.API_TOKEN.value,
19
- )
20
-
21
-
22
- @dataclass
23
- class SigmaCredentials:
7
+ class SigmaCredentials(BaseSettings):
24
8
  """Class to handle Sigma rest API permissions"""
25
9
 
26
- api_token: str
10
+ model_config = SettingsConfigDict(
11
+ env_prefix=CASTOR_ENV_PREFIX,
12
+ extra="ignore",
13
+ populate_by_name=True,
14
+ )
15
+
16
+ api_token: str = Field(repr=False)
27
17
  client_id: str
28
18
  host: str
29
-
30
- @classmethod
31
- def from_secret(cls, secret: dict) -> "SigmaCredentials":
32
- credentials = {
33
- k: v for k, v in secret.items() if k in CLIENT_ALLOWED_KEYS
34
- }
35
- return cls(**credentials)
19
+ grant_type: str = "client_credentials"
@@ -1,5 +1,5 @@
1
1
  import logging
2
- from typing import Iterable, Iterator, Optional, Tuple, Union
2
+ from typing import Iterable, Iterator, Tuple, Union
3
3
 
4
4
  from ...utils import (
5
5
  OUTPUT_DIR,
@@ -12,7 +12,6 @@ from ...utils import (
12
12
  )
13
13
  from .assets import SigmaAsset
14
14
  from .client import SigmaClient, SigmaCredentials
15
- from .constants import API_TOKEN, CLIENT_ID, HOST
16
15
 
17
16
  logger = logging.getLogger(__name__)
18
17
 
@@ -51,27 +50,15 @@ def iterate_all_data(
51
50
  yield SigmaAsset.LINEAGES, list(deep_serialize(lineages))
52
51
 
53
52
 
54
- def extract_all(
55
- host: Optional[str] = None,
56
- client_id: Optional[str] = None,
57
- api_token: Optional[str] = None,
58
- output_directory: Optional[str] = None,
59
- ) -> None:
53
+ def extract_all(**kwargs) -> None:
60
54
  """
61
55
  Extract data from Sigma API
62
56
  Store the output files locally under the given output_directory
63
57
  """
64
58
 
65
- _output_directory = output_directory or from_env(OUTPUT_DIR)
66
- _client_id = client_id or from_env(CLIENT_ID)
67
- _host = host or from_env(HOST)
68
- _api_token = api_token or from_env(API_TOKEN)
59
+ _output_directory = kwargs.get("output") or from_env(OUTPUT_DIR)
69
60
 
70
- credentials = SigmaCredentials(
71
- host=_host,
72
- client_id=_client_id,
73
- api_token=_api_token,
74
- )
61
+ credentials = SigmaCredentials(**kwargs)
75
62
  client = SigmaClient(credentials=credentials)
76
63
 
77
64
  ts = current_timestamp()
@@ -80,4 +67,4 @@ def extract_all(
80
67
  filename = get_output_filename(key.name.lower(), _output_directory, ts)
81
68
  write_json(filename, data)
82
69
 
83
- write_summary(_output_directory, ts, host=_host)
70
+ write_summary(_output_directory, ts, host=credentials.host)
@@ -1,104 +1,57 @@
1
- from enum import Enum
2
- from typing import Dict, Literal, Optional, overload
1
+ from typing import Optional
3
2
 
4
- from ....utils import from_env
5
-
6
- _AUTH_ERROR_MSG = "Need either user and password or token_name and token"
3
+ from pydantic import field_validator, model_validator
4
+ from pydantic_settings import BaseSettings, SettingsConfigDict
7
5
 
8
6
  # To specify the default site on Tableau Server, you can use an empty string
9
7
  # https://tableau.github.io/server-client-python/docs/api-ref#authentication
10
8
  _DEFAULT_SERVER_SITE_ID = ""
11
9
 
12
-
13
- class CredentialsKey(Enum):
14
- """Value enum object for the credentials"""
15
-
16
- TABLEAU_USER = "user"
17
- TABLEAU_PASSWORD = "password" # noqa: S105
18
- TABLEAU_TOKEN_NAME = "token_name" # noqa: S105
19
- TABLEAU_TOKEN = "token" # noqa: S105
20
- TABLEAU_SITE_ID = "site_id"
21
- TABLEAU_SERVER_URL = "server_url"
22
-
23
-
24
- CREDENTIALS_ENV: Dict[CredentialsKey, str] = {
25
- CredentialsKey.TABLEAU_USER: "CASTOR_TABLEAU_USER",
26
- CredentialsKey.TABLEAU_PASSWORD: "CASTOR_TABLEAU_PASSWORD",
27
- CredentialsKey.TABLEAU_TOKEN_NAME: "CASTOR_TABLEAU_TOKEN_NAME",
28
- CredentialsKey.TABLEAU_TOKEN: "CASTOR_TABLEAU_TOKEN",
29
- CredentialsKey.TABLEAU_SITE_ID: "CASTOR_TABLEAU_SITE_ID",
30
- CredentialsKey.TABLEAU_SERVER_URL: "CASTOR_TABLEAU_SERVER_URL",
31
- }
32
-
33
-
34
- @overload
35
- def get_value(key: CredentialsKey, kwargs: dict) -> Optional[str]: ...
10
+ # In Castor APP, site_id is mandatory: users can't let this field empty
11
+ # In that case, we encourage users to write "Default" instead
12
+ _DEFAULT_SITE_ID_USER_INPUT = "default"
36
13
 
37
14
 
38
- @overload
39
- def get_value(
40
- key: CredentialsKey, kwargs: dict, optional: Literal[True]
41
- ) -> Optional[str]: ...
15
+ TABLEAU_ENV_PREFIX = "CASTOR_TABLEAU_"
42
16
 
43
17
 
44
- @overload
45
- def get_value(
46
- key: CredentialsKey, kwargs: dict, optional: Literal[False]
47
- ) -> str: ...
48
-
49
-
50
- def get_value(
51
- key: CredentialsKey,
52
- kwargs: dict,
53
- optional: bool = True,
54
- ) -> Optional[str]:
18
+ class TableauRevampCredentials(BaseSettings):
55
19
  """
56
- Returns the value of the given key:
57
- - from kwargs in priority
58
- - from ENV otherwise
59
- Raises an error if not found (unless optional)
20
+ Tableau's credentials to connect to both APIs (REST and GRAPHQL)
60
21
  """
61
22
 
62
- if key.value in kwargs:
63
- return kwargs[key.value]
64
-
65
- env_key = CREDENTIALS_ENV[key]
66
- return from_env(env_key, optional)
23
+ model_config = SettingsConfigDict(
24
+ env_prefix=TABLEAU_ENV_PREFIX,
25
+ extra="ignore",
26
+ populate_by_name=True,
27
+ )
67
28
 
29
+ server_url: str
30
+ site_id: str = ""
68
31
 
69
- class TableauRevampCredentials:
70
- """
71
- Tableau's credentials to connect to REST API
72
- """
73
-
74
- def __init__(
75
- self,
76
- *,
77
- server_url: str,
78
- site_id: Optional[str],
79
- user: Optional[str],
80
- password: Optional[str],
81
- token_name: Optional[str],
82
- token: Optional[str],
83
- ):
84
- self.user = user
85
- self.site_id = site_id or _DEFAULT_SERVER_SITE_ID
86
- self.server_url = server_url
87
- self.password = password
88
- self.token_name = token_name
89
- self.token = token
32
+ password: Optional[str] = None
33
+ token: Optional[str] = None
34
+ token_name: Optional[str] = None
35
+ user: Optional[str] = None
90
36
 
37
+ @field_validator("site_id", mode="before")
91
38
  @classmethod
92
- def from_env(cls, kwargs: dict) -> "TableauRevampCredentials":
93
- return TableauRevampCredentials(
94
- server_url=get_value(
95
- CredentialsKey.TABLEAU_SERVER_URL,
96
- kwargs,
97
- optional=False,
98
- ),
99
- site_id=get_value(CredentialsKey.TABLEAU_SITE_ID, kwargs),
100
- user=get_value(CredentialsKey.TABLEAU_USER, kwargs),
101
- password=get_value(CredentialsKey.TABLEAU_PASSWORD, kwargs),
102
- token_name=get_value(CredentialsKey.TABLEAU_TOKEN_NAME, kwargs),
103
- token=get_value(CredentialsKey.TABLEAU_TOKEN, kwargs),
104
- )
39
+ def _check_site_id(cls, site_id: Optional[str]) -> str:
40
+ if not site_id or site_id.lower() == _DEFAULT_SITE_ID_USER_INPUT:
41
+ return _DEFAULT_SERVER_SITE_ID
42
+ return site_id
43
+
44
+ @model_validator(mode="after")
45
+ def _check_user_xor_pat_login(self) -> "TableauRevampCredentials":
46
+ """
47
+ Checks that credentials are correctly input, it means either:
48
+ - User and password are filled
49
+ - Token and Token name are filled
50
+ """
51
+ user_login = self.password and self.user
52
+ pat_login = self.token_name and self.token
53
+ if not user_login and not pat_login:
54
+ raise ValueError("Either token or user identification is required")
55
+ if user_login and pat_login:
56
+ raise ValueError("Can't have both token and user identification")
57
+ return self
@@ -28,6 +28,7 @@ from .utils import build_path, tag_label
28
28
 
29
29
  logger = logging.getLogger(__name__)
30
30
 
31
+ _DATABRICKS_CLIENT_TIMEOUT = 60
31
32
  _MAX_NUMBER_OF_LINEAGE_ERRORS = 1000
32
33
  _MAX_THREADS = 10
33
34
  _RETRY_ATTEMPTS = 3
@@ -98,6 +99,8 @@ class DatabricksClient(APIClient):
98
99
  self._db_blocked = db_blocked
99
100
  self._has_table_tags = has_table_tags
100
101
  self._has_column_tags = has_column_tags
102
+
103
+ self._timeout = _DATABRICKS_CLIENT_TIMEOUT
101
104
  self.formatter = DatabricksFormatter()
102
105
 
103
106
  def execute_sql(
@@ -28,7 +28,6 @@ information_tables AS (
28
28
  i.table_id || '.' || c.column_name AS column_id,
29
29
  c.data_type,
30
30
  c.ordinal_position,
31
- c.column_default,
32
31
  c.is_nullable,
33
32
  c.character_maximum_length,
34
33
  c.character_octet_length,
@@ -59,7 +58,6 @@ raw_tables AS (
59
58
  a.attname AS column_name,
60
59
  c.oid::TEXT || '.' || a.attname AS column_id,
61
60
  a.attnum AS ordinal_position,
62
- ad.adsrc AS column_default,
63
61
  CASE
64
62
  WHEN t.typname = 'bpchar' THEN 'char'
65
63
  ELSE t.typname
@@ -90,7 +88,6 @@ tables AS (
90
88
  COALESCE(i.data_type, r.data_type) AS data_type,
91
89
  COALESCE(i.ordinal_position, r.ordinal_position) AS ordinal_position,
92
90
  COALESCE(i.is_nullable, r.is_nullable) AS is_nullable,
93
- COALESCE(i.column_default, r.column_default) AS column_default,
94
91
  i.character_maximum_length::INT AS character_maximum_length,
95
92
  i.character_octet_length::INT AS character_octet_length,
96
93
  i.numeric_precision::INT AS numeric_precision,
@@ -117,7 +114,6 @@ views_late_binding AS (
117
114
  c.data_type,
118
115
  c.ordinal_position,
119
116
  'YES' AS is_nullable,
120
- NULL::TEXT AS column_default,
121
117
  NULL::INT AS character_maximum_length,
122
118
  NULL::INT AS character_octet_length,
123
119
  NULL::INT AS numeric_precision,
@@ -162,7 +158,6 @@ external_columns AS (
162
158
  c.external_type AS data_type,
163
159
  MIN(c.columnnum) AS ordinal_position,
164
160
  CASE c.is_nullable WHEN 'false' THEN 'NO' ELSE 'YES' END AS is_nullable,
165
- NULL AS column_default,
166
161
  NULL AS character_maximum_length,
167
162
  NULL AS character_octet_length,
168
163
  NULL AS numeric_precision,