castor-extractor 0.24.22__py3-none-any.whl → 0.24.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- CHANGELOG.md +20 -0
- castor_extractor/transformation/coalesce/client/client.py +14 -3
- castor_extractor/transformation/dbt/client.py +15 -7
- castor_extractor/transformation/dbt/client_test.py +5 -5
- castor_extractor/utils/__init__.py +1 -0
- castor_extractor/utils/client/api/client.py +14 -6
- castor_extractor/utils/salesforce/client.py +25 -0
- castor_extractor/utils/url.py +48 -0
- castor_extractor/utils/url_test.py +55 -0
- castor_extractor/visualization/powerbi/client/client.py +23 -2
- castor_extractor/visualization/powerbi/client/client_test.py +1 -1
- castor_extractor/visualization/strategy/assets.py +7 -1
- castor_extractor/visualization/strategy/client/client.py +89 -117
- castor_extractor/visualization/strategy/client/properties.py +145 -0
- {castor_extractor-0.24.22.dist-info → castor_extractor-0.24.27.dist-info}/METADATA +21 -1
- {castor_extractor-0.24.22.dist-info → castor_extractor-0.24.27.dist-info}/RECORD +19 -16
- {castor_extractor-0.24.22.dist-info → castor_extractor-0.24.27.dist-info}/LICENCE +0 -0
- {castor_extractor-0.24.22.dist-info → castor_extractor-0.24.27.dist-info}/WHEEL +0 -0
- {castor_extractor-0.24.22.dist-info → castor_extractor-0.24.27.dist-info}/entry_points.txt +0 -0
CHANGELOG.md CHANGED
@@ -1,5 +1,25 @@
 # Changelog
 
+## 0.24.27 - 2025-06-20
+
+* Strategy: extract logical tables
+
+## 0.24.26 - 2025-06-16
+
+* Coalesce: increase _MAX_ERRORS client parameter
+
+## 0.24.25 - 2025-06-12
+
+* DBT: Fix API base url
+
+## 0.24.24 - 2025-06-06
+
+* Power BI: handle rate limit issues when extracting pages
+
+## 0.24.23 - 2025-06-05
+
+* Salesforce: print response's error message when authentication fails
+
 ## 0.24.22 - 2025-05-27
 
 * Add retry for `Request.Timeout` on **ApiClient**
castor_extractor/transformation/coalesce/client/client.py CHANGED
@@ -1,6 +1,9 @@
+import logging
 from http import HTTPStatus
 from typing import Iterator, Optional
 
+from requests import ConnectionError
+
 from ....utils import (
     APIClient,
     BearerAuth,
@@ -16,7 +19,9 @@ from .type import NodeIDToNamesMapping
 from .utils import column_names_per_node, is_test, test_names_per_node
 
 _LIMIT_MAX = 1_000
-_MAX_ERRORS =
+_MAX_ERRORS = 200
+
+logger = logging.getLogger(__name__)
 
 
 def _run_result_payload(result: dict, query_result: dict) -> dict:
@@ -80,8 +85,14 @@ class CoalesceClient(APIClient):
         result = self._get(endpoint=endpoint)
         nodes: list[dict] = []
         for node in result["data"]:
-
-
+            try:
+                details = self._node_details(environment_id, node["id"])
+                nodes.append({**node, **details})
+            except ConnectionError as e:
+                node_id = node["id"]
+                message = f"ConnectionError, environment: {environment_id}, node: {node_id}"
+                logger.warning(message)
+                raise e
         return nodes
 
     def _fetch_all_nodes(self) -> SerializedAsset:
castor_extractor/transformation/dbt/client.py CHANGED
@@ -8,12 +8,13 @@ from typing import Literal, Optional
 import requests
 from dateutil.parser import parse
 
+from ...utils.url import add_path
 from .credentials import DbtCredentials
 
 logger = logging.getLogger(__name__)
 
 
-_URL_SUFFIX = "/api/v2/accounts"
+_URL_SUFFIX = "/api/v2/accounts/"
 
 _DATA_KEY = "data"
 _SUCCESSFUL_RUN_STATUS = 10
@@ -52,7 +53,7 @@ class DbtClient:
         self._credentials = credentials
         self._account_url = _account_url(self._credentials.host)
         self._session = requests.Session()
-        self._account_id:
+        self._account_id: str = self._infer_account_id()
 
     def _headers(self, content_type: ContentType) -> dict:
         return {
@@ -88,16 +89,16 @@ class DbtClient:
             return result[_DATA_KEY]
         return result
 
-    def _infer_account_id(self) ->
+    def _infer_account_id(self) -> str:
         result = self._call(url=self._account_url)
-        return result[0]["id"]
+        return str(result[0]["id"])
 
     def list_job_identifiers(self) -> set[int]:
         """
        Return the IDs of all non-deleted jobs for this account
        https://docs.getdbt.com/dbt-cloud/api-v2-legacy#tag/Jobs/operation/listJobsForAccount
        """
-        url =
+        url = add_path(self._account_url, self._account_id, "jobs", "/")
         jobs = self._call(url)
         return {job["id"] for job in jobs if not _is_deleted(job)}
 
@@ -110,7 +111,7 @@ class DbtClient:
        Extract the last successful run id, optionally filtered on a given datetime range
        https://docs.getdbt.com/dbt-cloud/api-v2#tag/Runs/operation/listRunsForAccount
        """
-        url =
+        url = add_path(self._account_url, self._account_id, "runs", "/")
 
         params = {
             "job_definition_id": job_id or self._credentials.job_id,
@@ -142,7 +143,14 @@ class DbtClient:
        Fetch dbt manifest or run results
        https://docs.getdbt.com/dbt-cloud/api-v2-legacy#tag/Runs/operation/getArtifactsByRunId
        """
-        url =
+        url = add_path(
+            self._account_url,
+            self._account_id,
+            "runs",
+            str(run_id),
+            "artifacts",
+            artifact,
+        )
         logger.info(
             f"Extracting {artifact} from run id {run_id} with url {url}"
         )
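Note on the new URL building (illustrative, not part of the diff): _URL_SUFFIX now ends with a slash and add_path strips it back off before joining, so the artifacts URL resolves to the shape asserted in the tests below. Assuming an account id of 40, run id 12345 and the manifest artifact:

from castor_extractor.utils.url import add_path

url = add_path(
    "https://cloud.getdbt.com/api/v2/accounts/",  # _account_url with the new trailing slash
    "40",           # account id, now kept as a string
    "runs",
    "12345",        # str(run_id)
    "artifacts",
    "manifest.json",
)
# -> "https://cloud.getdbt.com/api/v2/accounts/40/runs/12345/artifacts/manifest.json"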
castor_extractor/transformation/dbt/client_test.py CHANGED
@@ -46,7 +46,7 @@ def test_DbtClient_last_run():
     mock_response_default_job = [{"id": 1, "finished_at": _OLD_DATE_STR}]
     mock_response_job_42 = [{"id": 2, "finished_at": _RECENT_DATE_STR}]
 
-    with patch(infer_path, return_value=40), patch(call_path) as mocked_call:
+    with patch(infer_path, return_value="40"), patch(call_path) as mocked_call:
         credentials = DbtCredentials(token="some-token", job_id=default_job_id)
 
         dbt_client = DbtClient(credentials=credentials)
@@ -85,7 +85,7 @@ def test_DbtClient_list_job_identifiers():
         {"id": 395, "state": 1},
     ]
 
-    with patch(infer_path, return_value=40), patch(call_path) as mocked_call:
+    with patch(infer_path, return_value="40"), patch(call_path) as mocked_call:
         mocked_call.return_value = jobs
         credentials = DbtCredentials(token="some-token", job_id="1")
         dbt_client = DbtClient(credentials=credentials)
@@ -100,7 +100,7 @@ def test_DbtClient_fetch_artifacts():
     run_id = 12345
     url = "https://cloud.getdbt.com/api/v2/accounts/40/runs/{}/artifacts/{}"
 
-    with patch(infer_path, return_value=40), patch(call_path) as mocked_call:
+    with patch(infer_path, return_value="40"), patch(call_path) as mocked_call:
         credentials = DbtCredentials(token="some-token", job_id="1")
         dbt_client = DbtClient(credentials=credentials)
 
@@ -123,7 +123,7 @@ def test_DbtClient_fetch_artifacts():
 
 def test___account_url():
     base_url = "https://cloud.getdbt.com"
-    assert _account_url(base_url) == "https://cloud.getdbt.com/api/v2/accounts"
+    assert _account_url(base_url) == "https://cloud.getdbt.com/api/v2/accounts/"
 
     base_url = "https://emea.dbt.com/"
-    assert _account_url(base_url) == "https://emea.dbt.com/api/v2/accounts"
+    assert _account_url(base_url) == "https://emea.dbt.com/api/v2/accounts/"
castor_extractor/utils/__init__.py CHANGED
@@ -52,6 +52,7 @@ from .time import (
     yesterday,
 )
 from .type import Callback, Getter, JsonType, SerializedAsset
+from .url import add_path as add_path_to_url, url_from
 from .validation import clean_path, validate_baseurl
 from .write import (
     get_output_filename,
castor_extractor/utils/client/api/client.py CHANGED
@@ -76,11 +76,6 @@ class APIClient:
         self._auth = auth
         self._safe_mode = safe_mode
 
-    @retry(
-        exceptions=_TIMEOUT_RETRY_EXCEPTIONS,
-        max_retries=_TIMEOUT_RETRY_COUNT,
-        base_ms=_TIMEOUT_RETRY_BASE_MS,
-    )
     def _call(
         self,
         method: HttpMethod,
@@ -90,6 +85,7 @@ class APIClient:
         params: Optional[dict] = None,
         data: Optional[dict] = None,
         pagination_params: Optional[dict] = None,
+        retry_on_timeout: bool = True,
     ) -> Response:
         headers = headers or {}
 
@@ -102,7 +98,17 @@
 
         url = build_url(self._host, endpoint)
 
-
+        if retry_on_timeout:
+            retry_wrapper = retry(
+                exceptions=_TIMEOUT_RETRY_EXCEPTIONS,
+                max_retries=_TIMEOUT_RETRY_COUNT,
+                base_ms=_TIMEOUT_RETRY_BASE_MS,
+            )
+            request_fn = retry_wrapper(requests.request)
+        else:
+            request_fn = requests.request
+
+        return request_fn(
             method=method,
             url=url,
             auth=self._auth,
@@ -128,6 +134,7 @@ class APIClient:
         params: Optional[dict] = None,
         data: Optional[dict] = None,
         pagination_params: Optional[dict] = None,
+        retry_on_timeout: bool = True,
     ):
         response = self._call(
             method="GET",
@@ -136,6 +143,7 @@ class APIClient:
             data=data,
             pagination_params=pagination_params,
             headers=headers,
+            retry_on_timeout=retry_on_timeout,
         )
         if response.status_code == HTTPStatus.UNAUTHORIZED:
             self._auth.refresh_token()
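The timeout retry is no longer baked into _call through a class-level decorator; it is applied per request, so callers can opt out with retry_on_timeout=False (the Power BI pages endpoint below does exactly that). A self-contained toy sketch of the pattern, where the simplified retry helper only stands in for the package's real decorator (whose internals are unchanged by this diff):

import time
import requests

def retry(exceptions: tuple, max_retries: int, base_ms: int):
    # Toy retry decorator: re-run the callable when one of `exceptions` is raised.
    def decorator(fn):
        def wrapped(*args, **kwargs):
            for attempt in range(max_retries + 1):
                try:
                    return fn(*args, **kwargs)
                except exceptions:
                    if attempt == max_retries:
                        raise
                    time.sleep(base_ms / 1000)
        return wrapped
    return decorator

def call(url: str, retry_on_timeout: bool = True) -> requests.Response:
    # Wrap requests.request only when the caller wants the timeout retry,
    # mirroring the new branch inside APIClient._call.
    if retry_on_timeout:
        request_fn = retry((requests.Timeout,), max_retries=2, base_ms=500)(requests.request)
    else:
        request_fn = requests.request
    return request_fn(method="GET", url=url)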
castor_extractor/utils/salesforce/client.py CHANGED
@@ -1,9 +1,11 @@
 import logging
 from collections.abc import Iterator
 from functools import partial
+from http import HTTPStatus
 from typing import Optional
 
 import requests
+from requests import HTTPError, Response
 
 from ...utils import (
     APIClient,
@@ -21,6 +23,21 @@ logger = logging.getLogger(__name__)
 SALESFORCE_TIMEOUT_S = 120
 
 
+class SalesforceBadRequestError(HTTPError):
+    """
+    Custom Exception to print the response's text when an error occurs
+    during Salesforce's authentication.
+    """
+
+    def __init__(self, response: Response):
+        text = response.text
+        message = (
+            f"{response.status_code} Client Error: {response.reason} for url: {response.url}"
+            f"\nResponse text: {text}"
+        )
+        super().__init__(message, response=response)
+
+
 class SalesforceAuth(BearerAuth):
     _AUTH_ENDPOINT = "services/oauth2/token"
 
@@ -29,8 +46,16 @@ class SalesforceAuth(BearerAuth):
         self._token_payload = credentials.token_request_payload()
 
     def fetch_token(self) -> Optional[str]:
+        """
+        Fetches the access token from Salesforce using the provided credentials.
+        A custom Exception is raised if the request fails with a 400 status code.
+        """
         url = build_url(self._host, self._AUTH_ENDPOINT)
         response = requests.post(url, "POST", params=self._token_payload)
+
+        if response.status_code == HTTPStatus.BAD_REQUEST:
+            raise SalesforceBadRequestError(response)
+
         handled_response = handle_response(response)
         return handled_response["access_token"]
 
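A quick check of the message the new exception produces, using a hand-built Response object (all values below are placeholders for illustration, not real Salesforce output):

from requests.models import Response

fake = Response()
fake.status_code = 400
fake.reason = "Bad Request"
fake.url = "https://example.my.salesforce.com/services/oauth2/token"
fake._content = b'{"error": "invalid_grant"}'  # placeholder body

# str(SalesforceBadRequestError(fake)) reads roughly:
# 400 Client Error: Bad Request for url: https://example.my.salesforce.com/services/oauth2/token
# Response text: {"error": "invalid_grant"}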
castor_extractor/utils/url.py ADDED
@@ -0,0 +1,48 @@
+from urllib.parse import urlsplit, urlunsplit
+
+
+def url_from(
+    scheme: str = "",
+    netloc: str = "",
+    path: str = "",
+    query: str = "",
+    fragment: str = "",
+) -> str:
+    """Constructs an url from part"""
+    return urlunsplit((scheme, netloc, path, query, fragment))
+
+
+def add_path(base_url: str, *paths: str) -> str:
+    """Adds a path from a base_url."""
+
+    if not is_valid(base_url):
+        raise ValueError(f"Invalid base_url: {base_url}")
+    base_url = _format_base_url(base_url)
+    split = urlsplit(base_url)
+
+    return url_from(
+        split.scheme,
+        split.netloc,
+        "/".join([split.path] + [p.strip("/") for p in paths]),
+        split.query,
+        split.fragment,
+    )
+
+
+def _format_base_url(url: str) -> str:
+    """Remove trailing slash in base url, if applicable."""
+    if url.endswith("/"):
+        return url[:-1]
+    return url
+
+
+def is_valid(
+    url: str,
+    valid_schemes: tuple[str, ...] = ("http", "https"),
+) -> bool:
+    """
+    Simple url validation that ensures the scheme and that there is an hostname.
+    Malformatted url can pass this check such as http://http://toto.com
+    """
+    split = urlsplit(url)
+    return split.scheme in valid_schemes and bool(split.netloc)
castor_extractor/utils/url_test.py ADDED
@@ -0,0 +1,55 @@
+from pytest import raises
+
+from ..utils.url import (
+    add_path,
+    is_valid,
+    url_from,
+)
+
+
+def test_add_path():
+    base = "https://test.com"
+
+    # simple
+    assert add_path(base, "toto") == f"{base}/toto"
+
+    # multiple parts
+    assert add_path(base, "to", "ta") == f"{base}/to/ta"
+
+    # multiple parts with slash
+    assert add_path(base, "a/b", "/c/d") == f"{base}/a/b/c/d"
+
+    # base with path
+    assert add_path(f"{base}/my/path", "/1/2/", "3") == f"{base}/my/path/1/2/3"
+
+    # base with query string and fragment
+    assert add_path(f"{base}?q=2#frag", "1/2") == f"{base}/1/2?q=2#frag"
+
+    # bad base url
+    with raises(ValueError):
+        add_path("toto", "toto")
+
+    # trailing slash
+    base = "https://test.com/"
+
+    # multiple parts with slash
+    assert add_path(base, "a/b", "/c/d") == "https://test.com/a/b/c/d"
+
+
+def test_url_is_valid():
+    # valid
+    assert is_valid("https://google.com")
+    assert is_valid("http://user:pass@test.com:444/my/path?my=query#fragment")
+    assert is_valid("ftp://hello.com", valid_schemes=("ftp",))
+
+    # invalid
+    assert not is_valid("hello.com")
+    assert not is_valid("ftp://hello.com")
+    assert not is_valid("http://")
+
+
+def test_url_from():
+    assert url_from() == ""
+    assert url_from("http") == "http://"
+    assert url_from("https", "google.com") == "https://google.com"
+    assert url_from(netloc="te.st", query="q=3") == "//te.st?q=3"
castor_extractor/visualization/powerbi/client/client.py CHANGED
@@ -2,6 +2,7 @@ import logging
 from collections.abc import Iterator
 from datetime import date
 from functools import partial
+from http import HTTPStatus
 from time import sleep
 from typing import Optional, Union
 
@@ -11,6 +12,7 @@ from requests import HTTPError
 from ....utils import (
     APIClient,
     fetch_all_pages,
+    retry_request,
 )
 from ..assets import PowerBiAsset
 from .authentication import PowerBiBearerAuth
@@ -27,6 +29,9 @@ METADATA_BATCH_SIZE = 100
 POWERBI_SCAN_STATUS_DONE = "Succeeded"
 POWERBI_SCAN_SLEEP_S = 1
 
+MAX_RETRY_PAGES = 1
+RETRY_PAGES_TIMEOUT_MS = 35 * 1000  # 35 seconds
+
 logger = logging.getLogger(__name__)
 
 
@@ -71,6 +76,23 @@ class PowerbiClient(APIClient):
         """
         yield from self._get(self.endpoint_factory.dashboards())[Keys.VALUE]
 
+    @retry_request(
+        status_codes=(HTTPStatus.TOO_MANY_REQUESTS,),
+        max_retries=MAX_RETRY_PAGES,
+        base_ms=RETRY_PAGES_TIMEOUT_MS,
+    )
+    def _pages(self, report_id: str) -> Iterator[dict]:
+        """
+        Extracts the pages of a report.
+        This endpoint is very flaky and frequently returns 400 and 404 errors.
+        After around 50 requests, it hits the rate limit and returns 429 Too Many Requests,
+        which is why we retry it after a short delay.
+        Timeouts are also common; we must skip them because the extraction task
+        might take too long otherwise.
+        """
+        pages_endpoint = self.endpoint_factory.pages(report_id)
+        return self._get(pages_endpoint, retry_on_timeout=False)[Keys.VALUE]
+
     def _reports(self) -> Iterator[dict]:
         """
         Returns a list of reports for the organization.
@@ -83,8 +105,7 @@ class PowerbiClient(APIClient):
             report_id = report.get(Keys.ID)
 
             try:
-
-                pages = self._get(pages_endpoint)[Keys.VALUE]
+                pages = self._pages(report_id)
                 report["pages"] = pages
             except (requests.HTTPError, requests.exceptions.Timeout) as e:
                 logger.debug(e)
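retry_request is an existing helper in castor_extractor's utils; its implementation is not shown in this diff. For readers unfamiliar with the pattern, a hypothetical stand-in with the same call shape as used above could look like the following (assumed behaviour, analogous to the timeout sketch earlier but keyed on HTTP status codes: re-run the wrapped call after base_ms milliseconds when it raises an HTTPError carrying one of the listed status codes):

import time
from functools import wraps
from requests import HTTPError

def retry_request(status_codes, max_retries, base_ms):
    # Hypothetical sketch, not the package's actual implementation.
    def decorator(fn):
        @wraps(fn)
        def wrapped(*args, **kwargs):
            for attempt in range(max_retries + 1):
                try:
                    return fn(*args, **kwargs)
                except HTTPError as error:
                    status = error.response.status_code if error.response is not None else None
                    if status not in status_codes or attempt == max_retries:
                        raise
                    time.sleep(base_ms / 1000)  # e.g. wait 35 s before retrying a 429
        return wrapped
    return decorator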
castor_extractor/visualization/powerbi/client/client_test.py CHANGED
@@ -85,7 +85,7 @@ def test__reports(power_bi_client):
     reports = list(power_bi_client._reports())
     calls = [
         call(ENDPOINT_FACTORY.reports()),
-        call(ENDPOINT_FACTORY.pages("1")),
+        call(ENDPOINT_FACTORY.pages("1"), retry_on_timeout=False),
     ]
     mocked_get.assert_has_calls(calls)
     assert reports == [
castor_extractor/visualization/strategy/assets.py CHANGED
@@ -1,14 +1,20 @@
-from ...types import ExternalAsset
+from ...types import ExternalAsset, classproperty
 
 
 class StrategyAsset(ExternalAsset):
     """Strategy assets that can be extracted"""
 
     ATTRIBUTE = "attribute"
+    COLUMN = "column"
     CUBE = "cube"
     DASHBOARD = "dashboard"
     DOCUMENT = "document"
     FACT = "fact"
+    LOGICAL_TABLE = "logical_table"
     METRIC = "metric"
     REPORT = "report"
     USER = "user"
+
+    @classproperty
+    def optional(cls) -> set["StrategyAsset"]:
+        return {StrategyAsset.COLUMN}
castor_extractor/visualization/strategy/client/client.py CHANGED
@@ -1,14 +1,18 @@
 import logging
 from collections.abc import Iterator
-from enum import Enum
 from typing import Any, Callable, Optional
 from urllib.parse import urlparse
 
 from mstrio.connection import Connection # type: ignore
 from mstrio.helpers import IServerError # type: ignore
 from mstrio.modeling import ( # type: ignore
+    Attribute,
+    LogicalTable,
+    PhysicalTable,
+    PhysicalTableType,
     list_attributes,
     list_facts,
+    list_logical_tables,
     list_metrics,
 )
 from mstrio.project_objects import ( # type: ignore
@@ -19,93 +23,21 @@ from mstrio.project_objects import ( # type: ignore
     list_reports,
 )
 from mstrio.server import Environment # type: ignore
-from mstrio.types import ObjectSubTypes, ObjectTypes # type: ignore
 from mstrio.users_and_groups import User, list_users # type: ignore
 from mstrio.utils.entity import Entity # type: ignore
-from mstrio.utils.helper import is_dashboard # type: ignore
-from pydantic import BaseModel, ConfigDict
 
 from ..assets import StrategyAsset
 from .credentials import StrategyCredentials
+from .properties import (
+    column_properties,
+    format_url,
+    list_dependencies,
+    lookup_table_id,
+    safe_get_property,
+)
 
 logger = logging.getLogger(__name__)
 
-_BATCH_SIZE: int = 100
-
-
-class URLTemplates(Enum):
-    DASHBOARD = "https://{hostname}/MicroStrategyLibrary/app/{project_id}/{id_}"
-    DOCUMENT = "https://{hostname}/MicroStrategy/servlet/mstrWeb?documentID={id_}&projectID={project_id}"
-    REPORT = "https://{hostname}/MicroStrategy/servlet/mstrWeb?reportID={id_}&projectID={project_id}"
-    FOLDER = "https://{hostname}/MicroStrategy/servlet/mstrWeb?folderID={id_}&projectID={project_id}"
-
-
-def _is_dashboard(entity: Entity) -> bool:
-    """
-    Returns True if the entity is a Dashboard. They can only be distinguished
-    from Documents by checking the `view_media` property.
-    """
-    is_type_document = entity.type == ObjectTypes.DOCUMENT_DEFINITION
-    return is_type_document and is_dashboard(entity.view_media)
-
-
-def _is_report(entity: Entity) -> bool:
-    """
-    Returns True if the entity is a Report. Cubes share the same type as Reports,
-    so the subtype must be checked.
-    """
-    is_type_report = entity.type == ObjectTypes.REPORT_DEFINITION
-    is_subtype_cube = entity.subtype == ObjectSubTypes.OLAP_CUBE.value
-    return is_type_report and not is_subtype_cube
-
-
-def _safe_get_attribute(entity: Entity, attribute: str) -> Optional[str]:
-    """
-    Some properties may raise an error. Example: retrieving a Report's `sql` fails if the Report has not been published.
-    This safely returns the attribute value, or None if the retrieval fails.
-    """
-    try:
-        value = getattr(entity, attribute)
-    except IServerError as e:
-        logger.error(f"Could not get {attribute} for entity {entity.id}: {e}")
-        value = None
-    return value
-
-
-class Dependency(BaseModel):
-    id: str
-    name: str
-    subtype: int
-    type: int
-
-    model_config = ConfigDict(extra="ignore")
-
-
-def _list_dependencies(entity: Entity) -> list[dict]:
-    """Lists the entity's dependencies, keeping only relevant fields."""
-    dependencies: list[dict] = []
-
-    offset = 0
-    while True:
-        batch = entity.list_dependencies(offset=offset, limit=_BATCH_SIZE)
-        dependencies.extend(batch)
-        if len(batch) < _BATCH_SIZE:
-            break
-        offset += _BATCH_SIZE
-
-    return [
-        Dependency(**dependency).model_dump() for dependency in dependencies
-    ]
-
-
-def _level_1_folder_id(folders: list[dict]) -> str:
-    """Searches for the first enclosing folder and returns its ID."""
-    for folder in folders:
-        if folder["level"] == 1:
-            return folder["id"]
-
-    raise ValueError("No level 1 folder found")
-
 
 class StrategyClient:
     """Connect to Strategy through mstrio-py and fetch main assets."""
@@ -116,6 +48,7 @@ class StrategyClient:
             base_url=self.base_url,
             username=credentials.username,
             password=credentials.password,
+            verbose=False,
         )
 
         self.hostname = urlparse(self.base_url).hostname
@@ -129,36 +62,6 @@ class StrategyClient:
     def close(self):
         self.connection.close()
 
-    def _url(self, entity: Entity) -> str:
-        """
-        Formats the right URL.
-        * Dashboards : viewed in MicroStrategy
-        * Reports and Documents : viewed in MicroStrategy Web
-        * other (i.e. Cubes): the URL leads to the folder in MicroStrategy Web
-        """
-        if _is_dashboard(entity):
-            id_ = entity.id
-            template = URLTemplates.DASHBOARD
-
-        elif entity.type == ObjectTypes.DOCUMENT_DEFINITION:
-            id_ = entity.id
-            template = URLTemplates.DOCUMENT
-
-        elif _is_report(entity):
-            id_ = entity.id
-            template = URLTemplates.REPORT
-
-        else:
-            # default to folder URL
-            id_ = _level_1_folder_id(entity.ancestors)
-            template = URLTemplates.FOLDER
-
-        return template.value.format(
-            hostname=self.hostname,
-            id_=id_,
-            project_id=entity.project_id,
-        )
-
     def _common_entity_properties(
         self,
         entity: Entity,
@@ -169,7 +72,7 @@ class StrategyClient:
        Returns the entity's properties, including its dependencies
        and optional URL and/or description.
        """
-        dependencies =
+        dependencies = list_dependencies(entity)
         owner_id = entity.owner.id if isinstance(entity.owner, User) else None
         properties = {
             "dependencies": dependencies,
@@ -182,23 +85,80 @@ class StrategyClient:
         }
 
         if with_url:
-
+            assert self.hostname
+            properties["url"] = format_url(
+                entity=entity, hostname=self.hostname
+            )
 
         if with_description:
-            properties["description"] =
-
+            properties["description"] = safe_get_property(entity, "description")
+
+        return properties
+
+    def _attributes_properties(self, attribute: Attribute) -> dict[str, Any]:
+        """
+        Attributes have a lookup table, which we need to compute the table lineage.
+        """
+        return {
+            **self._common_entity_properties(attribute, with_url=False),
+            "lookup_table_id": lookup_table_id(attribute),
+        }
+
+    def _physical_table_properties(
+        self, table: Optional[PhysicalTable]
+    ) -> Optional[dict[str, Any]]:
+        """
+        Returns the properties of the physical table, including its columns.
+        A physical table can have 1 of these types:
+        * "normal": meaning it matches 1 warehouse table
+        * "sql": it is based on an SQL statement
+        Other types are not supported (and they technically shouldn't be possible.)
+        """
+        if not table:
+            return None
+
+        properties = {
+            "id": table.id,
+            "name": table.name,
+            "type": table.table_type.value,
+            "columns": column_properties(table.columns),
+        }
+
+        if table.table_type == PhysicalTableType.SQL:
+            physical_table = PhysicalTable(
+                connection=self.connection,
+                id=table.id,
             )
+            properties["sql_statement"] = physical_table.sql_statement
+
+        elif table.table_type == PhysicalTableType.NORMAL:
+            properties["table_prefix"] = table.table_prefix
+            properties["table_name"] = table.table_name
 
         return properties
 
+    def _logical_table_properties(self, table: LogicalTable) -> dict[str, Any]:
+        """
+        Returns properties for:
+        * the logical table itself
+        * its physical table (though it may not be accessible)
+        * the columns of the physical table
+        """
+        physical_table = safe_get_property(table, "physical_table")
+        return {
+            "id": table.id,
+            "name": table.name,
+            "physical_table": self._physical_table_properties(physical_table),
+        }
+
     def _report_properties(self, report: Report) -> dict[str, Any]:
         """
        Report properties contain an optional SQL source query. Due to a typing
        bug in the mstrio package, the typing must be ignored.
        """
         properties = self._common_entity_properties(report) # type: ignore
-        properties["url"] = self.
-        properties["sql"] =
+        properties["url"] = format_url(entity=report, hostname=self.hostname) # type: ignore
+        properties["sql"] = safe_get_property(report, "sql") # type: ignore
         return properties
 
     @staticmethod
@@ -243,7 +203,7 @@ class StrategyClient:
     def _fetch_attributes(self) -> Iterator[dict[str, Any]]:
         return self._fetch_entities(
             list_attributes,
-
+            custom_property_extractor=self._attributes_properties,
         )
 
     def _fetch_cubes(self) -> Iterator[dict[str, Any]]:
@@ -263,6 +223,15 @@ class StrategyClient:
             with_description=False,
         )
 
+    def _fetch_logical_tables(self) -> Iterator[dict[str, Any]]:
+        """
+        Yields all logical tables, including their physical tables and their columns.
+        """
+        return self._fetch_entities(
+            list_logical_tables,
+            custom_property_extractor=self._logical_table_properties,
+        )
+
     def _fetch_metrics(self) -> Iterator[dict[str, Any]]:
         return self._fetch_entities(
             list_metrics,
@@ -298,6 +267,9 @@ class StrategyClient:
         elif asset == StrategyAsset.FACT:
             yield from self._fetch_facts()
 
+        elif asset == StrategyAsset.LOGICAL_TABLE:
+            yield from self._fetch_logical_tables()
+
         elif asset == StrategyAsset.METRIC:
             yield from self._fetch_metrics()
 
castor_extractor/visualization/strategy/client/properties.py ADDED
@@ -0,0 +1,145 @@
+import logging
+from enum import Enum
+from typing import Any, Optional
+
+from mstrio.helpers import IServerError # type: ignore
+from mstrio.modeling import ( # type: ignore
+    Attribute,
+    TableColumn,
+)
+from mstrio.types import ObjectSubTypes, ObjectTypes # type: ignore
+from mstrio.utils.entity import Entity # type: ignore
+from mstrio.utils.helper import is_dashboard # type: ignore
+from pydantic import BaseModel, ConfigDict
+
+logger = logging.getLogger(__name__)
+
+_BATCH_SIZE: int = 100
+
+
+class URLTemplates(Enum):
+    DASHBOARD = "https://{hostname}/MicroStrategyLibrary/app/{project_id}/{id_}"
+    DOCUMENT = "https://{hostname}/MicroStrategy/servlet/mstrWeb?documentID={id_}&projectID={project_id}"
+    REPORT = "https://{hostname}/MicroStrategy/servlet/mstrWeb?reportID={id_}&projectID={project_id}"
+    FOLDER = "https://{hostname}/MicroStrategy/servlet/mstrWeb?folderID={id_}&projectID={project_id}"
+
+
+class Dependency(BaseModel):
+    id: str
+    name: str
+    subtype: int
+    type: int
+
+    model_config = ConfigDict(extra="ignore")
+
+
+def list_dependencies(entity: Entity) -> list[dict]:
+    """Lists the entity's dependencies, keeping only relevant fields."""
+    dependencies: list[dict] = []
+
+    offset = 0
+    while True:
+        batch = entity.list_dependencies(offset=offset, limit=_BATCH_SIZE)
+        dependencies.extend(batch)
+        if len(batch) < _BATCH_SIZE:
+            break
+        offset += _BATCH_SIZE
+
+    return [
+        Dependency(**dependency).model_dump() for dependency in dependencies
+    ]
+
+
+def _is_dashboard(entity: Entity) -> bool:
+    """
+    Returns True if the entity is a Dashboard. They can only be distinguished
+    from Documents by checking the `view_media` property.
+    """
+    is_type_document = entity.type == ObjectTypes.DOCUMENT_DEFINITION
+    return is_type_document and is_dashboard(entity.view_media)
+
+
+def _is_report(entity: Entity) -> bool:
+    """
+    Returns True if the entity is a Report. Cubes share the same type as Reports,
+    so the subtype must be checked.
+    """
+    is_type_report = entity.type == ObjectTypes.REPORT_DEFINITION
+    is_subtype_cube = entity.subtype == ObjectSubTypes.OLAP_CUBE.value
+    return is_type_report and not is_subtype_cube
+
+
+def format_url(entity: Entity, hostname: str) -> str:
+    """
+    Formats the right URL.
+    * Dashboards : viewed in MicroStrategy
+    * Reports and Documents : viewed in MicroStrategy Web
+    * other (i.e. Cubes): the URL leads to the folder in MicroStrategy Web
+    """
+    if _is_dashboard(entity):
+        id_ = entity.id
+        template = URLTemplates.DASHBOARD
+
+    elif entity.type == ObjectTypes.DOCUMENT_DEFINITION:
+        id_ = entity.id
+        template = URLTemplates.DOCUMENT
+
+    elif _is_report(entity):
+        id_ = entity.id
+        template = URLTemplates.REPORT
+
+    else:
+        # default to folder URL
+        id_ = level_1_folder_id(entity.ancestors)
+        template = URLTemplates.FOLDER
+
+    return template.value.format(
+        hostname=hostname,
+        id_=id_,
+        project_id=entity.project_id,
+    )
+
+
+def safe_get_property(entity: Entity, attribute: str) -> Optional[str]:
+    """
+    Some properties may raise an error. Example: retrieving a Report's `sql` fails if the Report has not been published.
+    This safely returns the attribute value, or None if the retrieval fails.
+    """
+    try:
+        value = getattr(entity, attribute)
+    except IServerError as e:
+        logger.error(f"Could not get {attribute} for entity {entity.id}: {e}")
+        value = None
+    return value
+
+
+def column_properties(columns: list[TableColumn]) -> list[dict[str, Any]]:
+    """Returns the properties of a physical table's columns."""
+    properties: list[dict[str, Any]] = []
+
+    for column in columns:
+        column_properties = {
+            "id": column.id,
+            "name": column.name,
+            "column_name": column.column_name,
+        }
+        properties.append(column_properties)
+
+    return properties
+
+
+def level_1_folder_id(folders: list[dict]) -> str:
+    """Searches for the first enclosing folder and returns its ID."""
+    for folder in folders:
+        if folder["level"] == 1:
+            return folder["id"]
+
+    raise ValueError("No level 1 folder found")
+
+
+def lookup_table_id(attribute: Attribute):
+    """Returns the lookup table's ID, if there is one."""
+    lookup_table = attribute.attribute_lookup_table
+    if not lookup_table:
+        return None
+    return lookup_table.object_id
{castor_extractor-0.24.22.dist-info → castor_extractor-0.24.27.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: castor-extractor
-Version: 0.24.22
+Version: 0.24.27
 Summary: Extract your metadata assets.
 Home-page: https://www.castordoc.com/
 License: EULA
@@ -215,6 +215,26 @@ For any questions or bug report, contact us at [support@coalesce.io](mailto:support@coalesce.io)
 
 # Changelog
 
+## 0.24.27 - 2025-06-20
+
+* Strategy: extract logical tables
+
+## 0.24.26 - 2025-06-16
+
+* Coalesce: increase _MAX_ERRORS client parameter
+
+## 0.24.25 - 2025-06-12
+
+* DBT: Fix API base url
+
+## 0.24.24 - 2025-06-06
+
+* Power BI: handle rate limit issues when extracting pages
+
+## 0.24.23 - 2025-06-05
+
+* Salesforce: print response's error message when authentication fails
+
 ## 0.24.22 - 2025-05-27
 
 * Add retry for `Request.Timeout` on **ApiClient**
{castor_extractor-0.24.22.dist-info → castor_extractor-0.24.27.dist-info}/RECORD CHANGED
@@ -1,4 +1,4 @@
-CHANGELOG.md,sha256=
+CHANGELOG.md,sha256=ThBExaezvHQ577dZ2fnUkr6cggKn1PutV0WZYR3sMBc,18305
 Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
 DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
 LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
@@ -76,7 +76,7 @@ castor_extractor/transformation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
 castor_extractor/transformation/coalesce/__init__.py,sha256=CW_qdtEfwgJRsCyBlk5hNlxwEO-VV6mBXZvkRbND_J8,112
 castor_extractor/transformation/coalesce/assets.py,sha256=pzccYPP66c9PAnVroemx7-6MeRHw7Ft1OlTC6jIamAA,363
 castor_extractor/transformation/coalesce/client/__init__.py,sha256=VRmVpH29rOghtDQnCN7dAdA0dI0Lxseu4BC8rnwM9dU,80
-castor_extractor/transformation/coalesce/client/client.py,sha256=
+castor_extractor/transformation/coalesce/client/client.py,sha256=7EVJDDxnIm5_uMHLFZ2PD6JzfebVglKST9IiURwn4vs,6524
 castor_extractor/transformation/coalesce/client/credentials.py,sha256=jbJxjbdPspf-dzYKfeb7oqL_8TXd1nvkJrjAcdAnLPc,548
 castor_extractor/transformation/coalesce/client/endpoint.py,sha256=0uLh7dpA1vsR9qr_50SEYV_-heQE4BwED9oNMgYsL-w,1272
 castor_extractor/transformation/coalesce/client/type.py,sha256=oiiVP9NL0ijTXyQmaB8aJVYckc7m-m8ZgMyNIAduUKE,43
@@ -84,8 +84,8 @@ castor_extractor/transformation/coalesce/client/utils.py,sha256=jbxh3OCbYm3fKZD1
 castor_extractor/transformation/coalesce/client/utils_test.py,sha256=Q00Y1n0Q_sZ0LFnYn98yDGFumBsifzVJSc7_3PSBMfI,1543
 castor_extractor/transformation/dbt/__init__.py,sha256=LHQROlMqYWCc7tcmhdjXtROFpJqUvCg9jPC8avHgD4I,107
 castor_extractor/transformation/dbt/assets.py,sha256=JY1nKEGySZ84wNoe7dnizwAYw2q0t8NVaIfqhB2rSw0,148
-castor_extractor/transformation/dbt/client.py,sha256=
-castor_extractor/transformation/dbt/client_test.py,sha256=
+castor_extractor/transformation/dbt/client.py,sha256=BIue1DNAn2b7kHeiXBkGNosq8jZA2DrgjP7Gi5epAPE,5684
+castor_extractor/transformation/dbt/client_test.py,sha256=RLL7y_pLDv2QBM03qBht8yYEooeT_woRADHcb8vgBQ4,4535
 castor_extractor/transformation/dbt/credentials.py,sha256=pGq7GqFQTw9TwN1DXSHC-0yJ2H6B_wMAbHyQTLqJVh0,543
 castor_extractor/types.py,sha256=nHel2hv6NoHmdpOX_heEfO2-DnZPoYA2x0eJdbFvT0s,1276
 castor_extractor/uploader/__init__.py,sha256=A4bq_SrEtKAsl0r_D_duSTvL5WIQjVfsMy7tDx9IKg0,87
@@ -96,7 +96,7 @@ castor_extractor/uploader/settings.py,sha256=3MvOX-UFRqrLZoiT7wYn9jUGro7NX4RCafY
 castor_extractor/uploader/upload.py,sha256=PSQfkO_7LSE0WBo9Tm_hlS2ONepKeB0cBFdJXySnues,4310
 castor_extractor/uploader/upload_test.py,sha256=7fwstdQe7FjuwGilsCdFpEQr1qLoR2WTRUzyy93fISw,402
 castor_extractor/uploader/utils.py,sha256=otAaySj5aeem6f0CTd0Te6ioJ6uP2J1p348j-SdIwDI,802
-castor_extractor/utils/__init__.py,sha256=
+castor_extractor/utils/__init__.py,sha256=_hC54hBfPH41TTuWMsqQcyYVF7SojrOevW3OAv8M05E,1652
 castor_extractor/utils/argument_parser.py,sha256=S4EcIh3wNDjs3fOrQnttCcPsAmG8m_Txl7xvEh0Q37s,283
 castor_extractor/utils/argument_parser_test.py,sha256=wnyLFJ74iEiPxxLSbwFtckR7FIHxsFOVU38ljs9gqRA,633
 castor_extractor/utils/batch.py,sha256=SFlLmJgVjV2nVhIrjVIEp8wJ9du4dKKHq8YVYubnwQQ,448
@@ -106,7 +106,7 @@ castor_extractor/utils/client/abstract.py,sha256=CWF7_afNpEZ3jor-22wXbKIvM20ukHk
 castor_extractor/utils/client/api/__init__.py,sha256=vlG7WXznYgLTn3XyMGsyUkgRkup8FbKM14EXJ8mv-b0,264
 castor_extractor/utils/client/api/auth.py,sha256=lq0K3UEl1vwIIa_vKTdlpIQPdE5K1-5DXmCwO4dKzng,1890
 castor_extractor/utils/client/api/auth_test.py,sha256=LlyXytnatg6ZzR4Zkvzk0BH99FYhHX7qn_nyr2MSnDI,1305
-castor_extractor/utils/client/api/client.py,sha256=
+castor_extractor/utils/client/api/client.py,sha256=qmj7KoNqt6F-cmpdaMiz_aVxzwMCgbDNcgzXSbCdu1Y,5183
 castor_extractor/utils/client/api/client_test.py,sha256=FM3ZxsLLfMOBn44cXX6FIgnA31-5TTNIyp9D4LBwtXE,1222
 castor_extractor/utils/client/api/pagination.py,sha256=ph5TYqPiyFGgygsIhCATAHPIQ9UJNZyiTcqlyRdGEno,2460
 castor_extractor/utils/client/api/pagination_test.py,sha256=jCOgXFXrH-jrCxe2dfk80ZksJF-EtmpJPU11BGabsqk,1385
@@ -143,7 +143,7 @@ castor_extractor/utils/retry_test.py,sha256=j_6IJStBomEhxmGpIY9IIlESgMxhcDpmIKj2
 castor_extractor/utils/safe.py,sha256=gvIMRIoggdVeYMl222IYqXnHVDninDklFMlAHt-WldA,1948
 castor_extractor/utils/safe_test.py,sha256=IHN1Z761tYMFslYC-2HAfkXmFPh4LYSqNLs4QZwykjk,2160
 castor_extractor/utils/salesforce/__init__.py,sha256=fZ2U6t6AFFAIC-DLXvFHBgpBDjTvX0tFgZ8zJoehPAc,88
-castor_extractor/utils/salesforce/client.py,sha256=
+castor_extractor/utils/salesforce/client.py,sha256=wcbJScclvSHjMf6wYNVnHjmpoC22dSshmZW9rDxXKF0,3211
 castor_extractor/utils/salesforce/client_test.py,sha256=T3gUnZ0cRvnL_4dVc4lInRSO9Ti2WeLkLWV1scps4IY,668
 castor_extractor/utils/salesforce/constants.py,sha256=7yPmUeyn4IHQiHLDutXE0L_OBd41E5080vFxqA_s4Dc,58
 castor_extractor/utils/salesforce/credentials.py,sha256=m_11LIaBrYVgH2bLo-QnxaIY5KhEdtfVXz9r2lb_fd0,1123
@@ -155,6 +155,8 @@ castor_extractor/utils/string_test.py,sha256=u3P2tAPhyfCLvD19rH_JcpHhPuWTHUdg0z_
 castor_extractor/utils/time.py,sha256=jmP1QWg4lv21Jp_Oy71lfJ47hjNOSgHiBOFf964RMPU,1732
 castor_extractor/utils/time_test.py,sha256=pH8DSosNlwDYZXZNNjYDcL0WbmZc_c212LEEn88Oqew,647
 castor_extractor/utils/type.py,sha256=Sd8JlEgbGkBUZnRqCUDtREeBkOMTXtlNMyCph90_J0Q,328
+castor_extractor/utils/url.py,sha256=0YaKAz3EC5PgTb5A2TNOlxf1DANK40yw6hs7ArEtJaU,1238
+castor_extractor/utils/url_test.py,sha256=LWzNdOZqjrDeLmvhPBYmP35mzhm7jGAXi021thiro1Y,1425
 castor_extractor/utils/validation.py,sha256=dRvC9SoFVecVZuLQNN3URq37yX2sBSW3-NxIxkcol5o,1894
 castor_extractor/utils/validation_test.py,sha256=A7P6VmI0kYX2aGIeEN12y7LsY7Kpm8pE4bdVFhbBAMw,1184
 castor_extractor/utils/write.py,sha256=Z_RYm47XeHiUPPUMYMuAjQrVZ18CAkL3daQHQG1XPlM,2148
@@ -236,8 +238,8 @@ castor_extractor/visualization/powerbi/__init__.py,sha256=hoZ73ngLhMc9edqxO9PUIE
 castor_extractor/visualization/powerbi/assets.py,sha256=IB_XKwgdN1pZYGZ4RfeHrLjflianTzWf_6tg-4CIwu0,742
 castor_extractor/visualization/powerbi/client/__init__.py,sha256=UPIhMaCCdNxhiLdkItC0IPFE_AMi-SgqI_ahwjB9utI,151
 castor_extractor/visualization/powerbi/client/authentication.py,sha256=cTohunKr1nUDfvxB0sejJSyfE2BdCtwT1WMPecWlbyU,1045
-castor_extractor/visualization/powerbi/client/client.py,sha256=
-castor_extractor/visualization/powerbi/client/client_test.py,sha256=
+castor_extractor/visualization/powerbi/client/client.py,sha256=CWCYmj2spYin74qq9T8v2ZJ5TcxBuEy5EjArhCVZjLM,8141
+castor_extractor/visualization/powerbi/client/client_test.py,sha256=Ox_bHpCSckEpT6IiR7drx2c9fmaVl1btUZxnwEmamGQ,5718
 castor_extractor/visualization/powerbi/client/constants.py,sha256=88R_aGachNNUZh6OSH2fkDwZtY4KTStzKm_g7HNCqqo,387
 castor_extractor/visualization/powerbi/client/credentials.py,sha256=OVWdhZSNODzTdLysY-sbpBZ3uUkLokeayQZnbJAqt2I,1386
 castor_extractor/visualization/powerbi/client/credentials_test.py,sha256=TzFqxsWVQ3sXR_n0bJsexK9Uz7ceXCEPVqDGWTJzW60,993
@@ -276,10 +278,11 @@ castor_extractor/visualization/sigma/client/endpoints.py,sha256=DBFphbgoH78_MZUG
 castor_extractor/visualization/sigma/client/pagination.py,sha256=kNEhNq08tTGbypyMjxs0w4uvDtQc_iaWpOZweaa_FsU,690
 castor_extractor/visualization/sigma/extract.py,sha256=XIT1qsj6g6dgBWP8HPfj_medZexu48EaY9tUwi14gzM,2298
 castor_extractor/visualization/strategy/__init__.py,sha256=HOMv4JxqF5ZmViWi-pDE-PSXJRLTdXal_jtpHG_rlR8,123
-castor_extractor/visualization/strategy/assets.py,sha256=
+castor_extractor/visualization/strategy/assets.py,sha256=yFXF_dX01patC0HQ1eU7Jo_4DZ4m6IJEg0uCB71tMoI,480
 castor_extractor/visualization/strategy/client/__init__.py,sha256=XWP0yF5j6JefDJkDfX-RSJn3HF2ceQ0Yx1PLCfB3BBo,80
-castor_extractor/visualization/strategy/client/client.py,sha256=
+castor_extractor/visualization/strategy/client/client.py,sha256=6DJO0Fh67FXxmwY5h_X9cu5sEq3GhM19b9hwn_fvhSE,9460
 castor_extractor/visualization/strategy/client/credentials.py,sha256=urFfNxWX1JG6wwFMYImufQzHa5g-sgjdlVGzi63owwg,1113
+castor_extractor/visualization/strategy/client/properties.py,sha256=PaCFnrc8aDtIELjZbPoPHuEyN6n4A9Kuw6Rmqjhm6qo,4486
 castor_extractor/visualization/strategy/extract.py,sha256=2fBuvS2xiOGXRpxXnZsE_C3en6t1-BlM5TbusjHyEkg,1166
 castor_extractor/visualization/tableau/__init__.py,sha256=eFI_1hjdkxyUiAYiy3szwyuwn3yJ5C_KbpBU0ySJDcQ,138
 castor_extractor/visualization/tableau/assets.py,sha256=HbCRd8VCj1WBEeqg9jwnygnT7xOFJ6PQD7Lq7sV-XR0,635
@@ -425,8 +428,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
 castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
 castor_extractor/warehouse/sqlserver/query.py,sha256=g0hPT-RmeGi2DyenAi3o72cTlQsLToXIFYojqc8E5fQ,533
 castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
-castor_extractor-0.24.
-castor_extractor-0.24.
-castor_extractor-0.24.
-castor_extractor-0.24.
-castor_extractor-0.24.
+castor_extractor-0.24.27.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
+castor_extractor-0.24.27.dist-info/METADATA,sha256=0-IaR_-6k2rDoWNgoST4QUHVZWsxKGCD8qQytZ6J-Vo,25758
+castor_extractor-0.24.27.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+castor_extractor-0.24.27.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
+castor_extractor-0.24.27.dist-info/RECORD,,
{castor_extractor-0.24.22.dist-info → castor_extractor-0.24.27.dist-info}/LICENCE: File without changes
{castor_extractor-0.24.22.dist-info → castor_extractor-0.24.27.dist-info}/WHEEL: File without changes
{castor_extractor-0.24.22.dist-info → castor_extractor-0.24.27.dist-info}/entry_points.txt: File without changes