castor-extractor 0.24.10__py3-none-any.whl → 0.24.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +20 -0
- README.md +5 -2
- castor_extractor/commands/extract_confluence.py +29 -0
- castor_extractor/commands/extract_strategy.py +26 -0
- castor_extractor/commands/extract_tableau.py +7 -0
- castor_extractor/knowledge/confluence/client/client.py +58 -2
- castor_extractor/knowledge/confluence/client/client_test.py +91 -0
- castor_extractor/knowledge/confluence/client/endpoints.py +13 -4
- castor_extractor/knowledge/confluence/extract.py +7 -1
- castor_extractor/transformation/coalesce/client/client.py +17 -1
- castor_extractor/visualization/strategy/__init__.py +3 -0
- castor_extractor/visualization/strategy/assets.py +14 -0
- castor_extractor/visualization/strategy/client/__init__.py +2 -0
- castor_extractor/visualization/strategy/client/client.py +313 -0
- castor_extractor/visualization/strategy/client/credentials.py +38 -0
- castor_extractor/visualization/strategy/extract.py +43 -0
- castor_extractor/visualization/tableau/client/client.py +6 -0
- castor_extractor/visualization/tableau/extract.py +2 -0
- castor_extractor/warehouse/redshift/queries/view_ddl.sql +19 -5
- {castor_extractor-0.24.10.dist-info → castor_extractor-0.24.15.dist-info}/METADATA +29 -4
- {castor_extractor-0.24.10.dist-info → castor_extractor-0.24.15.dist-info}/RECORD +24 -16
- {castor_extractor-0.24.10.dist-info → castor_extractor-0.24.15.dist-info}/entry_points.txt +1 -0
- {castor_extractor-0.24.10.dist-info → castor_extractor-0.24.15.dist-info}/LICENCE +0 -0
- {castor_extractor-0.24.10.dist-info → castor_extractor-0.24.15.dist-info}/WHEEL +0 -0
CHANGELOG.md
CHANGED
|
@@ -1,5 +1,25 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.24.15 - 2025-05-12
|
|
4
|
+
|
|
5
|
+
* Tableau: Add argument to skip columns extraction
|
|
6
|
+
|
|
7
|
+
## 0.24.14 - 2025-05-06
|
|
8
|
+
|
|
9
|
+
* Confluence: extract pages per space to allow additional filtering. By default, pages from archived or personal spaces are not extracted.
|
|
10
|
+
|
|
11
|
+
## 0.24.13 - 2025-05-05
|
|
12
|
+
|
|
13
|
+
* Rollback cloud-storage version as it's not compatible with Keboola
|
|
14
|
+
|
|
15
|
+
## 0.24.12 - 2025-05-05
|
|
16
|
+
|
|
17
|
+
* Redshift - fix query definition of materialized views
|
|
18
|
+
|
|
19
|
+
## 0.24.11 - 2025-05-05
|
|
20
|
+
|
|
21
|
+
* add support for Strategy (formerly MicroStrategy)
|
|
22
|
+
|
|
3
23
|
## 0.24.10 - 2025-04-30
|
|
4
24
|
|
|
5
25
|
* Tableau - skip warnings instead of raising an error
|
README.md
CHANGED
|
@@ -37,6 +37,8 @@ It also embeds utilities to help you push your metadata to Castor:
|
|
|
37
37
|
Requirements: **python3.9+**
|
|
38
38
|
<img src="https://upload.wikimedia.org/wikipedia/commons/c/c3/Python-logo-notext.svg" width=20 />
|
|
39
39
|
|
|
40
|
+
**Note:** The Strategy command requires **python3.10+**. All other modules work with python3.9+.
|
|
41
|
+
|
|
40
42
|
### Create castor-env
|
|
41
43
|
|
|
42
44
|
We advise to create a dedicated [Python environment](https://docs.python.org/3/library/venv.html).
|
|
@@ -115,6 +117,7 @@ pip install castor-extractor[postgres]
|
|
|
115
117
|
pip install castor-extractor[redshift]
|
|
116
118
|
pip install castor-extractor[snowflake]
|
|
117
119
|
pip install castor-extractor[sqlserver]
|
|
120
|
+
pip install castor-extractor[strategy]
|
|
118
121
|
pip install castor-extractor[tableau]
|
|
119
122
|
```
|
|
120
123
|
|
|
@@ -138,6 +141,6 @@ export CASTOR_OUTPUT_DIRECTORY="/tmp/castor"
|
|
|
138
141
|
|
|
139
142
|
## Contact
|
|
140
143
|
|
|
141
|
-
For any questions or bug report, contact us at [support@
|
|
144
|
+
For any questions or bug reports, contact us at [support@coalesce.io](mailto:support@coalesce.io)
|
|
142
145
|
|
|
143
|
-
[
|
|
146
|
+
[Catalog from Coalesce](https://castordoc.com) helps you find, understand, and use your data assets
|
|
@@ -16,4 +16,33 @@ def main():
|
|
|
16
16
|
parser.add_argument("-t", "--token", help="Confluence API token")
|
|
17
17
|
parser.add_argument("-u", "--username", help="Confluence username")
|
|
18
18
|
|
|
19
|
+
parser.add_argument(
|
|
20
|
+
"--include-archived-spaces",
|
|
21
|
+
action="store_true",
|
|
22
|
+
default=False,
|
|
23
|
+
help="Include pages from archived spaces (Optional)",
|
|
24
|
+
)
|
|
25
|
+
parser.add_argument(
|
|
26
|
+
"--include-personal-spaces",
|
|
27
|
+
action="store_true",
|
|
28
|
+
default=False,
|
|
29
|
+
help="Include pages from personal spaces (Optional)",
|
|
30
|
+
)
|
|
31
|
+
parser.add_argument(
|
|
32
|
+
"--space-ids-allowed",
|
|
33
|
+
type=str,
|
|
34
|
+
nargs="+",
|
|
35
|
+
help=(
|
|
36
|
+
"List of Confluence space IDs allowed for extraction (Optional). "
|
|
37
|
+
"Only pages from these Spaces will be extracted. "
|
|
38
|
+
"This overrides any other filtering (archived, personal, etc.)"
|
|
39
|
+
),
|
|
40
|
+
)
|
|
41
|
+
parser.add_argument(
|
|
42
|
+
"--space-ids-blocked",
|
|
43
|
+
type=str,
|
|
44
|
+
nargs="+",
|
|
45
|
+
help="List of Confluence space IDs to exclude fom the extraction (Optional)",
|
|
46
|
+
)
|
|
47
|
+
|
|
19
48
|
confluence.extract_all(**parse_filled_arguments(parser))
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from argparse import ArgumentParser
|
|
3
|
+
|
|
4
|
+
from castor_extractor.utils import parse_filled_arguments # type: ignore
|
|
5
|
+
from castor_extractor.visualization import strategy # type: ignore
|
|
6
|
+
|
|
7
|
+
logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def main():
    """Parse the command-line arguments and run the Strategy extraction."""
    parser = ArgumentParser()

    # Plain options sharing the same shape: (short flag, long flag, help).
    simple_options = (
        ("-u", "--username", "Strategy username"),
        ("-p", "--password", "Strategy password"),
        ("-b", "--base-url", "Strategy instance URL"),
        ("-o", "--output", "Directory to write to"),
    )
    for short_flag, long_flag, help_text in simple_options:
        parser.add_argument(short_flag, long_flag, help=help_text)

    # Zero or more project IDs; left to None when the flag is absent.
    parser.add_argument(
        "-i",
        "--project-ids",
        nargs="*",
        help="Optional list of project IDs",
        default=None,
    )

    filled_arguments = parse_filled_arguments(parser)
    strategy.extract_all(**filled_arguments)
|
|
@@ -21,6 +21,13 @@ def main():
|
|
|
21
21
|
parser.add_argument("-b", "--server-url", help="Tableau server url")
|
|
22
22
|
parser.add_argument("-i", "--site-id", help="Tableau site ID")
|
|
23
23
|
|
|
24
|
+
parser.add_argument(
|
|
25
|
+
"--skip-columns",
|
|
26
|
+
dest="skip_columns",
|
|
27
|
+
action="store_true",
|
|
28
|
+
help="Option to avoid extracting Tableau columns, default to False",
|
|
29
|
+
)
|
|
30
|
+
|
|
24
31
|
parser.add_argument(
|
|
25
32
|
"--with-pulse",
|
|
26
33
|
dest="with_pulse",
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from collections.abc import Iterator
|
|
2
2
|
from functools import partial
|
|
3
|
+
from typing import Optional
|
|
3
4
|
|
|
4
5
|
from ....utils import (
|
|
5
6
|
APIClient,
|
|
@@ -17,12 +18,18 @@ _HEADERS = {
|
|
|
17
18
|
"Accept": "application/json",
|
|
18
19
|
"Content-Type": "application/json",
|
|
19
20
|
}
|
|
21
|
+
_STATUS_ARCHIVED = "archived"
|
|
22
|
+
_TYPE_PERSONAL = "personal"
|
|
20
23
|
|
|
21
24
|
|
|
22
25
|
class ConfluenceClient(APIClient):
|
|
23
26
|
def __init__(
|
|
24
27
|
self,
|
|
25
28
|
credentials: ConfluenceCredentials,
|
|
29
|
+
include_archived_spaces: bool = False,
|
|
30
|
+
include_personal_spaces: bool = False,
|
|
31
|
+
space_ids_allowed: Optional[set[str]] = None,
|
|
32
|
+
space_ids_blocked: Optional[set[str]] = None,
|
|
26
33
|
):
|
|
27
34
|
self.account_id = credentials.account_id
|
|
28
35
|
auth = BasicAuth(
|
|
@@ -34,12 +41,61 @@ class ConfluenceClient(APIClient):
|
|
|
34
41
|
headers=_HEADERS,
|
|
35
42
|
)
|
|
36
43
|
|
|
44
|
+
self.include_archived_spaces = include_archived_spaces
|
|
45
|
+
self.include_personal_spaces = include_personal_spaces
|
|
46
|
+
self.space_ids_allowed = space_ids_allowed or set()
|
|
47
|
+
self.space_ids_blocked = space_ids_blocked or set()
|
|
48
|
+
|
|
37
49
|
def pages(self):
    """Yields the pages of every space returned by `self.spaces()`, space by space."""
    for selected_space in self.spaces():
        pages_endpoint = ConfluenceEndpointFactory.pages(selected_space["id"])
        fetch_space_pages = partial(self._get, endpoint=pages_endpoint)
        yield from fetch_all_pages(fetch_space_pages, ConfluencePagination)
|
|
58
|
+
|
|
59
|
+
def spaces(self) -> Iterator[dict]:
    """
    Yields the spaces selected by the client's filtering settings.

    If `space_ids_allowed` is not empty, only the spaces whose ID belongs
    to it are yielded and every other setting is ignored.

    Otherwise, a space is skipped when any of the following holds:
    * its ID is in `space_ids_blocked`
    * it is archived (status=archived) and `include_archived_spaces` is False
    * it is personal (type=personal) and `include_personal_spaces` is False
    """
    request = partial(
        self._get,
        endpoint=ConfluenceEndpointFactory.spaces(),
    )
    # All spaces are fetched up front; filtering is applied on the full list.
    spaces = list(fetch_all_pages(request, ConfluencePagination))

    if self.space_ids_allowed:
        # The allow-list overrides the blocked/archived/personal filters.
        yield from (
            space
            for space in spaces
            if space["id"] in self.space_ids_allowed
        )
        return

    for space in spaces:
        space_id = space["id"]
        type_ = space["type"]
        status = space["status"]

        if space_id in self.space_ids_blocked:
            continue

        if status == _STATUS_ARCHIVED and not self.include_archived_spaces:
            continue

        if type_ == _TYPE_PERSONAL and not self.include_personal_spaces:
            continue

        yield space
|
|
43
99
|
|
|
44
100
|
def users(self):
|
|
45
101
|
request_body = {"accountIds": [self.account_id]}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
from unittest.mock import MagicMock, patch
|
|
2
|
+
|
|
3
|
+
from .client import ConfluenceClient
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_ConfluenceClient_filtered_spaces_with_allowlist():
    """The allow-list takes precedence over every other space filter."""
    both_blocked_and_allowed_space_id = "789"
    archived_space_id = "934"
    random_space = "1000"

    spaces = [
        # Both blocked and allowed space. "Allowed" setting takes precedence.
        {
            "id": both_blocked_and_allowed_space_id,
            "type": "global",
            "status": "current",
        },
        {"id": archived_space_id, "type": "global", "status": "archived"},
        {"id": random_space, "type": "global", "status": "current"},
    ]
    # the "allowed" list overrides everything else
    client = ConfluenceClient(
        credentials=MagicMock(),
        include_archived_spaces=True,
        space_ids_allowed={
            both_blocked_and_allowed_space_id,
        },
        space_ids_blocked={
            both_blocked_and_allowed_space_id,
        },
    )

    # Patch targets are derived from the imported class, so the test does not
    # depend on the directory layout the package is imported from.
    with (
        patch.object(ConfluenceClient, "_get"),
        patch(
            f"{ConfluenceClient.__module__}.fetch_all_pages"
        ) as mock_fetch_all_pages,
    ):
        mock_fetch_all_pages.return_value = spaces

        filtered_spaces = list(client.spaces())

        assert len(filtered_spaces) == 1
        filtered_space_ids = {space["id"] for space in filtered_spaces}
        assert filtered_space_ids == {both_blocked_and_allowed_space_id}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def test_ConfluenceClient_filtered_spaces():
    """Without an allow-list, blocked and archived spaces are skipped."""
    blocked_id = "42"
    personal_id = "666"
    archived_id = "934"
    random_id = "1000"

    # test the other settings : allow personal spaces & block space "42"
    client = ConfluenceClient(
        credentials=MagicMock(),
        include_archived_spaces=False,
        include_personal_spaces=True,
        space_ids_blocked={blocked_id},
    )

    spaces = [
        # Blocked space, to be skipped
        {"id": blocked_id, "type": "global", "status": "current"},
        # Archived space, to be skipped
        {"id": archived_id, "type": "collaboration", "status": "archived"},
        # Personal space, to be included
        {"id": personal_id, "type": "personal", "status": "current"},
        # Valid space
        {"id": random_id, "type": "knowledge_base", "status": "current"},
    ]

    # Patch targets are derived from the imported class, so the test does not
    # depend on the directory layout the package is imported from.
    with (
        patch.object(ConfluenceClient, "_get"),
        patch(
            f"{ConfluenceClient.__module__}.fetch_all_pages"
        ) as mock_fetch_all_pages,
    ):
        mock_fetch_all_pages.return_value = spaces

        filtered_spaces = list(client.spaces())

        filtered_space_ids = [space["id"] for space in filtered_spaces]

        # no duplicates
        assert len(filtered_space_ids) == len(set(filtered_space_ids))
        assert set(filtered_space_ids) == {personal_id, random_id}
|
|
@@ -6,15 +6,24 @@ class ConfluenceEndpointFactory:
|
|
|
6
6
|
|
|
7
7
|
API = "wiki/api/v2/"
|
|
8
8
|
PAGES = "pages"
|
|
9
|
+
SPACES = "spaces"
|
|
9
10
|
USERS = "users-bulk"
|
|
10
11
|
|
|
11
12
|
@classmethod
|
|
12
|
-
def pages(cls) -> str:
|
|
13
|
+
def pages(cls, space_id: str) -> str:
|
|
13
14
|
"""
|
|
14
|
-
Endpoint to fetch all pages.
|
|
15
|
-
More: https://developer.atlassian.com/cloud/confluence/rest/v2/api-group-page/#api-pages-get
|
|
15
|
+
Endpoint to fetch all pages in the given space.
|
|
16
|
+
More: https://developer.atlassian.com/cloud/confluence/rest/v2/api-group-page/#api-spaces-id-pages-get
|
|
16
17
|
"""
|
|
17
|
-
return f"{cls.API}{cls.PAGES}?body-format=atlas_doc_format"
|
|
18
|
+
return f"{cls.API}{cls.SPACES}/{space_id}/{cls.PAGES}?body-format=atlas_doc_format"
|
|
19
|
+
|
|
20
|
+
@classmethod
|
|
21
|
+
def spaces(cls) -> str:
|
|
22
|
+
"""
|
|
23
|
+
Endpoint to fetch all spaces.
|
|
24
|
+
https://developer.atlassian.com/cloud/confluence/rest/v2/api-group-space/#api-spaces-get
|
|
25
|
+
"""
|
|
26
|
+
return f"{cls.API}{cls.SPACES}"
|
|
18
27
|
|
|
19
28
|
@classmethod
|
|
20
29
|
def users(cls) -> str:
|
|
@@ -42,7 +42,13 @@ def extract_all(**kwargs) -> None:
|
|
|
42
42
|
output_directory = kwargs.get("output") or from_env(OUTPUT_DIR)
|
|
43
43
|
|
|
44
44
|
credentials = ConfluenceCredentials(**kwargs)
|
|
45
|
-
client = ConfluenceClient(
|
|
45
|
+
client = ConfluenceClient(
|
|
46
|
+
credentials=credentials,
|
|
47
|
+
include_archived_spaces=kwargs.get("include_archived_spaces") or False,
|
|
48
|
+
include_personal_spaces=kwargs.get("include_personal_spaces") or False,
|
|
49
|
+
space_ids_allowed=kwargs.get("space_ids_allowed"),
|
|
50
|
+
space_ids_blocked=kwargs.get("space_ids_blocked"),
|
|
51
|
+
)
|
|
46
52
|
|
|
47
53
|
ts = current_timestamp()
|
|
48
54
|
|
|
@@ -1,7 +1,15 @@
|
|
|
1
1
|
from http import HTTPStatus
|
|
2
2
|
from typing import Iterator, Optional
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
import requests
|
|
5
|
+
|
|
6
|
+
from ....utils import (
|
|
7
|
+
APIClient,
|
|
8
|
+
BearerAuth,
|
|
9
|
+
RequestSafeMode,
|
|
10
|
+
SerializedAsset,
|
|
11
|
+
retry,
|
|
12
|
+
)
|
|
5
13
|
from ..assets import CoalesceAsset, CoalesceQualityAsset
|
|
6
14
|
from .credentials import CoalesceCredentials
|
|
7
15
|
from .endpoint import (
|
|
@@ -12,6 +20,9 @@ from .utils import column_names_per_node, is_test, test_names_per_node
|
|
|
12
20
|
|
|
13
21
|
_LIMIT_MAX = 1_000
|
|
14
22
|
_MAX_ERRORS = 50
|
|
23
|
+
_RETRY_BASE_MS = 10 * 60 * 1000 # 10 minutes
|
|
24
|
+
_RETRY_COUNT = 2
|
|
25
|
+
_RETRY_EXCEPTIONS = [requests.exceptions.ConnectTimeout]
|
|
15
26
|
|
|
16
27
|
|
|
17
28
|
def _run_result_payload(result: dict, query_result: dict) -> dict:
|
|
@@ -64,6 +75,11 @@ class CoalesceClient(APIClient):
|
|
|
64
75
|
result = self._get(endpoint=endpoint)
|
|
65
76
|
return result["data"]
|
|
66
77
|
|
|
78
|
+
@retry(
|
|
79
|
+
exceptions=_RETRY_EXCEPTIONS,
|
|
80
|
+
max_retries=_RETRY_COUNT,
|
|
81
|
+
base_ms=_RETRY_BASE_MS,
|
|
82
|
+
)
|
|
67
83
|
def _node_details(self, environment_id: int, node_id: str) -> dict:
|
|
68
84
|
endpoint = CoalesceEndpointFactory.nodes(
|
|
69
85
|
environment_id=environment_id, node_id=node_id
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from ...types import ExternalAsset
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class StrategyAsset(ExternalAsset):
    """Strategy assets that can be extracted"""

    # One member per extractable asset type; each value is the lowercase
    # form of the member name.
    ATTRIBUTE = "attribute"
    CUBE = "cube"
    DASHBOARD = "dashboard"
    DOCUMENT = "document"
    FACT = "fact"
    METRIC = "metric"
    REPORT = "report"
    USER = "user"
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from collections.abc import Iterator
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import Any, Callable, Optional
|
|
5
|
+
from urllib.parse import urlparse
|
|
6
|
+
|
|
7
|
+
from mstrio.connection import Connection # type: ignore
|
|
8
|
+
from mstrio.helpers import IServerError # type: ignore
|
|
9
|
+
from mstrio.modeling import ( # type: ignore
|
|
10
|
+
list_attributes,
|
|
11
|
+
list_facts,
|
|
12
|
+
list_metrics,
|
|
13
|
+
)
|
|
14
|
+
from mstrio.project_objects import ( # type: ignore
|
|
15
|
+
Report,
|
|
16
|
+
list_dashboards,
|
|
17
|
+
list_documents,
|
|
18
|
+
list_olap_cubes,
|
|
19
|
+
list_reports,
|
|
20
|
+
)
|
|
21
|
+
from mstrio.server import Environment # type: ignore
|
|
22
|
+
from mstrio.types import ObjectSubTypes, ObjectTypes # type: ignore
|
|
23
|
+
from mstrio.users_and_groups import User, list_users # type: ignore
|
|
24
|
+
from mstrio.utils.entity import Entity # type: ignore
|
|
25
|
+
from mstrio.utils.helper import is_dashboard # type: ignore
|
|
26
|
+
from pydantic import BaseModel, ConfigDict
|
|
27
|
+
|
|
28
|
+
from ..assets import StrategyAsset
|
|
29
|
+
from .credentials import StrategyCredentials
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
_BATCH_SIZE: int = 100
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class URLTemplates(Enum):
    """
    URL templates for Strategy entities.

    Every template carries the same three placeholders: `hostname`,
    `project_id` and `id_`.
    """

    # Dashboard, addressed through the "MicroStrategyLibrarySTD" application path
    DASHBOARD = (
        "https://{hostname}/MicroStrategyLibrarySTD/app/{project_id}/{id_}"
    )
    # The three templates below go through the "mstrWeb" servlet and differ
    # only by the query parameter carrying the ID.
    DOCUMENT = "https://{hostname}/MicroStrategy/servlet/mstrWeb?documentID={id_}&projectID={project_id}"
    REPORT = "https://{hostname}/MicroStrategy/servlet/mstrWeb?reportID={id_}&projectID={project_id}"
    FOLDER = "https://{hostname}/MicroStrategy/servlet/mstrWeb?folderID={id_}&projectID={project_id}"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _is_dashboard(entity: Entity) -> bool:
    """
    Tells whether the entity is a Dashboard.

    Dashboards share the Document type, so the `view_media` property is
    what sets them apart; it is only read for entities of that type.
    """
    if not (entity.type == ObjectTypes.DOCUMENT_DEFINITION):
        return False
    return is_dashboard(entity.view_media)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _is_report(entity: Entity) -> bool:
    """
    Tells whether the entity is a Report.

    Cubes have the same type as Reports, so an entity only counts as a
    Report when its subtype is not the OLAP cube one.
    """
    has_report_type = entity.type == ObjectTypes.REPORT_DEFINITION
    has_cube_subtype = entity.subtype == ObjectSubTypes.OLAP_CUBE.value
    if not has_report_type:
        return False
    return not has_cube_subtype
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _safe_get_attribute(entity: Entity, attribute: str) -> Optional[str]:
    """
    Reads `attribute` on the entity, returning None when the read fails.

    Some properties may raise an `IServerError` (example: a Report's `sql`
    when the Report has not been published). Such failures are logged and
    None is returned instead.
    """
    try:
        return getattr(entity, attribute)
    except IServerError as e:
        logger.error(f"Could not get {attribute} for entity {entity.id}: {e}")
        return None
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class Dependency(BaseModel):
    """Fields kept for each dependency of an entity."""

    id: str
    name: str
    subtype: int
    type: int

    # Any other key passed at construction is ignored (pydantic extra="ignore").
    model_config = ConfigDict(extra="ignore")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _list_dependencies(entity: Entity) -> list[dict]:
    """
    Collects the entity's dependencies batch by batch and reduces each one
    to the fields declared on `Dependency`.
    """
    position = 0
    page = entity.list_dependencies(offset=position, limit=_BATCH_SIZE)
    collected: list[dict] = list(page)

    # A batch shorter than the batch size marks the end of the listing.
    while len(page) >= _BATCH_SIZE:
        position += _BATCH_SIZE
        page = entity.list_dependencies(offset=position, limit=_BATCH_SIZE)
        collected.extend(page)

    return [Dependency(**raw).model_dump() for raw in collected]
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _level_1_folder_id(folders: list[dict]) -> str:
|
|
104
|
+
"""Searches for the first enclosing folder and returns its ID."""
|
|
105
|
+
for folder in folders:
|
|
106
|
+
if folder["level"] == 1:
|
|
107
|
+
return folder["id"]
|
|
108
|
+
|
|
109
|
+
raise ValueError("No level 1 folder found")
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class StrategyClient:
    """Connect to Strategy through mstrio-py and fetch main assets."""

    def __init__(self, credentials: StrategyCredentials):
        """
        Opens the connection and resolves the projects to extract.

        When the credentials carry no project IDs, all the projects listed
        by the environment are used.
        """
        self.base_url = credentials.base_url
        self.connection = Connection(
            base_url=self.base_url,
            username=credentials.username,
            password=credentials.password,
        )

        # Only the host part of the base URL is used when building asset URLs.
        self.hostname = urlparse(self.base_url).hostname

        if credentials.project_ids:
            self.project_ids = credentials.project_ids
        else:
            env = Environment(connection=self.connection)
            self.project_ids = [project.id for project in env.list_projects()]

    def close(self):
        """Closes the underlying connection."""
        self.connection.close()

    def _url(self, entity: Entity) -> str:
        """
        Formats the right URL.
        * Dashboards : viewed in MicroStrategy
        * Reports and Documents : viewed in MicroStrategy Web
        * other (i.e. Cubes): the URL leads to the folder in MicroStrategy Web

        For the folder case, a ValueError is raised when the entity has no
        level 1 ancestor.
        """
        if _is_dashboard(entity):
            id_ = entity.id
            template = URLTemplates.DASHBOARD

        elif entity.type == ObjectTypes.DOCUMENT_DEFINITION:
            id_ = entity.id
            template = URLTemplates.DOCUMENT

        elif _is_report(entity):
            id_ = entity.id
            template = URLTemplates.REPORT

        else:
            # default to folder URL
            id_ = _level_1_folder_id(entity.ancestors)
            template = URLTemplates.FOLDER

        return template.value.format(
            hostname=self.hostname,
            id_=id_,
            project_id=entity.project_id,
        )

    def _common_entity_properties(
        self,
        entity: Entity,
        with_url: bool = True,
        with_description: bool = True,
    ) -> dict:
        """
        Returns the entity's properties, including its dependencies
        and optional URL and/or description.

        The owner ID is only filled when the owner is a `User` instance;
        the description is None when it cannot be retrieved.
        """
        dependencies = _list_dependencies(entity)
        owner_id = entity.owner.id if isinstance(entity.owner, User) else None
        properties = {
            "dependencies": dependencies,
            "id": entity.id,
            "location": entity.location,
            "name": entity.name,
            "owner_id": owner_id,
            "subtype": entity.subtype,
            "type": entity.type.value,
        }

        if with_url:
            properties["url"] = self._url(entity)

        if with_description:
            properties["description"] = _safe_get_attribute(
                entity, "description"
            )

        return properties

    def _report_properties(self, report: Report) -> dict[str, Any]:
        """
        Report properties contain an optional SQL source query. Due to a typing
        bug in the mstrio package, the typing must be ignored.
        """
        # The common properties already include the URL (`with_url` defaults
        # to True), so it is not computed a second time here.
        properties = self._common_entity_properties(report)  # type: ignore
        properties["sql"] = _safe_get_attribute(report, "sql")  # type: ignore
        return properties

    @staticmethod
    def _user_properties(user: User) -> dict[str, Any]:
        """Returns the ID, name, username and default email address of the user."""
        return {
            "id": user.id,
            "name": user.name,
            "username": user.username,
            "email": user.default_email_address,
        }

    def _fetch_entities(
        self,
        extract_callback: Callable,
        with_url: bool = True,
        with_description: bool = True,
        custom_property_extractor: Optional[Callable] = None,
    ) -> Iterator[dict[str, Any]]:
        """
        Yields all entities across all projects using the given retrieval function from the mstrio package.

        Each project is selected on the connection before `extract_callback`
        is called with that connection. Properties come from
        `custom_property_extractor` when given, otherwise from the common
        properties (`with_url` and `with_description` only apply there).
        An entity raising an `IServerError` is logged and skipped.
        """
        for project_id in self.project_ids:
            self.connection.select_project(project_id=project_id)

            entities = extract_callback(connection=self.connection)

            for entity in entities:
                try:
                    if custom_property_extractor:
                        yield custom_property_extractor(entity)
                    else:
                        yield self._common_entity_properties(
                            entity,
                            with_url=with_url,
                            with_description=with_description,
                        )
                except IServerError as e:
                    logger.error(
                        f"Could not fetch attributes for entity {entity.id}: {e}"
                    )

    def _fetch_attributes(self) -> Iterator[dict[str, Any]]:
        """Yields all attributes, without URL."""
        return self._fetch_entities(
            list_attributes,
            with_url=False,
        )

    def _fetch_cubes(self) -> Iterator[dict[str, Any]]:
        """Yields all OLAP cubes."""
        return self._fetch_entities(list_olap_cubes)

    def _fetch_dashboards(self) -> Iterator[dict[str, Any]]:
        """Yields all dashboards."""
        return self._fetch_entities(list_dashboards)

    def _fetch_documents(self) -> Iterator[dict[str, Any]]:
        """Yields all documents."""
        return self._fetch_entities(list_documents)

    def _fetch_facts(self) -> Iterator[dict[str, Any]]:
        """Yields all facts. Descriptions are not needed for this entity type."""
        return self._fetch_entities(
            list_facts,
            with_url=False,
            with_description=False,
        )

    def _fetch_metrics(self) -> Iterator[dict[str, Any]]:
        """Yields all metrics, without URL."""
        return self._fetch_entities(
            list_metrics,
            with_url=False,
        )

    def _fetch_reports(self) -> Iterator[dict[str, Any]]:
        """Yields all reports, with their optional SQL source query."""
        return self._fetch_entities(
            list_reports,
            custom_property_extractor=self._report_properties,
        )

    def _fetch_users(self) -> Iterator[dict[str, Any]]:
        """Yields all users, reduced to their identification fields."""
        return self._fetch_entities(
            list_users,
            custom_property_extractor=self._user_properties,
        )

    def fetch(self, asset: StrategyAsset):
        """
        Fetch the given asset type from Strategy

        Raises NotImplementedError (on first iteration, as this is a
        generator) when the asset type is not handled.
        """
        if asset == StrategyAsset.ATTRIBUTE:
            yield from self._fetch_attributes()

        elif asset == StrategyAsset.CUBE:
            yield from self._fetch_cubes()

        elif asset == StrategyAsset.DASHBOARD:
            yield from self._fetch_dashboards()

        elif asset == StrategyAsset.DOCUMENT:
            yield from self._fetch_documents()

        elif asset == StrategyAsset.FACT:
            yield from self._fetch_facts()

        elif asset == StrategyAsset.METRIC:
            yield from self._fetch_metrics()

        elif asset == StrategyAsset.REPORT:
            yield from self._fetch_reports()

        elif asset == StrategyAsset.USER:
            yield from self._fetch_users()

        else:
            raise NotImplementedError(f"Asset type {asset} not implemented yet")
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from typing import Any, Optional
|
|
2
|
+
|
|
3
|
+
from pydantic import Field, field_validator
|
|
4
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
5
|
+
|
|
6
|
+
STRATEGY_ENV_PREFIX = "CATALOG_STRATEGY_"
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class StrategyCredentials(BaseSettings):
    """
    Credentials for Strategy, read from the given arguments or from
    environment variables prefixed with `CATALOG_STRATEGY_`.
    """

    model_config = SettingsConfigDict(
        env_prefix=STRATEGY_ENV_PREFIX,
        extra="ignore",
        populate_by_name=True,
    )

    base_url: str
    # Excluded from the model's repr so the password is not printed with it.
    password: str = Field(repr=False)
    username: str

    project_ids: Optional[list[str]] = None

    @field_validator("project_ids", mode="before")
    @classmethod
    def _check_project_ids(cls, project_ids: Any) -> Optional[list[str]]:
        """
        The project IDs are optional and can be either a list of strings
        or single string with project IDs separated by commas.

        For a string, each item is stripped and empty items (empty string,
        doubled or trailing commas) are dropped, so that no empty project ID
        is produced. Raises a ValueError for any other type.
        """
        if project_ids is None:
            return None

        if isinstance(project_ids, str):
            stripped_items = (item.strip() for item in project_ids.split(","))
            return [item for item in stripped_items if item]

        if isinstance(project_ids, list):
            return project_ids

        raise ValueError(f"Unexpected type for project_id: {type(project_ids)}")
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from collections.abc import Iterable
|
|
3
|
+
from typing import Union
|
|
4
|
+
|
|
5
|
+
from ...utils import (
|
|
6
|
+
OUTPUT_DIR,
|
|
7
|
+
current_timestamp,
|
|
8
|
+
deep_serialize,
|
|
9
|
+
from_env,
|
|
10
|
+
get_output_filename,
|
|
11
|
+
write_json,
|
|
12
|
+
write_summary,
|
|
13
|
+
)
|
|
14
|
+
from .assets import StrategyAsset
|
|
15
|
+
from .client import StrategyClient, StrategyCredentials
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def iterate_all_data(
    client: StrategyClient,
) -> Iterable[tuple[str, Union[list, dict]]]:
    """
    Yields, for each Strategy asset type, the lowercase asset name together
    with the list of its serialized entities.
    """
    for asset_type in StrategyAsset:
        logger.info(f"Extracting {asset_type.value.upper()} from REST API")
        fetched = client.fetch(asset_type)
        serialized = list(deep_serialize(fetched))
        yield asset_type.name.lower(), serialized
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def extract_all(**kwargs) -> None:
    """
    Extract every Strategy asset and write each one to a JSON file.

    The output directory is taken from the `output` keyword argument,
    falling back to `from_env(OUTPUT_DIR)`. All keyword arguments are
    forwarded to StrategyCredentials. One file per asset is written, all
    sharing the same timestamp, then a summary is written.

    The client is closed even when fetching or writing fails; in that case
    the error propagates and no summary is written.
    """
    _output_directory = kwargs.get("output") or from_env(OUTPUT_DIR)
    credentials = StrategyCredentials(**kwargs)

    client = StrategyClient(credentials=credentials)
    ts = current_timestamp()

    try:
        for key, data in iterate_all_data(client):
            filename = get_output_filename(key, _output_directory, ts)
            write_json(filename, data)
    finally:
        # release the client whatever happened during the extraction
        client.close()

    write_summary(_output_directory, ts)
|
|
@@ -121,11 +121,13 @@ class TableauClient:
|
|
|
121
121
|
self,
|
|
122
122
|
credentials: TableauCredentials,
|
|
123
123
|
timeout_sec: int = DEFAULT_TIMEOUT_SECONDS,
|
|
124
|
+
with_columns: bool = True,
|
|
124
125
|
with_pulse: bool = False,
|
|
125
126
|
override_page_size: Optional[int] = None,
|
|
126
127
|
):
|
|
127
128
|
self._credentials = credentials
|
|
128
129
|
self._server = _server(credentials.server_url, timeout_sec)
|
|
130
|
+
self._with_columns = with_columns
|
|
129
131
|
self._with_pulse = with_pulse
|
|
130
132
|
|
|
131
133
|
self._client_metadata = TableauClientMetadataApi(
|
|
@@ -215,6 +217,10 @@ class TableauClient:
|
|
|
215
217
|
logger.info(f"Skipping asset {asset} - Tableau Pulse de-activated")
|
|
216
218
|
return []
|
|
217
219
|
|
|
220
|
+
if asset == TableauAsset.COLUMN and not self._with_columns:
|
|
221
|
+
logger.info(f"Skipping asset {asset} - deactivated columns")
|
|
222
|
+
return []
|
|
223
|
+
|
|
218
224
|
logger.info(f"Extracting {asset.name}...")
|
|
219
225
|
|
|
220
226
|
if asset == TableauAsset.DATASOURCE:
|
|
@@ -32,6 +32,7 @@ def extract_all(**kwargs) -> None:
|
|
|
32
32
|
output_directory
|
|
33
33
|
"""
|
|
34
34
|
output_directory = kwargs.get("output") or from_env(OUTPUT_DIR)
|
|
35
|
+
with_columns = not kwargs.get("skip_columns")
|
|
35
36
|
with_pulse = kwargs.get("with_pulse") or False
|
|
36
37
|
page_size = kwargs.get("page_size")
|
|
37
38
|
timestamp = current_timestamp()
|
|
@@ -39,6 +40,7 @@ def extract_all(**kwargs) -> None:
|
|
|
39
40
|
credentials = TableauCredentials(**kwargs)
|
|
40
41
|
client = TableauClient(
|
|
41
42
|
credentials,
|
|
43
|
+
with_columns=with_columns,
|
|
42
44
|
with_pulse=with_pulse,
|
|
43
45
|
override_page_size=page_size,
|
|
44
46
|
)
|
|
@@ -1,13 +1,27 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
/*
|
|
2
|
+
This query was inspired by this script:
|
|
3
|
+
https://github.com/awslabs/amazon-redshift-utils/blob/master/src/AdminViews/v_generate_view_ddl.sql
|
|
4
|
+
|
|
5
|
+
Notable differences:
|
|
6
|
+
* There is no "--DROP" statement/comment here
|
|
7
|
+
* Left-trimming the view definition is necessary to capture "CREATE" statements starting with whitespace or line breaks.
|
|
8
|
+
*/
|
|
9
|
+
|
|
3
10
|
SELECT
|
|
4
11
|
CURRENT_DATABASE() AS database_name,
|
|
5
12
|
n.nspname AS schema_name,
|
|
6
13
|
c.relname AS view_name,
|
|
7
14
|
CASE
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
15
|
+
WHEN c.relnatts > 0 THEN
|
|
16
|
+
CASE
|
|
17
|
+
STRPOS(LOWER(LTRIM(pg_get_viewdef(c.oid, TRUE), '\t\r\n ')), 'create')
|
|
18
|
+
WHEN 1 THEN '' -- CREATE statement already present
|
|
19
|
+
ELSE -- No CREATE statement present, so no materialized view anyway
|
|
20
|
+
'CREATE OR REPLACE VIEW ' || QUOTE_IDENT(n.nspname) || '.' || QUOTE_IDENT(c.relname) || ' AS\n'
|
|
21
|
+
END
|
|
22
|
+
|| COALESCE(pg_get_viewdef(c.oid, TRUE), '')
|
|
23
|
+
ELSE COALESCE(pg_get_viewdef(c.oid, TRUE), '')
|
|
24
|
+
END AS view_definition
|
|
11
25
|
FROM
|
|
12
26
|
pg_catalog.pg_class AS c
|
|
13
27
|
INNER JOIN
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: castor-extractor
|
|
3
|
-
Version: 0.24.
|
|
3
|
+
Version: 0.24.15
|
|
4
4
|
Summary: Extract your metadata assets.
|
|
5
5
|
Home-page: https://www.castordoc.com/
|
|
6
6
|
License: EULA
|
|
@@ -28,6 +28,7 @@ Provides-Extra: qlik
|
|
|
28
28
|
Provides-Extra: redshift
|
|
29
29
|
Provides-Extra: snowflake
|
|
30
30
|
Provides-Extra: sqlserver
|
|
31
|
+
Provides-Extra: strategy
|
|
31
32
|
Provides-Extra: tableau
|
|
32
33
|
Requires-Dist: cryptography (>=43.0.0,<44.0.0) ; extra == "snowflake"
|
|
33
34
|
Requires-Dist: databricks-sql-connector (==3.6.0) ; extra == "databricks" or extra == "all"
|
|
@@ -35,11 +36,12 @@ Requires-Dist: google-api-core (>=2.1.1,<3.0.0)
|
|
|
35
36
|
Requires-Dist: google-api-python-client (>=2.121.0,<3.0.0) ; extra == "lookerstudio" or extra == "all"
|
|
36
37
|
Requires-Dist: google-auth (>=2,<3)
|
|
37
38
|
Requires-Dist: google-cloud-core (>=2.1.0,<3.0.0)
|
|
38
|
-
Requires-Dist: google-cloud-storage (>=3
|
|
39
|
+
Requires-Dist: google-cloud-storage (>=2,<3)
|
|
39
40
|
Requires-Dist: google-resumable-media (>=2.0.3,<3.0.0)
|
|
40
41
|
Requires-Dist: googleapis-common-protos (>=1.53.0,<2.0.0)
|
|
41
42
|
Requires-Dist: looker-sdk (>=25.0.0,<26.0.0) ; extra == "looker" or extra == "all"
|
|
42
43
|
Requires-Dist: msal (>=1.20.0,<2.0.0) ; extra == "powerbi" or extra == "all"
|
|
44
|
+
Requires-Dist: mstrio-py (>=11.5.3.101,<12.0.0.0) ; (python_version >= "3.10") and (extra == "strategy" or extra == "all")
|
|
43
45
|
Requires-Dist: numpy (<2) ; extra == "bigquery" or extra == "databricks" or extra == "all"
|
|
44
46
|
Requires-Dist: numpy (>=1.26) ; (python_version >= "3.12" and python_version < "3.13") and (extra == "bigquery" or extra == "databricks" or extra == "all")
|
|
45
47
|
Requires-Dist: pandas (>=2.1) ; (python_version >= "3.12" and python_version < "3.13") and (extra == "databricks" or extra == "all")
|
|
@@ -103,6 +105,8 @@ It also embeds utilities to help you push your metadata to Castor:
|
|
|
103
105
|
Requirements: **python3.9+**
|
|
104
106
|
<img src="https://upload.wikimedia.org/wikipedia/commons/c/c3/Python-logo-notext.svg" width=20 />
|
|
105
107
|
|
|
108
|
+
**Note:** The Strategy command requires **python3.10+**. All other modules work with python3.9+.
|
|
109
|
+
|
|
106
110
|
### Create castor-env
|
|
107
111
|
|
|
108
112
|
We advise creating a dedicated [Python environment](https://docs.python.org/3/library/venv.html).
|
|
@@ -181,6 +185,7 @@ pip install castor-extractor[postgres]
|
|
|
181
185
|
pip install castor-extractor[redshift]
|
|
182
186
|
pip install castor-extractor[snowflake]
|
|
183
187
|
pip install castor-extractor[sqlserver]
|
|
188
|
+
pip install castor-extractor[strategy]
|
|
184
189
|
pip install castor-extractor[tableau]
|
|
185
190
|
```
|
|
186
191
|
|
|
@@ -204,12 +209,32 @@ export CASTOR_OUTPUT_DIRECTORY="/tmp/castor"
|
|
|
204
209
|
|
|
205
210
|
## Contact
|
|
206
211
|
|
|
207
|
-
For any questions or bug report, contact us at [support@
|
|
212
|
+
For any questions or bug reports, contact us at [support@coalesce.io](mailto:support@coalesce.io)
|
|
208
213
|
|
|
209
|
-
[
|
|
214
|
+
[Catalog from Coalesce](https://castordoc.com) helps you find, understand, and use your data assets
|
|
210
215
|
|
|
211
216
|
# Changelog
|
|
212
217
|
|
|
218
|
+
## 0.24.15 - 2025-05-12
|
|
219
|
+
|
|
220
|
+
* Tableau: Add argument to skip columns extraction
|
|
221
|
+
|
|
222
|
+
## 0.24.14 - 2025-05-06
|
|
223
|
+
|
|
224
|
+
* Confluence: extract pages per space to allow additional filtering. By default, pages from archived or personal spaces are not extracted.
|
|
225
|
+
|
|
226
|
+
## 0.24.13 - 2025-05-05
|
|
227
|
+
|
|
228
|
+
* Rollback cloud-storage version as it's not compatible with Keboola
|
|
229
|
+
|
|
230
|
+
## 0.24.12 - 2025-05-05
|
|
231
|
+
|
|
232
|
+
* Redshift - fix query definition of materialized views
|
|
233
|
+
|
|
234
|
+
## 0.24.11 - 2025-05-05
|
|
235
|
+
|
|
236
|
+
* Add support for Strategy (formerly MicroStrategy)
|
|
237
|
+
|
|
213
238
|
## 0.24.10 - 2025-04-30
|
|
214
239
|
|
|
215
240
|
* Tableau - skip warnings instead of raising an error
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
CHANGELOG.md,sha256=
|
|
1
|
+
CHANGELOG.md,sha256=6XUz09FfUZSXFfROVG7BliBupfyr5eeBy0J3cQZVvys,17398
|
|
2
2
|
Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
|
|
3
3
|
DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
|
|
4
4
|
LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
5
|
-
README.md,sha256=
|
|
5
|
+
README.md,sha256=C6hTyZO60T7z7xwHbspHlii384Jn02k0Rycxu3bCX0o,3866
|
|
6
6
|
castor_extractor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
7
|
castor_extractor/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
castor_extractor/commands/extract_bigquery.py,sha256=dU4OiYO1V0n32orvZnMh1_xtFKF_VxHNXcVsH3otY-g,1269
|
|
9
|
-
castor_extractor/commands/extract_confluence.py,sha256=
|
|
9
|
+
castor_extractor/commands/extract_confluence.py,sha256=blYcnDqywXNKRQ1aZAD9FclhLlO7x8Y_tb0lgl85v0w,1641
|
|
10
10
|
castor_extractor/commands/extract_databricks.py,sha256=SVKyoa-BBUQAM6HRHf1Wdg9-tpICic2yyvXQwHcNBhA,1264
|
|
11
11
|
castor_extractor/commands/extract_domo.py,sha256=jvAawUsUTHrwCn_koK6StmQr4n_b5GyvJi6uu6WS0SM,1061
|
|
12
12
|
castor_extractor/commands/extract_looker.py,sha256=cySLiolLCgrREJ9d0kMrJ7P8K3efHTBTzShalWVfI3A,1214
|
|
@@ -25,7 +25,8 @@ castor_extractor/commands/extract_salesforce_reporting.py,sha256=FdANTNiLkIPdm80
|
|
|
25
25
|
castor_extractor/commands/extract_sigma.py,sha256=sxewHcZ1Doq35V2qnpX_zCKKXkrb1_9bYjUMg7BOW-k,643
|
|
26
26
|
castor_extractor/commands/extract_snowflake.py,sha256=GwlrRxwEBjHqGs_3bs5vM9fzmv61_iwvBr1KcIgFgWM,2161
|
|
27
27
|
castor_extractor/commands/extract_sqlserver.py,sha256=lwhbcNChaXHZgMgSOch3faVr7WJw-sDU6GHl3lzBt_0,1141
|
|
28
|
-
castor_extractor/commands/
|
|
28
|
+
castor_extractor/commands/extract_strategy.py,sha256=Q-pUymatPrBFGXobhyUPzFph0-t774-XOpjdCFF1dYo,821
|
|
29
|
+
castor_extractor/commands/extract_tableau.py,sha256=ngujGYohWOqOK1qjIP1Hh951jr0KNKNSeOyoaOnO450,1558
|
|
29
30
|
castor_extractor/commands/extract_thoughtspot.py,sha256=caAYJlH-vK7u5IUB6OKXxcaWfLgc7d_XqnFDWK6YNS4,639
|
|
30
31
|
castor_extractor/commands/file_check.py,sha256=TJx76Ymd0QCECmq35zRJMkPE8DJtSInB28MuSXWk8Ao,2644
|
|
31
32
|
castor_extractor/commands/upload.py,sha256=rLXp7gQ8zb1kLbho4FT87q8eJd8Gvo_TkyIynAaQ-4s,1342
|
|
@@ -44,11 +45,12 @@ castor_extractor/knowledge/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
|
|
|
44
45
|
castor_extractor/knowledge/confluence/__init__.py,sha256=pRT615pMDlB7Ifs09erVn2EdpZHgkvX5selemWU3VPE,129
|
|
45
46
|
castor_extractor/knowledge/confluence/assets.py,sha256=zv2G2LB8H0fKDbVJ4kHrAjbqehXI_K-wgd_ghSXGFvs,144
|
|
46
47
|
castor_extractor/knowledge/confluence/client/__init__.py,sha256=ALAzo0JEhxFzH2FnIO6HmtkAGS2_bGY8KXXMcTGV3aE,84
|
|
47
|
-
castor_extractor/knowledge/confluence/client/client.py,sha256=
|
|
48
|
+
castor_extractor/knowledge/confluence/client/client.py,sha256=ihdagtAEgIcO5MmX5-coGEJkUg7_Tw1_7Vl50NDorhE,3731
|
|
49
|
+
castor_extractor/knowledge/confluence/client/client_test.py,sha256=LTT49ORl0DPTdDpKdREUErnwIA40xPy2C3uwdkVS1I0,3071
|
|
48
50
|
castor_extractor/knowledge/confluence/client/credentials.py,sha256=tqUMw-SVoAi4o6I6OeGk4MeDiIPU3-ihhaomXv4CQ64,419
|
|
49
|
-
castor_extractor/knowledge/confluence/client/endpoints.py,sha256=
|
|
51
|
+
castor_extractor/knowledge/confluence/client/endpoints.py,sha256=ClBzE8a5zqA4ngAecc8vMv9QJCdbtcv3GKuorZ8kOdA,1100
|
|
50
52
|
castor_extractor/knowledge/confluence/client/pagination.py,sha256=ty4meiMEujDVSiQyOJTibd-ReYyDyGezdFuk7EAGtMA,862
|
|
51
|
-
castor_extractor/knowledge/confluence/extract.py,sha256=
|
|
53
|
+
castor_extractor/knowledge/confluence/extract.py,sha256=mOAs5uvjM0LZJzrD36uFAt_nsiqQ48kKTDFIKru3LSo,1858
|
|
52
54
|
castor_extractor/knowledge/notion/__init__.py,sha256=ZDmh0eNSxHf1zVPm0aYlKPci-vzOXhAgdsWjS2hdjh4,117
|
|
53
55
|
castor_extractor/knowledge/notion/assets.py,sha256=QHv1-pomt5UeN_prP2L6t_zJ-tDSqB8LgopkGAODYPQ,164
|
|
54
56
|
castor_extractor/knowledge/notion/client/__init__.py,sha256=CDPorBCethuNTEtpjvHGcWnWeVfqkEq-IbakWjDKATw,76
|
|
@@ -72,7 +74,7 @@ castor_extractor/transformation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
|
|
|
72
74
|
castor_extractor/transformation/coalesce/__init__.py,sha256=CW_qdtEfwgJRsCyBlk5hNlxwEO-VV6mBXZvkRbND_J8,112
|
|
73
75
|
castor_extractor/transformation/coalesce/assets.py,sha256=pzccYPP66c9PAnVroemx7-6MeRHw7Ft1OlTC6jIamAA,363
|
|
74
76
|
castor_extractor/transformation/coalesce/client/__init__.py,sha256=VRmVpH29rOghtDQnCN7dAdA0dI0Lxseu4BC8rnwM9dU,80
|
|
75
|
-
castor_extractor/transformation/coalesce/client/client.py,sha256
|
|
77
|
+
castor_extractor/transformation/coalesce/client/client.py,sha256=-fFxWtDPPruNmDPc6FXft_6RwRKEee4JM-13d90fms0,6442
|
|
76
78
|
castor_extractor/transformation/coalesce/client/credentials.py,sha256=jbJxjbdPspf-dzYKfeb7oqL_8TXd1nvkJrjAcdAnLPc,548
|
|
77
79
|
castor_extractor/transformation/coalesce/client/endpoint.py,sha256=0uLh7dpA1vsR9qr_50SEYV_-heQE4BwED9oNMgYsL-w,1272
|
|
78
80
|
castor_extractor/transformation/coalesce/client/type.py,sha256=oiiVP9NL0ijTXyQmaB8aJVYckc7m-m8ZgMyNIAduUKE,43
|
|
@@ -272,10 +274,16 @@ castor_extractor/visualization/sigma/client/credentials.py,sha256=XddAuQSmCKpxJ7
|
|
|
272
274
|
castor_extractor/visualization/sigma/client/endpoints.py,sha256=DBFphbgoH78_MZUGM_bKBAq28Nl7LWSZ6VRsbxrxtDg,1162
|
|
273
275
|
castor_extractor/visualization/sigma/client/pagination.py,sha256=kNEhNq08tTGbypyMjxs0w4uvDtQc_iaWpOZweaa_FsU,690
|
|
274
276
|
castor_extractor/visualization/sigma/extract.py,sha256=XIT1qsj6g6dgBWP8HPfj_medZexu48EaY9tUwi14gzM,2298
|
|
277
|
+
castor_extractor/visualization/strategy/__init__.py,sha256=HOMv4JxqF5ZmViWi-pDE-PSXJRLTdXal_jtpHG_rlR8,123
|
|
278
|
+
castor_extractor/visualization/strategy/assets.py,sha256=tqB3GOtp-r7IOnYO8UxZgrldoSMImJnv5KeIwDFxg68,302
|
|
279
|
+
castor_extractor/visualization/strategy/client/__init__.py,sha256=XWP0yF5j6JefDJkDfX-RSJn3HF2ceQ0Yx1PLCfB3BBo,80
|
|
280
|
+
castor_extractor/visualization/strategy/client/client.py,sha256=F7taX0jSQpM8R3GOGeUQ7U_bJKkoHTwAc9oyc3ZDxbM,10261
|
|
281
|
+
castor_extractor/visualization/strategy/client/credentials.py,sha256=urFfNxWX1JG6wwFMYImufQzHa5g-sgjdlVGzi63owwg,1113
|
|
282
|
+
castor_extractor/visualization/strategy/extract.py,sha256=2fBuvS2xiOGXRpxXnZsE_C3en6t1-BlM5TbusjHyEkg,1166
|
|
275
283
|
castor_extractor/visualization/tableau/__init__.py,sha256=eFI_1hjdkxyUiAYiy3szwyuwn3yJ5C_KbpBU0ySJDcQ,138
|
|
276
284
|
castor_extractor/visualization/tableau/assets.py,sha256=HbCRd8VCj1WBEeqg9jwnygnT7xOFJ6PQD7Lq7sV-XR0,635
|
|
277
285
|
castor_extractor/visualization/tableau/client/__init__.py,sha256=P8RKFKOC63WkH5hdEytJOwHS9vzQ8GXreLfXZetmMP8,78
|
|
278
|
-
castor_extractor/visualization/tableau/client/client.py,sha256=
|
|
286
|
+
castor_extractor/visualization/tableau/client/client.py,sha256=iJ3Y-vwPvmPyAUTs1PqFJEZelPGiLvsiwXpTI3b5THc,7867
|
|
279
287
|
castor_extractor/visualization/tableau/client/client_metadata_api.py,sha256=ryRq4_qUok8vvWGhj5CNWXtwR2JlUsu1qjsov2KhQTE,6286
|
|
280
288
|
castor_extractor/visualization/tableau/client/client_metadata_api_test.py,sha256=rikyQKDLFYHLJhHJTF3LwWhKJ80svtTsYp5n7n9oTU8,2665
|
|
281
289
|
castor_extractor/visualization/tableau/client/client_rest_api.py,sha256=x4dNw4PPJdalTlGowwkANwqiS2ZhGxzpQytkHq3KbpY,3988
|
|
@@ -285,7 +293,7 @@ castor_extractor/visualization/tableau/client/errors.py,sha256=ecT8Tit5VtzrOBB9y
|
|
|
285
293
|
castor_extractor/visualization/tableau/client/gql_queries.py,sha256=XJAfhpMZ5S7-AhfpOaoHMHCAdil-l5e5xB-CH4NC38M,2177
|
|
286
294
|
castor_extractor/visualization/tableau/client/rest_fields.py,sha256=ZKYYuMxg9PXhczVXaD4rXNk7dYyWJ1_bVM8FLEXju7s,888
|
|
287
295
|
castor_extractor/visualization/tableau/constants.py,sha256=lHGB50FgVNO2nXeIhkvQKivD8ZFBIjDrflgD5cTXKJw,104
|
|
288
|
-
castor_extractor/visualization/tableau/extract.py,sha256=
|
|
296
|
+
castor_extractor/visualization/tableau/extract.py,sha256=hGVr1BZVsHlIgNXOFusRN2YwUUhXvF3reOeN8g1CTEo,1508
|
|
289
297
|
castor_extractor/visualization/thoughtspot/__init__.py,sha256=NhTGUk5Kdt54oCjHYoAt0cLBmVLys5lFYiRANL6wCmI,150
|
|
290
298
|
castor_extractor/visualization/thoughtspot/assets.py,sha256=SAQWPKaD2NTSDg7-GSkcRSSEkKSws0MJfOVcHkdeTSg,276
|
|
291
299
|
castor_extractor/visualization/thoughtspot/client/__init__.py,sha256=svrE2rMxR-OXctjPeAHMEPePlfcra-9KDevTMcHunAA,86
|
|
@@ -375,7 +383,7 @@ castor_extractor/warehouse/redshift/queries/schema.sql,sha256=Mf6nooi2w2PhGxM2_k
|
|
|
375
383
|
castor_extractor/warehouse/redshift/queries/table.sql,sha256=y8CGOwPHH_Mr8g1Zvuz2U5ldL8zuPm5v3M5RPZqIhsE,2645
|
|
376
384
|
castor_extractor/warehouse/redshift/queries/table_freshness.sql,sha256=l61_ysmTEtuMwK9RmYmD5cu0HmD1RXwTEhX0ytBeyxg,726
|
|
377
385
|
castor_extractor/warehouse/redshift/queries/user.sql,sha256=sEXveJAuNvZacvpI6WfwsX6VavoMb2VqYA32f6Dt-_Y,170
|
|
378
|
-
castor_extractor/warehouse/redshift/queries/view_ddl.sql,sha256=
|
|
386
|
+
castor_extractor/warehouse/redshift/queries/view_ddl.sql,sha256=VxrZ6VFHQy46AoIuLTwb2DZ8CWbUM9JLzyfp5jc3m6E,1232
|
|
379
387
|
castor_extractor/warehouse/redshift/query.py,sha256=hQaBHj0OvoEQ_HehU-vPyd5JH7YgndbzVi9-pyA5k_U,1054
|
|
380
388
|
castor_extractor/warehouse/salesforce/__init__.py,sha256=NR4aNea5jeE1xYqeZ_29deeN84CkN0_D_Z7CLQdJvFY,137
|
|
381
389
|
castor_extractor/warehouse/salesforce/client.py,sha256=067ZyccmIYoY6VwLTSneefOJqUpobtnoEzxJMY2oSPs,3268
|
|
@@ -416,8 +424,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
|
|
|
416
424
|
castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
|
|
417
425
|
castor_extractor/warehouse/sqlserver/query.py,sha256=g0hPT-RmeGi2DyenAi3o72cTlQsLToXIFYojqc8E5fQ,533
|
|
418
426
|
castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
|
|
419
|
-
castor_extractor-0.24.
|
|
420
|
-
castor_extractor-0.24.
|
|
421
|
-
castor_extractor-0.24.
|
|
422
|
-
castor_extractor-0.24.
|
|
423
|
-
castor_extractor-0.24.
|
|
427
|
+
castor_extractor-0.24.15.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
428
|
+
castor_extractor-0.24.15.dist-info/METADATA,sha256=ktAgO-d5jJmInoD_VCLwIT522Qy31paP3Smh_TGa6MI,24851
|
|
429
|
+
castor_extractor-0.24.15.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
|
430
|
+
castor_extractor-0.24.15.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
|
|
431
|
+
castor_extractor-0.24.15.dist-info/RECORD,,
|
|
@@ -19,6 +19,7 @@ castor-extract-salesforce-viz=castor_extractor.commands.extract_salesforce_repor
|
|
|
19
19
|
castor-extract-sigma=castor_extractor.commands.extract_sigma:main
|
|
20
20
|
castor-extract-snowflake=castor_extractor.commands.extract_snowflake:main
|
|
21
21
|
castor-extract-sqlserver=castor_extractor.commands.extract_sqlserver:main
|
|
22
|
+
castor-extract-strategy=castor_extractor.commands.extract_strategy:main
|
|
22
23
|
castor-extract-tableau=castor_extractor.commands.extract_tableau:main
|
|
23
24
|
castor-extract-thoughtspot=castor_extractor.commands.extract_thoughtspot:main
|
|
24
25
|
castor-file-check=castor_extractor.commands.file_check:main
|
|
File without changes
|
|
File without changes
|