castor-extractor 0.24.13__py3-none-any.whl → 0.24.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +28 -0
- castor_extractor/commands/extract_confluence.py +29 -0
- castor_extractor/commands/extract_powerbi.py +2 -0
- castor_extractor/commands/extract_tableau.py +7 -0
- castor_extractor/knowledge/confluence/assets.py +2 -0
- castor_extractor/knowledge/confluence/client/client.py +114 -4
- castor_extractor/knowledge/confluence/client/client_test.py +121 -0
- castor_extractor/knowledge/confluence/client/endpoints.py +31 -4
- castor_extractor/knowledge/confluence/extract.py +18 -1
- castor_extractor/knowledge/confluence/utils.py +12 -0
- castor_extractor/knowledge/confluence/utils_test.py +30 -0
- castor_extractor/transformation/coalesce/client/client.py +17 -1
- castor_extractor/{utils → transformation}/dbt/client_test.py +1 -3
- castor_extractor/utils/__init__.py +1 -1
- castor_extractor/utils/validation.py +2 -2
- castor_extractor/visualization/powerbi/__init__.py +7 -1
- castor_extractor/visualization/powerbi/client/__init__.py +6 -1
- castor_extractor/visualization/powerbi/client/authentication.py +5 -1
- castor_extractor/visualization/powerbi/client/client.py +13 -9
- castor_extractor/visualization/powerbi/client/client_test.py +12 -7
- castor_extractor/visualization/powerbi/client/credentials.py +15 -0
- castor_extractor/visualization/powerbi/client/endpoints.py +27 -37
- castor_extractor/visualization/strategy/client/client.py +1 -3
- castor_extractor/visualization/tableau/client/client.py +6 -0
- castor_extractor/visualization/tableau/extract.py +2 -0
- {castor_extractor-0.24.13.dist-info → castor_extractor-0.24.20.dist-info}/METADATA +29 -1
- {castor_extractor-0.24.13.dist-info → castor_extractor-0.24.20.dist-info}/RECORD +34 -31
- /castor_extractor/{utils → transformation}/dbt/__init__.py +0 -0
- /castor_extractor/{utils → transformation}/dbt/assets.py +0 -0
- /castor_extractor/{utils → transformation}/dbt/client.py +0 -0
- /castor_extractor/{utils → transformation}/dbt/credentials.py +0 -0
- {castor_extractor-0.24.13.dist-info → castor_extractor-0.24.20.dist-info}/LICENCE +0 -0
- {castor_extractor-0.24.13.dist-info → castor_extractor-0.24.20.dist-info}/WHEEL +0 -0
- {castor_extractor-0.24.13.dist-info → castor_extractor-0.24.20.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED
|
@@ -1,5 +1,33 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.24.20 - 2025-05-19
|
|
4
|
+
|
|
5
|
+
* Powerbi: allow custom api base and login url
|
|
6
|
+
|
|
7
|
+
## 0.24.19 - 2025-05-14
|
|
8
|
+
|
|
9
|
+
* Confluence: extract databases
|
|
10
|
+
|
|
11
|
+
## 0.24.18 - 2025-05-13
|
|
12
|
+
|
|
13
|
+
* Improve folder organisation for transformation tools
|
|
14
|
+
|
|
15
|
+
## 0.24.17 - 2025-05-13
|
|
16
|
+
|
|
17
|
+
* Strategy: fix dashboard URL format
|
|
18
|
+
|
|
19
|
+
## 0.24.16 - 2025-05-12
|
|
20
|
+
|
|
21
|
+
* Confluence: extract folders to complete the page hierarchy
|
|
22
|
+
|
|
23
|
+
## 0.24.15 - 2025-05-12
|
|
24
|
+
|
|
25
|
+
* Tableau: Add argument to skip columns extraction
|
|
26
|
+
|
|
27
|
+
## 0.24.14 - 2025-05-06
|
|
28
|
+
|
|
29
|
+
* Confluence: extract pages per space to allow additional filtering. By default, pages from archived or personal spaces are not extracted.
|
|
30
|
+
|
|
3
31
|
## 0.24.13 - 2025-05-05
|
|
4
32
|
|
|
5
33
|
* Rollback cloud-storage version as it's not compatible with Keboola
|
|
@@ -16,4 +16,33 @@ def main():
|
|
|
16
16
|
parser.add_argument("-t", "--token", help="Confluence API token")
|
|
17
17
|
parser.add_argument("-u", "--username", help="Confluence username")
|
|
18
18
|
|
|
19
|
+
parser.add_argument(
|
|
20
|
+
"--include-archived-spaces",
|
|
21
|
+
action="store_true",
|
|
22
|
+
default=False,
|
|
23
|
+
help="Include pages from archived spaces (Optional)",
|
|
24
|
+
)
|
|
25
|
+
parser.add_argument(
|
|
26
|
+
"--include-personal-spaces",
|
|
27
|
+
action="store_true",
|
|
28
|
+
default=False,
|
|
29
|
+
help="Include pages from personal spaces (Optional)",
|
|
30
|
+
)
|
|
31
|
+
parser.add_argument(
|
|
32
|
+
"--space-ids-allowed",
|
|
33
|
+
type=str,
|
|
34
|
+
nargs="+",
|
|
35
|
+
help=(
|
|
36
|
+
"List of Confluence space IDs allowed for extraction (Optional). "
|
|
37
|
+
"Only pages from these Spaces will be extracted. "
|
|
38
|
+
"This overrides any other filtering (archived, personal, etc.)"
|
|
39
|
+
),
|
|
40
|
+
)
|
|
41
|
+
parser.add_argument(
|
|
42
|
+
"--space-ids-blocked",
|
|
43
|
+
type=str,
|
|
44
|
+
nargs="+",
|
|
45
|
+
help="List of Confluence space IDs to exclude fom the extraction (Optional)",
|
|
46
|
+
)
|
|
47
|
+
|
|
19
48
|
confluence.extract_all(**parse_filled_arguments(parser))
|
|
@@ -20,5 +20,7 @@ def main():
|
|
|
20
20
|
nargs="*",
|
|
21
21
|
)
|
|
22
22
|
parser.add_argument("-o", "--output", help="Directory to write to")
|
|
23
|
+
parser.add_argument("-l", "--login_url", help="Login url (Optional)")
|
|
24
|
+
parser.add_argument("-a", "--api_base", help="REST API base (Optional)")
|
|
23
25
|
|
|
24
26
|
powerbi.extract_all(**parse_filled_arguments(parser))
|
|
@@ -21,6 +21,13 @@ def main():
|
|
|
21
21
|
parser.add_argument("-b", "--server-url", help="Tableau server url")
|
|
22
22
|
parser.add_argument("-i", "--site-id", help="Tableau site ID")
|
|
23
23
|
|
|
24
|
+
parser.add_argument(
|
|
25
|
+
"--skip-columns",
|
|
26
|
+
dest="skip_columns",
|
|
27
|
+
action="store_true",
|
|
28
|
+
help="Option to avoid extracting Tableau columns, default to False",
|
|
29
|
+
)
|
|
30
|
+
|
|
24
31
|
parser.add_argument(
|
|
25
32
|
"--with-pulse",
|
|
26
33
|
dest="with_pulse",
|
|
@@ -1,5 +1,7 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
from collections.abc import Iterator
|
|
2
3
|
from functools import partial
|
|
4
|
+
from typing import Iterable, Optional
|
|
3
5
|
|
|
4
6
|
from ....utils import (
|
|
5
7
|
APIClient,
|
|
@@ -13,16 +15,24 @@ from .credentials import ConfluenceCredentials
|
|
|
13
15
|
from .endpoints import ConfluenceEndpointFactory
|
|
14
16
|
from .pagination import ConfluencePagination
|
|
15
17
|
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
16
20
|
_HEADERS = {
|
|
17
21
|
"Accept": "application/json",
|
|
18
22
|
"Content-Type": "application/json",
|
|
19
23
|
}
|
|
24
|
+
_STATUS_ARCHIVED = "archived"
|
|
25
|
+
_TYPE_PERSONAL = "personal"
|
|
20
26
|
|
|
21
27
|
|
|
22
28
|
class ConfluenceClient(APIClient):
|
|
23
29
|
def __init__(
|
|
24
30
|
self,
|
|
25
31
|
credentials: ConfluenceCredentials,
|
|
32
|
+
include_archived_spaces: bool = False,
|
|
33
|
+
include_personal_spaces: bool = False,
|
|
34
|
+
space_ids_allowed: Optional[set[str]] = None,
|
|
35
|
+
space_ids_blocked: Optional[set[str]] = None,
|
|
26
36
|
):
|
|
27
37
|
self.account_id = credentials.account_id
|
|
28
38
|
auth = BasicAuth(
|
|
@@ -34,12 +44,98 @@ class ConfluenceClient(APIClient):
|
|
|
34
44
|
headers=_HEADERS,
|
|
35
45
|
)
|
|
36
46
|
|
|
47
|
+
self.include_archived_spaces = include_archived_spaces
|
|
48
|
+
self.include_personal_spaces = include_personal_spaces
|
|
49
|
+
self.space_ids_allowed = space_ids_allowed or set()
|
|
50
|
+
self.space_ids_blocked = space_ids_blocked or set()
|
|
51
|
+
|
|
52
|
+
def databases(self, database_ids: Iterable[str]) -> Iterator[dict]:
|
|
53
|
+
"""
|
|
54
|
+
Extracts all given databases
|
|
55
|
+
"""
|
|
56
|
+
for _id in database_ids:
|
|
57
|
+
database = self._get(
|
|
58
|
+
endpoint=ConfluenceEndpointFactory.database(_id),
|
|
59
|
+
)
|
|
60
|
+
yield database
|
|
61
|
+
|
|
62
|
+
def folders(self, folder_ids: Iterable[str]) -> Iterator[dict]:
|
|
63
|
+
"""
|
|
64
|
+
Extracts all given folders and their parent folders.
|
|
65
|
+
"""
|
|
66
|
+
candidate_ids = set(folder_ids)
|
|
67
|
+
seen = set()
|
|
68
|
+
|
|
69
|
+
while candidate_ids:
|
|
70
|
+
folder_id = candidate_ids.pop()
|
|
71
|
+
if folder_id in seen:
|
|
72
|
+
continue
|
|
73
|
+
|
|
74
|
+
seen.add(folder_id)
|
|
75
|
+
folder = self._get(
|
|
76
|
+
endpoint=ConfluenceEndpointFactory.folder(folder_id),
|
|
77
|
+
)
|
|
78
|
+
yield folder
|
|
79
|
+
|
|
80
|
+
parent_type = folder.get("parentType")
|
|
81
|
+
if parent_type == "folder":
|
|
82
|
+
folder_id = folder["parentId"]
|
|
83
|
+
if folder_id not in seen:
|
|
84
|
+
candidate_ids.add(folder["parentId"])
|
|
85
|
+
|
|
86
|
+
if not parent_type:
|
|
87
|
+
logger.info(f"folder with unknown parent: {folder_id}")
|
|
88
|
+
|
|
37
89
|
def pages(self):
|
|
90
|
+
"""Extracts all pages from all relevant Spaces."""
|
|
91
|
+
for space in self.spaces():
|
|
92
|
+
space_id = space["id"]
|
|
93
|
+
request = partial(
|
|
94
|
+
self._get,
|
|
95
|
+
endpoint=ConfluenceEndpointFactory.pages(space_id),
|
|
96
|
+
)
|
|
97
|
+
yield from fetch_all_pages(request, ConfluencePagination)
|
|
98
|
+
|
|
99
|
+
def spaces(self) -> Iterator[dict]:
|
|
100
|
+
"""
|
|
101
|
+
Returns the spaces meeting the conditions defined by the settings.
|
|
102
|
+
|
|
103
|
+
If `space_ids_allowed` is not empty, only matching spaces are returned.
|
|
104
|
+
|
|
105
|
+
Otherwise, all spaces are filtered by excluding the following:
|
|
106
|
+
* The space is in the blocked list
|
|
107
|
+
* The space is personal (type=personal) and include_personal_spaces is False
|
|
108
|
+
* The space is archived (status=archived) and include_archived_spaces is False
|
|
109
|
+
"""
|
|
38
110
|
request = partial(
|
|
39
111
|
self._get,
|
|
40
|
-
endpoint=ConfluenceEndpointFactory.
|
|
112
|
+
endpoint=ConfluenceEndpointFactory.spaces(),
|
|
41
113
|
)
|
|
42
|
-
|
|
114
|
+
spaces = list(fetch_all_pages(request, ConfluencePagination))
|
|
115
|
+
|
|
116
|
+
if self.space_ids_allowed:
|
|
117
|
+
yield from (
|
|
118
|
+
space
|
|
119
|
+
for space in spaces
|
|
120
|
+
if space["id"] in self.space_ids_allowed
|
|
121
|
+
)
|
|
122
|
+
return
|
|
123
|
+
|
|
124
|
+
for space in spaces:
|
|
125
|
+
space_id = space["id"]
|
|
126
|
+
type_ = space["type"]
|
|
127
|
+
status = space["status"]
|
|
128
|
+
|
|
129
|
+
if space_id in self.space_ids_blocked:
|
|
130
|
+
continue
|
|
131
|
+
|
|
132
|
+
if status == _STATUS_ARCHIVED and not self.include_archived_spaces:
|
|
133
|
+
continue
|
|
134
|
+
|
|
135
|
+
if type_ == _TYPE_PERSONAL and not self.include_personal_spaces:
|
|
136
|
+
continue
|
|
137
|
+
|
|
138
|
+
yield space
|
|
43
139
|
|
|
44
140
|
def users(self):
|
|
45
141
|
request_body = {"accountIds": [self.account_id]}
|
|
@@ -50,9 +146,23 @@ class ConfluenceClient(APIClient):
|
|
|
50
146
|
)
|
|
51
147
|
yield from fetch_all_pages(request, ConfluencePagination)
|
|
52
148
|
|
|
53
|
-
def fetch(
|
|
149
|
+
def fetch(
|
|
150
|
+
self,
|
|
151
|
+
asset: ConfluenceAsset,
|
|
152
|
+
*,
|
|
153
|
+
folder_ids: Optional[Iterator[str]] = None,
|
|
154
|
+
database_ids: Optional[Iterator[str]] = None,
|
|
155
|
+
) -> Iterator[dict]:
|
|
54
156
|
"""Returns the needed metadata for the queried asset"""
|
|
55
|
-
if asset == ConfluenceAsset.
|
|
157
|
+
if asset == ConfluenceAsset.FOLDERS:
|
|
158
|
+
assert folder_ids is not None
|
|
159
|
+
yield from self.folders(folder_ids)
|
|
160
|
+
|
|
161
|
+
elif asset == ConfluenceAsset.DATABASES:
|
|
162
|
+
assert database_ids is not None
|
|
163
|
+
yield from self.databases(database_ids)
|
|
164
|
+
|
|
165
|
+
elif asset == ConfluenceAsset.PAGES:
|
|
56
166
|
yield from self.pages()
|
|
57
167
|
|
|
58
168
|
elif asset == ConfluenceAsset.USERS:
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
from unittest.mock import MagicMock, patch
|
|
2
|
+
|
|
3
|
+
from .client import ConfluenceClient
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_ConfluenceClient_folders():
|
|
7
|
+
"""
|
|
8
|
+
Folder 1 -> Page A -> Folder 2 -> Folder 3 -> Folder 4 -> Page B
|
|
9
|
+
-> Page C -> Folder 5 -> Page D
|
|
10
|
+
|
|
11
|
+
After extracting the pages, we should have all IDs of folders that are
|
|
12
|
+
immediate parents of pages. We still need to look out for nested folders.
|
|
13
|
+
"""
|
|
14
|
+
folder_ids = {"1", "3", "4", "5"}
|
|
15
|
+
mock_responses = {
|
|
16
|
+
"1": {"id": "1", "parentType": None, "parentId": None},
|
|
17
|
+
"2": {"id": "2", "parentType": "page", "parentId": "A"},
|
|
18
|
+
"3": {"id": "3", "parentType": "folder", "parentId": "2"},
|
|
19
|
+
"4": {"id": "4", "parentType": "folder", "parentId": "3"},
|
|
20
|
+
"5": {"id": "5", "parentType": "page", "parentId": "C"},
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
def mock_get(endpoint):
|
|
24
|
+
folder_id = endpoint.split("/")[-1]
|
|
25
|
+
return mock_responses[folder_id]
|
|
26
|
+
|
|
27
|
+
client = ConfluenceClient(credentials=MagicMock())
|
|
28
|
+
|
|
29
|
+
with patch.object(client, "_get", side_effect=mock_get):
|
|
30
|
+
result = list(client.folders(folder_ids))
|
|
31
|
+
|
|
32
|
+
assert len(result) == 5
|
|
33
|
+
assert {folder["id"] for folder in result} == set(mock_responses.keys())
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def test_ConfluenceClient_filtered_spaces_with_allowlist():
|
|
37
|
+
both_blocked_and_allowed_space_id = "789"
|
|
38
|
+
archived_space_id = "934"
|
|
39
|
+
random_space = "1000"
|
|
40
|
+
|
|
41
|
+
spaces = [
|
|
42
|
+
# Both blocked and allowed space. "Allowed" setting takes precedence.
|
|
43
|
+
{
|
|
44
|
+
"id": both_blocked_and_allowed_space_id,
|
|
45
|
+
"type": "global",
|
|
46
|
+
"status": "current",
|
|
47
|
+
},
|
|
48
|
+
{"id": archived_space_id, "type": "global", "status": "archived"},
|
|
49
|
+
{"id": random_space, "type": "global", "status": "current"},
|
|
50
|
+
]
|
|
51
|
+
# the "allowed" list overrides everything else
|
|
52
|
+
client = ConfluenceClient(
|
|
53
|
+
credentials=MagicMock(),
|
|
54
|
+
include_archived_spaces=True,
|
|
55
|
+
space_ids_allowed={
|
|
56
|
+
both_blocked_and_allowed_space_id,
|
|
57
|
+
},
|
|
58
|
+
space_ids_blocked={
|
|
59
|
+
both_blocked_and_allowed_space_id,
|
|
60
|
+
},
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
with (
|
|
64
|
+
patch(
|
|
65
|
+
"source.packages.extractor.castor_extractor.knowledge.confluence.client.client.ConfluenceClient._get"
|
|
66
|
+
),
|
|
67
|
+
patch(
|
|
68
|
+
"source.packages.extractor.castor_extractor.knowledge.confluence.client.client.fetch_all_pages"
|
|
69
|
+
) as mock_fetch_all_pages,
|
|
70
|
+
):
|
|
71
|
+
mock_fetch_all_pages.return_value = spaces
|
|
72
|
+
|
|
73
|
+
filtered_spaces = list(client.spaces())
|
|
74
|
+
|
|
75
|
+
assert len(filtered_spaces) == 1
|
|
76
|
+
filtered_space_ids = {space["id"] for space in filtered_spaces}
|
|
77
|
+
assert set(filtered_space_ids) == {both_blocked_and_allowed_space_id}
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def test_ConfluenceClient_filtered_spaces():
|
|
81
|
+
blocked_id = "42"
|
|
82
|
+
personal_id = "666"
|
|
83
|
+
archived_id = "934"
|
|
84
|
+
random_id = "1000"
|
|
85
|
+
|
|
86
|
+
# test the other settings : allow personal spaces & block space "42"
|
|
87
|
+
client = ConfluenceClient(
|
|
88
|
+
credentials=MagicMock(),
|
|
89
|
+
include_archived_spaces=False,
|
|
90
|
+
include_personal_spaces=True,
|
|
91
|
+
space_ids_blocked={blocked_id},
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
spaces = [
|
|
95
|
+
# Blocked space, to be skipped
|
|
96
|
+
{"id": blocked_id, "type": "global", "status": "current"},
|
|
97
|
+
# Archived space, to be skipped
|
|
98
|
+
{"id": archived_id, "type": "collaboration", "status": "archived"},
|
|
99
|
+
# Personal space, to be included
|
|
100
|
+
{"id": personal_id, "type": "personal", "status": "current"},
|
|
101
|
+
# Valid space
|
|
102
|
+
{"id": random_id, "type": "knowledge_base", "status": "current"},
|
|
103
|
+
]
|
|
104
|
+
|
|
105
|
+
with (
|
|
106
|
+
patch(
|
|
107
|
+
"source.packages.extractor.castor_extractor.knowledge.confluence.client.client.ConfluenceClient._get"
|
|
108
|
+
),
|
|
109
|
+
patch(
|
|
110
|
+
"source.packages.extractor.castor_extractor.knowledge.confluence.client.client.fetch_all_pages"
|
|
111
|
+
) as mock_fetch_all_pages,
|
|
112
|
+
):
|
|
113
|
+
mock_fetch_all_pages.return_value = spaces
|
|
114
|
+
|
|
115
|
+
filtered_spaces = list(client.spaces())
|
|
116
|
+
|
|
117
|
+
filtered_space_ids = [space["id"] for space in filtered_spaces]
|
|
118
|
+
|
|
119
|
+
# no duplicates
|
|
120
|
+
assert len(filtered_space_ids) == len(set(filtered_space_ids))
|
|
121
|
+
assert set(filtered_space_ids) == {personal_id, random_id}
|
|
@@ -5,16 +5,43 @@ class ConfluenceEndpointFactory:
|
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
7
|
API = "wiki/api/v2/"
|
|
8
|
+
DATABASE = "databases"
|
|
9
|
+
FOLDERS = "folders"
|
|
8
10
|
PAGES = "pages"
|
|
11
|
+
SPACES = "spaces"
|
|
9
12
|
USERS = "users-bulk"
|
|
10
13
|
|
|
11
14
|
@classmethod
|
|
12
|
-
def
|
|
15
|
+
def database(cls, database_id: str) -> str:
|
|
13
16
|
"""
|
|
14
|
-
Endpoint to fetch
|
|
15
|
-
More: https://developer.atlassian.com/cloud/confluence/rest/v2/api-group-
|
|
17
|
+
Endpoint to fetch a database by id.
|
|
18
|
+
More: https://developer.atlassian.com/cloud/confluence/rest/v2/api-group-database/#api-databases-id-get
|
|
16
19
|
"""
|
|
17
|
-
return f"{cls.API}{cls.
|
|
20
|
+
return f"{cls.API}{cls.DATABASE}/{database_id}"
|
|
21
|
+
|
|
22
|
+
@classmethod
|
|
23
|
+
def folder(cls, folder_id: str) -> str:
|
|
24
|
+
"""
|
|
25
|
+
Endpoint to fetch a folder by id.
|
|
26
|
+
More: https://developer.atlassian.com/cloud/confluence/rest/v2/api-group-folder/#api-folders-id-get
|
|
27
|
+
"""
|
|
28
|
+
return f"{cls.API}{cls.FOLDERS}/{folder_id}"
|
|
29
|
+
|
|
30
|
+
@classmethod
|
|
31
|
+
def pages(cls, space_id: str) -> str:
|
|
32
|
+
"""
|
|
33
|
+
Endpoint to fetch all pages in the given space.
|
|
34
|
+
More: https://developer.atlassian.com/cloud/confluence/rest/v2/api-group-page/#api-spaces-id-pages-get
|
|
35
|
+
"""
|
|
36
|
+
return f"{cls.API}{cls.SPACES}/{space_id}/{cls.PAGES}?body-format=atlas_doc_format"
|
|
37
|
+
|
|
38
|
+
@classmethod
|
|
39
|
+
def spaces(cls) -> str:
|
|
40
|
+
"""
|
|
41
|
+
Endpoint to fetch all spaces.
|
|
42
|
+
https://developer.atlassian.com/cloud/confluence/rest/v2/api-group-space/#api-spaces-get
|
|
43
|
+
"""
|
|
44
|
+
return f"{cls.API}{cls.SPACES}"
|
|
18
45
|
|
|
19
46
|
@classmethod
|
|
20
47
|
def users(cls) -> str:
|
|
@@ -13,6 +13,7 @@ from ...utils import (
|
|
|
13
13
|
)
|
|
14
14
|
from .assets import ConfluenceAsset
|
|
15
15
|
from .client import ConfluenceClient, ConfluenceCredentials
|
|
16
|
+
from .utils import pages_to_database_ids, pages_to_folder_ids
|
|
16
17
|
|
|
17
18
|
logger = logging.getLogger(__name__)
|
|
18
19
|
|
|
@@ -32,6 +33,16 @@ def iterate_all_data(
|
|
|
32
33
|
yield ConfluenceAsset.PAGES, pages
|
|
33
34
|
logger.info(f"Extracted {len(pages)} pages from API")
|
|
34
35
|
|
|
36
|
+
folder_ids = pages_to_folder_ids(pages)
|
|
37
|
+
logger.info("Extracting FOLDERS from API")
|
|
38
|
+
folders = list(deep_serialize(client.folders(folder_ids)))
|
|
39
|
+
yield ConfluenceAsset.FOLDERS, folders
|
|
40
|
+
|
|
41
|
+
database_ids = pages_to_database_ids(pages)
|
|
42
|
+
logger.info("Extracting DATABASES from API")
|
|
43
|
+
databases = list(deep_serialize(client.databases(database_ids)))
|
|
44
|
+
yield ConfluenceAsset.DATABASES, databases
|
|
45
|
+
|
|
35
46
|
|
|
36
47
|
def extract_all(**kwargs) -> None:
|
|
37
48
|
"""
|
|
@@ -42,7 +53,13 @@ def extract_all(**kwargs) -> None:
|
|
|
42
53
|
output_directory = kwargs.get("output") or from_env(OUTPUT_DIR)
|
|
43
54
|
|
|
44
55
|
credentials = ConfluenceCredentials(**kwargs)
|
|
45
|
-
client = ConfluenceClient(
|
|
56
|
+
client = ConfluenceClient(
|
|
57
|
+
credentials=credentials,
|
|
58
|
+
include_archived_spaces=kwargs.get("include_archived_spaces") or False,
|
|
59
|
+
include_personal_spaces=kwargs.get("include_personal_spaces") or False,
|
|
60
|
+
space_ids_allowed=kwargs.get("space_ids_allowed"),
|
|
61
|
+
space_ids_blocked=kwargs.get("space_ids_blocked"),
|
|
62
|
+
)
|
|
46
63
|
|
|
47
64
|
ts = current_timestamp()
|
|
48
65
|
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
def pages_to_folder_ids(pages: list[dict]) -> set:
|
|
2
|
+
"""Returns all unique folder parents."""
|
|
3
|
+
return {
|
|
4
|
+
page["parentId"] for page in pages if page["parentType"] == "folder"
|
|
5
|
+
}
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def pages_to_database_ids(pages: list[dict]) -> set:
|
|
9
|
+
"""Returns all unique database parents."""
|
|
10
|
+
return {
|
|
11
|
+
page["parentId"] for page in pages if page["parentType"] == "database"
|
|
12
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from .utils import pages_to_database_ids, pages_to_folder_ids
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_pages_to_folder_ids():
|
|
5
|
+
"""Test the pages_to_folder_ids function."""
|
|
6
|
+
pages = [
|
|
7
|
+
{"id": "9", "parentId": None, "parentType": None},
|
|
8
|
+
{"id": "8", "parentId": "2", "parentType": "folder"},
|
|
9
|
+
{"id": "7", "parentId": "9", "parentType": "page"},
|
|
10
|
+
{"id": "6", "parentId": "4", "parentType": "folder"},
|
|
11
|
+
{"id": "5", "parentId": "4", "parentType": "folder"},
|
|
12
|
+
]
|
|
13
|
+
expected = {"2", "4"}
|
|
14
|
+
result = pages_to_folder_ids(pages)
|
|
15
|
+
assert result == expected
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def test_pages_to_database_id():
|
|
19
|
+
"""Test the pages_to_database_ids function."""
|
|
20
|
+
pages = [
|
|
21
|
+
{"id": "1", "parentId": "db1", "parentType": "database"},
|
|
22
|
+
{"id": "2", "parentId": "db2", "parentType": "database"},
|
|
23
|
+
{"id": "3", "parentId": "4", "parentType": "folder"},
|
|
24
|
+
{"id": "4", "parentId": None, "parentType": None},
|
|
25
|
+
{"id": "5", "parentId": "db1", "parentType": "database"},
|
|
26
|
+
{"id": "6", "parentId": "9", "parentType": "page"},
|
|
27
|
+
]
|
|
28
|
+
expected = {"db1", "db2"}
|
|
29
|
+
result = pages_to_database_ids(pages)
|
|
30
|
+
assert result == expected
|
|
@@ -1,7 +1,15 @@
|
|
|
1
1
|
from http import HTTPStatus
|
|
2
2
|
from typing import Iterator, Optional
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
import requests
|
|
5
|
+
|
|
6
|
+
from ....utils import (
|
|
7
|
+
APIClient,
|
|
8
|
+
BearerAuth,
|
|
9
|
+
RequestSafeMode,
|
|
10
|
+
SerializedAsset,
|
|
11
|
+
retry,
|
|
12
|
+
)
|
|
5
13
|
from ..assets import CoalesceAsset, CoalesceQualityAsset
|
|
6
14
|
from .credentials import CoalesceCredentials
|
|
7
15
|
from .endpoint import (
|
|
@@ -12,6 +20,9 @@ from .utils import column_names_per_node, is_test, test_names_per_node
|
|
|
12
20
|
|
|
13
21
|
_LIMIT_MAX = 1_000
|
|
14
22
|
_MAX_ERRORS = 50
|
|
23
|
+
_RETRY_BASE_MS = 10 * 60 * 1000 # 10 minutes
|
|
24
|
+
_RETRY_COUNT = 2
|
|
25
|
+
_RETRY_EXCEPTIONS = [requests.exceptions.ConnectTimeout]
|
|
15
26
|
|
|
16
27
|
|
|
17
28
|
def _run_result_payload(result: dict, query_result: dict) -> dict:
|
|
@@ -64,6 +75,11 @@ class CoalesceClient(APIClient):
|
|
|
64
75
|
result = self._get(endpoint=endpoint)
|
|
65
76
|
return result["data"]
|
|
66
77
|
|
|
78
|
+
@retry(
|
|
79
|
+
exceptions=_RETRY_EXCEPTIONS,
|
|
80
|
+
max_retries=_RETRY_COUNT,
|
|
81
|
+
base_ms=_RETRY_BASE_MS,
|
|
82
|
+
)
|
|
67
83
|
def _node_details(self, environment_id: int, node_id: str) -> dict:
|
|
68
84
|
endpoint = CoalesceEndpointFactory.nodes(
|
|
69
85
|
environment_id=environment_id, node_id=node_id
|
|
@@ -9,9 +9,7 @@ from dateutil.tz import tzutc
|
|
|
9
9
|
from .client import ContentType, DbtClient, DbtRun, _account_url # type: ignore
|
|
10
10
|
from .credentials import DbtCredentials
|
|
11
11
|
|
|
12
|
-
_DBT_CLIENT_PATH =
|
|
13
|
-
"source.packages.extractor.castor_extractor.utils.dbt.client.DbtClient"
|
|
14
|
-
)
|
|
12
|
+
_DBT_CLIENT_PATH = "source.packages.extractor.castor_extractor.transformation.dbt.client.DbtClient"
|
|
15
13
|
_OLD_DATE = datetime(2023, 7, 10, 12, 6, 23, 109171, tzinfo=tzutc())
|
|
16
14
|
_OLD_DATE_STR = "2023-07-10 12:06:23.109171+00:00"
|
|
17
15
|
_RECENT_DATE = datetime(2023, 10, 6, 5, 9, 31, 731991, tzinfo=tzutc())
|
|
@@ -52,7 +52,7 @@ from .time import (
|
|
|
52
52
|
yesterday,
|
|
53
53
|
)
|
|
54
54
|
from .type import Callback, Getter, JsonType, SerializedAsset
|
|
55
|
-
from .validation import validate_baseurl
|
|
55
|
+
from .validation import clean_path, validate_baseurl
|
|
56
56
|
from .write import (
|
|
57
57
|
get_output_filename,
|
|
58
58
|
get_summary_filename,
|
|
@@ -14,7 +14,7 @@ def _preprocess_url(base_url: str) -> str:
|
|
|
14
14
|
return base_url.strip()
|
|
15
15
|
|
|
16
16
|
|
|
17
|
-
def
|
|
17
|
+
def clean_path(path: str) -> str:
|
|
18
18
|
return path.rstrip("/")
|
|
19
19
|
|
|
20
20
|
|
|
@@ -44,7 +44,7 @@ def _urlsplit(base_url: str) -> tuple[str, str, str, str, str, str]:
|
|
|
44
44
|
url = urlsplit(base_url)
|
|
45
45
|
|
|
46
46
|
hostname, port = _get_hostname_port(url.netloc)
|
|
47
|
-
path =
|
|
47
|
+
path = clean_path(url.path)
|
|
48
48
|
|
|
49
49
|
return url.scheme, hostname, path, port, url.query, url.fragment
|
|
50
50
|
|
|
@@ -1,3 +1,9 @@
|
|
|
1
1
|
from .assets import PowerBiAsset
|
|
2
|
-
from .client import
|
|
2
|
+
from .client import (
|
|
3
|
+
CLIENT_APP_BASE,
|
|
4
|
+
DEFAULT_SCOPE,
|
|
5
|
+
REST_API_BASE_PATH,
|
|
6
|
+
PowerbiClient,
|
|
7
|
+
PowerbiCredentials,
|
|
8
|
+
)
|
|
3
9
|
from .extract import extract_all
|
|
@@ -9,7 +9,11 @@ from .endpoints import PowerBiEndpointFactory
|
|
|
9
9
|
class PowerBiBearerAuth(BearerAuth):
|
|
10
10
|
def __init__(self, credentials: PowerbiCredentials):
|
|
11
11
|
self.credentials = credentials
|
|
12
|
-
|
|
12
|
+
endpoint_factory = PowerBiEndpointFactory(
|
|
13
|
+
login_url=self.credentials.login_url,
|
|
14
|
+
api_base=self.credentials.api_base,
|
|
15
|
+
)
|
|
16
|
+
authority = endpoint_factory.authority(self.credentials.tenant_id)
|
|
13
17
|
self.app = msal.ConfidentialClientApplication(
|
|
14
18
|
client_id=self.credentials.client_id,
|
|
15
19
|
authority=authority,
|
|
@@ -40,6 +40,10 @@ class PowerbiClient(APIClient):
|
|
|
40
40
|
auth=auth,
|
|
41
41
|
timeout=POWERBI_DEFAULT_TIMEOUT_S,
|
|
42
42
|
)
|
|
43
|
+
self.endpoint_factory = PowerBiEndpointFactory(
|
|
44
|
+
login_url=credentials.login_url,
|
|
45
|
+
api_base=credentials.api_base,
|
|
46
|
+
)
|
|
43
47
|
|
|
44
48
|
def _activity_events(self, day: Optional[date] = None) -> Iterator[dict]:
|
|
45
49
|
"""
|
|
@@ -49,7 +53,7 @@ class PowerbiClient(APIClient):
|
|
|
49
53
|
"""
|
|
50
54
|
request = partial(
|
|
51
55
|
self._get,
|
|
52
|
-
endpoint=
|
|
56
|
+
endpoint=self.endpoint_factory.activity_events(day),
|
|
53
57
|
)
|
|
54
58
|
yield from fetch_all_pages(request, PowerBiPagination)
|
|
55
59
|
|
|
@@ -58,28 +62,28 @@ class PowerbiClient(APIClient):
|
|
|
58
62
|
Returns a list of datasets for the organization.
|
|
59
63
|
https://learn.microsoft.com/en-us/rest/api/power-bi/admin/datasets-get-datasets-as-admin
|
|
60
64
|
"""
|
|
61
|
-
yield from self._get(
|
|
65
|
+
yield from self._get(self.endpoint_factory.datasets())[Keys.VALUE]
|
|
62
66
|
|
|
63
67
|
def _dashboards(self) -> Iterator[dict]:
|
|
64
68
|
"""
|
|
65
69
|
Returns a list of dashboards for the organization.
|
|
66
70
|
https://learn.microsoft.com/en-us/rest/api/power-bi/admin/dashboards-get-dashboards-as-admin
|
|
67
71
|
"""
|
|
68
|
-
yield from self._get(
|
|
72
|
+
yield from self._get(self.endpoint_factory.dashboards())[Keys.VALUE]
|
|
69
73
|
|
|
70
74
|
def _reports(self) -> Iterator[dict]:
|
|
71
75
|
"""
|
|
72
76
|
Returns a list of reports for the organization.
|
|
73
77
|
https://learn.microsoft.com/en-us/rest/api/power-bi/admin/reports-get-reports-as-admin
|
|
74
78
|
"""
|
|
75
|
-
reports_endpoint =
|
|
79
|
+
reports_endpoint = self.endpoint_factory.reports()
|
|
76
80
|
reports = self._get(reports_endpoint)[Keys.VALUE]
|
|
77
81
|
|
|
78
82
|
for report in reports:
|
|
79
83
|
report_id = report.get(Keys.ID)
|
|
80
84
|
|
|
81
85
|
try:
|
|
82
|
-
pages_endpoint =
|
|
86
|
+
pages_endpoint = self.endpoint_factory.pages(report_id)
|
|
83
87
|
pages = self._get(pages_endpoint)[Keys.VALUE]
|
|
84
88
|
report["pages"] = pages
|
|
85
89
|
except (requests.HTTPError, requests.exceptions.Timeout) as e:
|
|
@@ -99,14 +103,14 @@ class PowerbiClient(APIClient):
|
|
|
99
103
|
}
|
|
100
104
|
|
|
101
105
|
response = self._get(
|
|
102
|
-
|
|
106
|
+
self.endpoint_factory.workspace_ids(),
|
|
103
107
|
params=params,
|
|
104
108
|
)
|
|
105
109
|
|
|
106
110
|
return [x[Keys.ID] for x in response]
|
|
107
111
|
|
|
108
112
|
def _get_scan_result(self, scan_id: int) -> Iterator[dict]:
|
|
109
|
-
endpoint =
|
|
113
|
+
endpoint = self.endpoint_factory.metadata_scan_result(scan_id)
|
|
110
114
|
yield from self._get(endpoint)[Keys.WORKSPACES]
|
|
111
115
|
|
|
112
116
|
def _wait_for_scan_result(self, scan_id: int) -> bool:
|
|
@@ -114,7 +118,7 @@ class PowerbiClient(APIClient):
|
|
|
114
118
|
Periodically checks the status of the metadata scan until the results
|
|
115
119
|
are ready.
|
|
116
120
|
"""
|
|
117
|
-
endpoint =
|
|
121
|
+
endpoint = self.endpoint_factory.metadata_scan_status(scan_id)
|
|
118
122
|
total_waiting_time_s = 0
|
|
119
123
|
|
|
120
124
|
while total_waiting_time_s < POWERBI_DEFAULT_TIMEOUT_S:
|
|
@@ -152,7 +156,7 @@ class PowerbiClient(APIClient):
|
|
|
152
156
|
}
|
|
153
157
|
request_body = {"workspaces": workspaces_ids}
|
|
154
158
|
scan_id = self._post(
|
|
155
|
-
|
|
159
|
+
self.endpoint_factory.metadata_create_scan(),
|
|
156
160
|
params=params,
|
|
157
161
|
data=request_body,
|
|
158
162
|
)
|
|
@@ -6,13 +6,18 @@ import pytest
|
|
|
6
6
|
from .authentication import msal
|
|
7
7
|
from .client import PowerbiClient
|
|
8
8
|
from .constants import Keys
|
|
9
|
-
from .credentials import PowerbiCredentials
|
|
9
|
+
from .credentials import CLIENT_APP_BASE, REST_API_BASE_PATH, PowerbiCredentials
|
|
10
10
|
from .endpoints import PowerBiEndpointFactory
|
|
11
11
|
|
|
12
12
|
FAKE_TENANT_ID = "IamFake"
|
|
13
13
|
FAKE_CLIENT_ID = "MeTwo"
|
|
14
14
|
FAKE_SECRET = "MeThree"
|
|
15
15
|
|
|
16
|
+
ENDPOINT_FACTORY = PowerBiEndpointFactory(
|
|
17
|
+
login_url=CLIENT_APP_BASE,
|
|
18
|
+
api_base=REST_API_BASE_PATH + "/", # superfluous "/" to test resiliency
|
|
19
|
+
)
|
|
20
|
+
|
|
16
21
|
|
|
17
22
|
@pytest.fixture
|
|
18
23
|
def mock_msal():
|
|
@@ -55,7 +60,7 @@ def test__datasets(power_bi_client):
|
|
|
55
60
|
with patch.object(power_bi_client, "_get") as mocked_get:
|
|
56
61
|
mocked_get.return_value = {"value": [{"id": 1, "type": "dataset"}]}
|
|
57
62
|
datasets = list(power_bi_client._datasets())
|
|
58
|
-
mocked_get.assert_called_with(
|
|
63
|
+
mocked_get.assert_called_with(ENDPOINT_FACTORY.datasets())
|
|
59
64
|
assert datasets == [{"id": 1, "type": "dataset"}]
|
|
60
65
|
|
|
61
66
|
|
|
@@ -63,7 +68,7 @@ def test__dashboards(power_bi_client):
|
|
|
63
68
|
with patch.object(power_bi_client, "_get") as mocked_get:
|
|
64
69
|
mocked_get.return_value = {"value": [{"id": 1, "type": "dashboard"}]}
|
|
65
70
|
dashboards = list(power_bi_client._dashboards())
|
|
66
|
-
mocked_get.assert_called_with(
|
|
71
|
+
mocked_get.assert_called_with(ENDPOINT_FACTORY.dashboards())
|
|
67
72
|
assert dashboards == [{"id": 1, "type": "dashboard"}]
|
|
68
73
|
|
|
69
74
|
|
|
@@ -79,8 +84,8 @@ def test__reports(power_bi_client):
|
|
|
79
84
|
]
|
|
80
85
|
reports = list(power_bi_client._reports())
|
|
81
86
|
calls = [
|
|
82
|
-
call(
|
|
83
|
-
call(
|
|
87
|
+
call(ENDPOINT_FACTORY.reports()),
|
|
88
|
+
call(ENDPOINT_FACTORY.pages("1")),
|
|
84
89
|
]
|
|
85
90
|
mocked_get.assert_has_calls(calls)
|
|
86
91
|
assert reports == [
|
|
@@ -107,7 +112,7 @@ def test__workspace_ids(power_bi_client):
|
|
|
107
112
|
}
|
|
108
113
|
|
|
109
114
|
mocked_get.assert_called_with(
|
|
110
|
-
|
|
115
|
+
ENDPOINT_FACTORY.workspace_ids(),
|
|
111
116
|
params=params,
|
|
112
117
|
)
|
|
113
118
|
|
|
@@ -160,7 +165,7 @@ def test__activity_events(power_bi_client):
|
|
|
160
165
|
assert result == ["foo", "bar", "baz", "biz"]
|
|
161
166
|
|
|
162
167
|
expected_calls = [
|
|
163
|
-
call(endpoint=
|
|
168
|
+
call(endpoint=ENDPOINT_FACTORY.activity_events(day=day)),
|
|
164
169
|
call(endpoint="https://next-call-1"),
|
|
165
170
|
call(endpoint="https://next-call-2"),
|
|
166
171
|
]
|
|
@@ -6,6 +6,9 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
|
6
6
|
DEFAULT_SCOPE = "https://analysis.windows.net/powerbi/api/.default"
|
|
7
7
|
POWERBI_ENV_PREFIX = "CASTOR_POWERBI_"
|
|
8
8
|
|
|
9
|
+
CLIENT_APP_BASE = "https://login.microsoftonline.com"
|
|
10
|
+
REST_API_BASE_PATH = "https://api.powerbi.com/v1.0/myorg"
|
|
11
|
+
|
|
9
12
|
|
|
10
13
|
class PowerbiCredentials(BaseSettings):
|
|
11
14
|
"""Class to handle PowerBI rest API permissions"""
|
|
@@ -19,9 +22,21 @@ class PowerbiCredentials(BaseSettings):
|
|
|
19
22
|
client_id: str
|
|
20
23
|
tenant_id: str
|
|
21
24
|
secret: str = Field(repr=False)
|
|
25
|
+
api_base: str = REST_API_BASE_PATH
|
|
26
|
+
login_url: str = CLIENT_APP_BASE
|
|
22
27
|
scopes: list[str] = [DEFAULT_SCOPE]
|
|
23
28
|
|
|
24
29
|
@field_validator("scopes", mode="before")
|
|
25
30
|
@classmethod
|
|
26
31
|
def _check_scopes(cls, scopes: Optional[list[str]]) -> list[str]:
|
|
27
32
|
return scopes if scopes is not None else [DEFAULT_SCOPE]
|
|
33
|
+
|
|
34
|
+
@field_validator("login_url", mode="before")
|
|
35
|
+
@classmethod
|
|
36
|
+
def _check_login_url(cls, login_url: Optional[str]) -> str:
|
|
37
|
+
return login_url if login_url is not None else CLIENT_APP_BASE
|
|
38
|
+
|
|
39
|
+
@field_validator("api_base", mode="before")
|
|
40
|
+
@classmethod
|
|
41
|
+
def _check_api_base(cls, api_base: Optional[str]) -> str:
|
|
42
|
+
return api_base if api_base is not None else REST_API_BASE_PATH
|
|
@@ -1,10 +1,7 @@
|
|
|
1
1
|
from datetime import date, datetime
|
|
2
2
|
from typing import Optional
|
|
3
3
|
|
|
4
|
-
from ....utils import at_midnight, format_date, yesterday
|
|
5
|
-
|
|
6
|
-
_CLIENT_APP_BASE = "https://login.microsoftonline.com"
|
|
7
|
-
_REST_API_BASE_PATH = "https://api.powerbi.com/v1.0/myorg"
|
|
4
|
+
from ....utils import at_midnight, clean_path, format_date, yesterday
|
|
8
5
|
|
|
9
6
|
|
|
10
7
|
def _time_filter(day: Optional[date]) -> tuple[datetime, datetime]:
|
|
@@ -15,51 +12,44 @@ def _time_filter(day: Optional[date]) -> tuple[datetime, datetime]:
|
|
|
15
12
|
|
|
16
13
|
|
|
17
14
|
class PowerBiEndpointFactory:
|
|
18
|
-
|
|
19
|
-
|
|
15
|
+
def __init__(self, login_url: str, api_base: str):
|
|
16
|
+
self.app_base = clean_path(login_url)
|
|
17
|
+
self.rest_api_base = clean_path(api_base)
|
|
18
|
+
|
|
19
|
+
def activity_events(self, day: Optional[date]) -> str:
|
|
20
20
|
start, end = _time_filter(day)
|
|
21
|
-
url = f"{_REST_API_BASE_PATH}/admin/activityevents"
|
|
21
|
+
url = f"{self.rest_api_base}/admin/activityevents"
|
|
22
22
|
url += "?$filter=Activity eq 'viewreport'"
|
|
23
23
|
url += f"&startDateTime='{format_date(start)}'"
|
|
24
24
|
url += f"&endDateTime='{format_date(end)}'"
|
|
25
25
|
return url
|
|
26
26
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
return f"{_CLIENT_APP_BASE}/{tenant_id}"
|
|
27
|
+
def authority(self, tenant_id: str) -> str:
|
|
28
|
+
return f"{self.app_base}/{tenant_id}"
|
|
30
29
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
return f"{_REST_API_BASE_PATH}/admin/dashboards"
|
|
30
|
+
def dashboards(self) -> str:
|
|
31
|
+
return f"{self.rest_api_base}/admin/dashboards"
|
|
34
32
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
return f"{_REST_API_BASE_PATH}/admin/datasets"
|
|
33
|
+
def datasets(self) -> str:
|
|
34
|
+
return f"{self.rest_api_base}/admin/datasets"
|
|
38
35
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
return f"{_REST_API_BASE_PATH}/admin/groups"
|
|
36
|
+
def groups(self) -> str:
|
|
37
|
+
return f"{self.rest_api_base}/admin/groups"
|
|
42
38
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
return f"{_REST_API_BASE_PATH}/admin/workspaces/getInfo"
|
|
39
|
+
def metadata_create_scan(self) -> str:
|
|
40
|
+
return f"{self.rest_api_base}/admin/workspaces/getInfo"
|
|
46
41
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
return f"{_REST_API_BASE_PATH}/admin/workspaces/scanResult/{scan_id}"
|
|
42
|
+
def metadata_scan_result(self, scan_id: int) -> str:
|
|
43
|
+
return f"{self.rest_api_base}/admin/workspaces/scanResult/{scan_id}"
|
|
50
44
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
return f"{_REST_API_BASE_PATH}/admin/workspaces/scanStatus/{scan_id}"
|
|
45
|
+
def metadata_scan_status(self, scan_id: int) -> str:
|
|
46
|
+
return f"{self.rest_api_base}/admin/workspaces/scanStatus/{scan_id}"
|
|
54
47
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
return f"{_REST_API_BASE_PATH}/admin/reports/{report_id}/pages"
|
|
48
|
+
def pages(self, report_id: str) -> str:
|
|
49
|
+
return f"{self.rest_api_base}/admin/reports/{report_id}/pages"
|
|
58
50
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
return f"{_REST_API_BASE_PATH}/admin/reports"
|
|
51
|
+
def reports(self) -> str:
|
|
52
|
+
return f"{self.rest_api_base}/admin/reports"
|
|
62
53
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
return f"{_REST_API_BASE_PATH}/admin/workspaces/modified"
|
|
54
|
+
def workspace_ids(self) -> str:
|
|
55
|
+
return f"{self.rest_api_base}/admin/workspaces/modified"
|
|
@@ -34,9 +34,7 @@ _BATCH_SIZE: int = 100
|
|
|
34
34
|
|
|
35
35
|
|
|
36
36
|
class URLTemplates(Enum):
|
|
37
|
-
DASHBOARD = (
|
|
38
|
-
"https://{hostname}/MicroStrategyLibrarySTD/app/{project_id}/{id_}"
|
|
39
|
-
)
|
|
37
|
+
DASHBOARD = "https://{hostname}/MicroStrategyLibrary/app/{project_id}/{id_}"
|
|
40
38
|
DOCUMENT = "https://{hostname}/MicroStrategy/servlet/mstrWeb?documentID={id_}&projectID={project_id}"
|
|
41
39
|
REPORT = "https://{hostname}/MicroStrategy/servlet/mstrWeb?reportID={id_}&projectID={project_id}"
|
|
42
40
|
FOLDER = "https://{hostname}/MicroStrategy/servlet/mstrWeb?folderID={id_}&projectID={project_id}"
|
|
@@ -121,11 +121,13 @@ class TableauClient:
|
|
|
121
121
|
self,
|
|
122
122
|
credentials: TableauCredentials,
|
|
123
123
|
timeout_sec: int = DEFAULT_TIMEOUT_SECONDS,
|
|
124
|
+
with_columns: bool = True,
|
|
124
125
|
with_pulse: bool = False,
|
|
125
126
|
override_page_size: Optional[int] = None,
|
|
126
127
|
):
|
|
127
128
|
self._credentials = credentials
|
|
128
129
|
self._server = _server(credentials.server_url, timeout_sec)
|
|
130
|
+
self._with_columns = with_columns
|
|
129
131
|
self._with_pulse = with_pulse
|
|
130
132
|
|
|
131
133
|
self._client_metadata = TableauClientMetadataApi(
|
|
@@ -215,6 +217,10 @@ class TableauClient:
|
|
|
215
217
|
logger.info(f"Skipping asset {asset} - Tableau Pulse de-activated")
|
|
216
218
|
return []
|
|
217
219
|
|
|
220
|
+
if asset == TableauAsset.COLUMN and not self._with_columns:
|
|
221
|
+
logger.info(f"Skipping asset {asset} - deactivated columns")
|
|
222
|
+
return []
|
|
223
|
+
|
|
218
224
|
logger.info(f"Extracting {asset.name}...")
|
|
219
225
|
|
|
220
226
|
if asset == TableauAsset.DATASOURCE:
|
|
@@ -32,6 +32,7 @@ def extract_all(**kwargs) -> None:
|
|
|
32
32
|
output_directory
|
|
33
33
|
"""
|
|
34
34
|
output_directory = kwargs.get("output") or from_env(OUTPUT_DIR)
|
|
35
|
+
with_columns = not kwargs.get("skip_columns")
|
|
35
36
|
with_pulse = kwargs.get("with_pulse") or False
|
|
36
37
|
page_size = kwargs.get("page_size")
|
|
37
38
|
timestamp = current_timestamp()
|
|
@@ -39,6 +40,7 @@ def extract_all(**kwargs) -> None:
|
|
|
39
40
|
credentials = TableauCredentials(**kwargs)
|
|
40
41
|
client = TableauClient(
|
|
41
42
|
credentials,
|
|
43
|
+
with_columns=with_columns,
|
|
42
44
|
with_pulse=with_pulse,
|
|
43
45
|
override_page_size=page_size,
|
|
44
46
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: castor-extractor
|
|
3
|
-
Version: 0.24.13
|
|
3
|
+
Version: 0.24.20
|
|
4
4
|
Summary: Extract your metadata assets.
|
|
5
5
|
Home-page: https://www.castordoc.com/
|
|
6
6
|
License: EULA
|
|
@@ -215,6 +215,34 @@ For any questions or bug report, contact us at [support@coalesce.io](mailto:supp
|
|
|
215
215
|
|
|
216
216
|
# Changelog
|
|
217
217
|
|
|
218
|
+
## 0.24.20 - 2025-05-19
|
|
219
|
+
|
|
220
|
+
* Powerbi: allow custom api base and login url
|
|
221
|
+
|
|
222
|
+
## 0.24.19 - 2025-05-14
|
|
223
|
+
|
|
224
|
+
* Confluence: extract databases
|
|
225
|
+
|
|
226
|
+
## 0.24.18 - 2025-05-13
|
|
227
|
+
|
|
228
|
+
* Improve folder organisation for transformation tools
|
|
229
|
+
|
|
230
|
+
## 0.24.17 - 2025-05-13
|
|
231
|
+
|
|
232
|
+
* Strategy: fix dashboard URL format
|
|
233
|
+
|
|
234
|
+
## 0.24.16 - 2025-05-12
|
|
235
|
+
|
|
236
|
+
* Confluence: extract folders to complete the page hierarchy
|
|
237
|
+
|
|
238
|
+
## 0.24.15 - 2025-05-12
|
|
239
|
+
|
|
240
|
+
* Tableau: Add argument to skip columns extraction
|
|
241
|
+
|
|
242
|
+
## 0.24.14 - 2025-05-06
|
|
243
|
+
|
|
244
|
+
* Confluence: extract pages per space to allow additional filtering. by default, pages from archived or personal spaces are not extracted.
|
|
245
|
+
|
|
218
246
|
## 0.24.13 - 2025-05-05
|
|
219
247
|
|
|
220
248
|
* Rollback cloud-storage version as it's not compatible with Keboola
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
CHANGELOG.md,sha256=
|
|
1
|
+
CHANGELOG.md,sha256=NxSwszCNlJ8oD2ffivq3g75DK436mue9WwilR0r6bE4,17760
|
|
2
2
|
Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
|
|
3
3
|
DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
|
|
4
4
|
LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
@@ -6,7 +6,7 @@ README.md,sha256=C6hTyZO60T7z7xwHbspHlii384Jn02k0Rycxu3bCX0o,3866
|
|
|
6
6
|
castor_extractor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
7
|
castor_extractor/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
castor_extractor/commands/extract_bigquery.py,sha256=dU4OiYO1V0n32orvZnMh1_xtFKF_VxHNXcVsH3otY-g,1269
|
|
9
|
-
castor_extractor/commands/extract_confluence.py,sha256=
|
|
9
|
+
castor_extractor/commands/extract_confluence.py,sha256=blYcnDqywXNKRQ1aZAD9FclhLlO7x8Y_tb0lgl85v0w,1641
|
|
10
10
|
castor_extractor/commands/extract_databricks.py,sha256=SVKyoa-BBUQAM6HRHf1Wdg9-tpICic2yyvXQwHcNBhA,1264
|
|
11
11
|
castor_extractor/commands/extract_domo.py,sha256=jvAawUsUTHrwCn_koK6StmQr4n_b5GyvJi6uu6WS0SM,1061
|
|
12
12
|
castor_extractor/commands/extract_looker.py,sha256=cySLiolLCgrREJ9d0kMrJ7P8K3efHTBTzShalWVfI3A,1214
|
|
@@ -17,7 +17,7 @@ castor_extractor/commands/extract_mode.py,sha256=Q4iO-VAKMg4zFPejhAO-foZibL5Ht3j
|
|
|
17
17
|
castor_extractor/commands/extract_mysql.py,sha256=7AH5qMzeLTsENCOeJwtesrWg8Vo8MCEq8fx2YT74Mcw,1034
|
|
18
18
|
castor_extractor/commands/extract_notion.py,sha256=uaxcF3_bT7D_-JxnIW0F7VVDphI_ZgOfQQxZzoLXo_M,504
|
|
19
19
|
castor_extractor/commands/extract_postgres.py,sha256=pX0RnCPi4nw6QQ6wiAuZ_Xt3ZbDuMUG9aQKuqFgJtAU,1154
|
|
20
|
-
castor_extractor/commands/extract_powerbi.py,sha256=
|
|
20
|
+
castor_extractor/commands/extract_powerbi.py,sha256=RKkw9H2ZsbJ4xLE84bmNFUgYUjlrLmSXahQSVrQr_Bc,934
|
|
21
21
|
castor_extractor/commands/extract_qlik.py,sha256=VBe_xFKh_nR0QSFFIncAaC8yDqBeMa6VunBAga7AeGg,891
|
|
22
22
|
castor_extractor/commands/extract_redshift.py,sha256=zRBg2D_ft4GLdPSdmetRcgQVAA80DXtdRSYsQhAWIik,1334
|
|
23
23
|
castor_extractor/commands/extract_salesforce.py,sha256=3j3YTmMkPAwocR-B1ozJQai0UIZPtpmAyWj-hHvdWn4,1226
|
|
@@ -26,7 +26,7 @@ castor_extractor/commands/extract_sigma.py,sha256=sxewHcZ1Doq35V2qnpX_zCKKXkrb1_
|
|
|
26
26
|
castor_extractor/commands/extract_snowflake.py,sha256=GwlrRxwEBjHqGs_3bs5vM9fzmv61_iwvBr1KcIgFgWM,2161
|
|
27
27
|
castor_extractor/commands/extract_sqlserver.py,sha256=lwhbcNChaXHZgMgSOch3faVr7WJw-sDU6GHl3lzBt_0,1141
|
|
28
28
|
castor_extractor/commands/extract_strategy.py,sha256=Q-pUymatPrBFGXobhyUPzFph0-t774-XOpjdCFF1dYo,821
|
|
29
|
-
castor_extractor/commands/extract_tableau.py,sha256=
|
|
29
|
+
castor_extractor/commands/extract_tableau.py,sha256=ngujGYohWOqOK1qjIP1Hh951jr0KNKNSeOyoaOnO450,1558
|
|
30
30
|
castor_extractor/commands/extract_thoughtspot.py,sha256=caAYJlH-vK7u5IUB6OKXxcaWfLgc7d_XqnFDWK6YNS4,639
|
|
31
31
|
castor_extractor/commands/file_check.py,sha256=TJx76Ymd0QCECmq35zRJMkPE8DJtSInB28MuSXWk8Ao,2644
|
|
32
32
|
castor_extractor/commands/upload.py,sha256=rLXp7gQ8zb1kLbho4FT87q8eJd8Gvo_TkyIynAaQ-4s,1342
|
|
@@ -43,13 +43,16 @@ castor_extractor/file_checker/templates/__init__.py,sha256=StVLm4ZGyGVmPzarxEaDR
|
|
|
43
43
|
castor_extractor/file_checker/templates/generic_warehouse.py,sha256=S5qFIkbfihdWz16y4HSiTfDH1SmGl40u3kZ706sFBVI,2939
|
|
44
44
|
castor_extractor/knowledge/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
45
45
|
castor_extractor/knowledge/confluence/__init__.py,sha256=pRT615pMDlB7Ifs09erVn2EdpZHgkvX5selemWU3VPE,129
|
|
46
|
-
castor_extractor/knowledge/confluence/assets.py,sha256=
|
|
46
|
+
castor_extractor/knowledge/confluence/assets.py,sha256=ZRmRnJpbujL8LMQD3FrcFME-pEQM7G1lCZ1abTsx5OU,196
|
|
47
47
|
castor_extractor/knowledge/confluence/client/__init__.py,sha256=ALAzo0JEhxFzH2FnIO6HmtkAGS2_bGY8KXXMcTGV3aE,84
|
|
48
|
-
castor_extractor/knowledge/confluence/client/client.py,sha256=
|
|
48
|
+
castor_extractor/knowledge/confluence/client/client.py,sha256=Ysl4KDo4ISXuLvUwEZrowGiaUHPC5cli7zN5Vtte_Jc,5392
|
|
49
|
+
castor_extractor/knowledge/confluence/client/client_test.py,sha256=O4YarFiZbm2z_H0zPzPi-awRhsGEY1iOwwHHTv9gCVA,4177
|
|
49
50
|
castor_extractor/knowledge/confluence/client/credentials.py,sha256=tqUMw-SVoAi4o6I6OeGk4MeDiIPU3-ihhaomXv4CQ64,419
|
|
50
|
-
castor_extractor/knowledge/confluence/client/endpoints.py,sha256=
|
|
51
|
+
castor_extractor/knowledge/confluence/client/endpoints.py,sha256=q5JCybVPtSs4rEXmS5IdkJyFQZsx2ff838mJyxryEFo,1742
|
|
51
52
|
castor_extractor/knowledge/confluence/client/pagination.py,sha256=ty4meiMEujDVSiQyOJTibd-ReYyDyGezdFuk7EAGtMA,862
|
|
52
|
-
castor_extractor/knowledge/confluence/extract.py,sha256=
|
|
53
|
+
castor_extractor/knowledge/confluence/extract.py,sha256=Ba1IDDFkZUYJ2HDWNcXNJ1fzFYOTcE1sTDgqFAK9-fA,2332
|
|
54
|
+
castor_extractor/knowledge/confluence/utils.py,sha256=-lcpWY8oacDdg537s5-LsY8B3L7ZKGXT6GCd3yvVw-A,392
|
|
55
|
+
castor_extractor/knowledge/confluence/utils_test.py,sha256=NYlbKUSRjuZ8FaRMqhKNQCW3KmMAhqIVaMAHNNpobsk,1152
|
|
53
56
|
castor_extractor/knowledge/notion/__init__.py,sha256=ZDmh0eNSxHf1zVPm0aYlKPci-vzOXhAgdsWjS2hdjh4,117
|
|
54
57
|
castor_extractor/knowledge/notion/assets.py,sha256=QHv1-pomt5UeN_prP2L6t_zJ-tDSqB8LgopkGAODYPQ,164
|
|
55
58
|
castor_extractor/knowledge/notion/client/__init__.py,sha256=CDPorBCethuNTEtpjvHGcWnWeVfqkEq-IbakWjDKATw,76
|
|
@@ -73,12 +76,17 @@ castor_extractor/transformation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
|
|
|
73
76
|
castor_extractor/transformation/coalesce/__init__.py,sha256=CW_qdtEfwgJRsCyBlk5hNlxwEO-VV6mBXZvkRbND_J8,112
|
|
74
77
|
castor_extractor/transformation/coalesce/assets.py,sha256=pzccYPP66c9PAnVroemx7-6MeRHw7Ft1OlTC6jIamAA,363
|
|
75
78
|
castor_extractor/transformation/coalesce/client/__init__.py,sha256=VRmVpH29rOghtDQnCN7dAdA0dI0Lxseu4BC8rnwM9dU,80
|
|
76
|
-
castor_extractor/transformation/coalesce/client/client.py,sha256
|
|
79
|
+
castor_extractor/transformation/coalesce/client/client.py,sha256=-fFxWtDPPruNmDPc6FXft_6RwRKEee4JM-13d90fms0,6442
|
|
77
80
|
castor_extractor/transformation/coalesce/client/credentials.py,sha256=jbJxjbdPspf-dzYKfeb7oqL_8TXd1nvkJrjAcdAnLPc,548
|
|
78
81
|
castor_extractor/transformation/coalesce/client/endpoint.py,sha256=0uLh7dpA1vsR9qr_50SEYV_-heQE4BwED9oNMgYsL-w,1272
|
|
79
82
|
castor_extractor/transformation/coalesce/client/type.py,sha256=oiiVP9NL0ijTXyQmaB8aJVYckc7m-m8ZgMyNIAduUKE,43
|
|
80
83
|
castor_extractor/transformation/coalesce/client/utils.py,sha256=jbxh3OCbYm3fKZD1QfqX5zm1ZD_jFIrpUQsX8paRP7g,1627
|
|
81
84
|
castor_extractor/transformation/coalesce/client/utils_test.py,sha256=Q00Y1n0Q_sZ0LFnYn98yDGFumBsifzVJSc7_3PSBMfI,1543
|
|
85
|
+
castor_extractor/transformation/dbt/__init__.py,sha256=LHQROlMqYWCc7tcmhdjXtROFpJqUvCg9jPC8avHgD4I,107
|
|
86
|
+
castor_extractor/transformation/dbt/assets.py,sha256=JY1nKEGySZ84wNoe7dnizwAYw2q0t8NVaIfqhB2rSw0,148
|
|
87
|
+
castor_extractor/transformation/dbt/client.py,sha256=KSlMHaesDOTYjTKs_ZFSYG1Udxjj5jn0m5zaCf5AZZk,5526
|
|
88
|
+
castor_extractor/transformation/dbt/client_test.py,sha256=YK86romOhTRFqzA2Gs262mDUfmvz0cmhiN3jC9nB8P8,4527
|
|
89
|
+
castor_extractor/transformation/dbt/credentials.py,sha256=pGq7GqFQTw9TwN1DXSHC-0yJ2H6B_wMAbHyQTLqJVh0,543
|
|
82
90
|
castor_extractor/types.py,sha256=nHel2hv6NoHmdpOX_heEfO2-DnZPoYA2x0eJdbFvT0s,1276
|
|
83
91
|
castor_extractor/uploader/__init__.py,sha256=A4bq_SrEtKAsl0r_D_duSTvL5WIQjVfsMy7tDx9IKg0,87
|
|
84
92
|
castor_extractor/uploader/constant.py,sha256=yTigLHDlYwoRr6CpFIl7ReElFsQd4H-qkluMZJPWSx0,865
|
|
@@ -88,7 +96,7 @@ castor_extractor/uploader/settings.py,sha256=3MvOX-UFRqrLZoiT7wYn9jUGro7NX4RCafY
|
|
|
88
96
|
castor_extractor/uploader/upload.py,sha256=PSQfkO_7LSE0WBo9Tm_hlS2ONepKeB0cBFdJXySnues,4310
|
|
89
97
|
castor_extractor/uploader/upload_test.py,sha256=7fwstdQe7FjuwGilsCdFpEQr1qLoR2WTRUzyy93fISw,402
|
|
90
98
|
castor_extractor/uploader/utils.py,sha256=otAaySj5aeem6f0CTd0Te6ioJ6uP2J1p348j-SdIwDI,802
|
|
91
|
-
castor_extractor/utils/__init__.py,sha256=
|
|
99
|
+
castor_extractor/utils/__init__.py,sha256=ybzci46Myi9LABZGBK2qXOiGd00llcpmBpvAp5LEVHc,1597
|
|
92
100
|
castor_extractor/utils/argument_parser.py,sha256=S4EcIh3wNDjs3fOrQnttCcPsAmG8m_Txl7xvEh0Q37s,283
|
|
93
101
|
castor_extractor/utils/argument_parser_test.py,sha256=wnyLFJ74iEiPxxLSbwFtckR7FIHxsFOVU38ljs9gqRA,633
|
|
94
102
|
castor_extractor/utils/batch.py,sha256=SFlLmJgVjV2nVhIrjVIEp8wJ9du4dKKHq8YVYubnwQQ,448
|
|
@@ -113,11 +121,6 @@ castor_extractor/utils/client/uri_test.py,sha256=1XKF6qSseCeD4G4ckaNO07JXfGbt7XU
|
|
|
113
121
|
castor_extractor/utils/collection.py,sha256=FiIJWZZ865oqNjtTm40gQ13R9zh--W2W5YsMBZJf2bk,2334
|
|
114
122
|
castor_extractor/utils/collection_test.py,sha256=XJAGo0Veg0H8wZRCESIkU2t8bXxTNET0BdosomO3-Ls,2104
|
|
115
123
|
castor_extractor/utils/constants.py,sha256=qBQprS9U66mS-RIBXiLujdTSV3WvGv40Bc0khP4Abdk,39
|
|
116
|
-
castor_extractor/utils/dbt/__init__.py,sha256=LHQROlMqYWCc7tcmhdjXtROFpJqUvCg9jPC8avHgD4I,107
|
|
117
|
-
castor_extractor/utils/dbt/assets.py,sha256=JY1nKEGySZ84wNoe7dnizwAYw2q0t8NVaIfqhB2rSw0,148
|
|
118
|
-
castor_extractor/utils/dbt/client.py,sha256=KSlMHaesDOTYjTKs_ZFSYG1Udxjj5jn0m5zaCf5AZZk,5526
|
|
119
|
-
castor_extractor/utils/dbt/client_test.py,sha256=9mHhFLyQ-NrjyRKy0kCEp4hgMj2um7HrhTd452oyRbM,4526
|
|
120
|
-
castor_extractor/utils/dbt/credentials.py,sha256=pGq7GqFQTw9TwN1DXSHC-0yJ2H6B_wMAbHyQTLqJVh0,543
|
|
121
124
|
castor_extractor/utils/deprecate.py,sha256=aBIN2QqZUx5CBNZMFfOUhi8QqtPqRcJtmrN6xqfm-y8,805
|
|
122
125
|
castor_extractor/utils/env.py,sha256=TqdtB50U8LE0993WhhEhpy89TJrHbjtIKjvg6KQ-5q0,596
|
|
123
126
|
castor_extractor/utils/files.py,sha256=qKbfu5FRjsQdKnRmaJNd5EdX_F6gf5C5tV8LdoYKxs0,1527
|
|
@@ -152,7 +155,7 @@ castor_extractor/utils/string_test.py,sha256=u3P2tAPhyfCLvD19rH_JcpHhPuWTHUdg0z_
|
|
|
152
155
|
castor_extractor/utils/time.py,sha256=jmP1QWg4lv21Jp_Oy71lfJ47hjNOSgHiBOFf964RMPU,1732
|
|
153
156
|
castor_extractor/utils/time_test.py,sha256=pH8DSosNlwDYZXZNNjYDcL0WbmZc_c212LEEn88Oqew,647
|
|
154
157
|
castor_extractor/utils/type.py,sha256=Sd8JlEgbGkBUZnRqCUDtREeBkOMTXtlNMyCph90_J0Q,328
|
|
155
|
-
castor_extractor/utils/validation.py,sha256=
|
|
158
|
+
castor_extractor/utils/validation.py,sha256=dRvC9SoFVecVZuLQNN3URq37yX2sBSW3-NxIxkcol5o,1894
|
|
156
159
|
castor_extractor/utils/validation_test.py,sha256=A7P6VmI0kYX2aGIeEN12y7LsY7Kpm8pE4bdVFhbBAMw,1184
|
|
157
160
|
castor_extractor/utils/write.py,sha256=Z_RYm47XeHiUPPUMYMuAjQrVZ18CAkL3daQHQG1XPlM,2148
|
|
158
161
|
castor_extractor/visualization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -230,16 +233,16 @@ castor_extractor/visualization/mode/client/constants.py,sha256=_Si5AF6VnpoSfnNNg
|
|
|
230
233
|
castor_extractor/visualization/mode/client/credentials.py,sha256=ptIpCCpoNt06yYaWQgl3Xu78_jVMoqsqWAGqQXVFZlo,606
|
|
231
234
|
castor_extractor/visualization/mode/errors.py,sha256=SKpFT2AiLOuWx2VRLyO7jbAiKcGDFXXrsebpNEKtr0E,1495
|
|
232
235
|
castor_extractor/visualization/mode/extract.py,sha256=PmLWWjUwplQh3TNMemiGwyFdxMcKVMvumZPxSMLJAwk,1625
|
|
233
|
-
castor_extractor/visualization/powerbi/__init__.py,sha256=
|
|
236
|
+
castor_extractor/visualization/powerbi/__init__.py,sha256=hoZ73ngLhMc9edqxO9PUIE3FABQlvcfY2W8fuc6DEjY,197
|
|
234
237
|
castor_extractor/visualization/powerbi/assets.py,sha256=IB_XKwgdN1pZYGZ4RfeHrLjflianTzWf_6tg-4CIwu0,742
|
|
235
|
-
castor_extractor/visualization/powerbi/client/__init__.py,sha256=
|
|
236
|
-
castor_extractor/visualization/powerbi/client/authentication.py,sha256=
|
|
237
|
-
castor_extractor/visualization/powerbi/client/client.py,sha256=
|
|
238
|
-
castor_extractor/visualization/powerbi/client/client_test.py,sha256=
|
|
238
|
+
castor_extractor/visualization/powerbi/client/__init__.py,sha256=UPIhMaCCdNxhiLdkItC0IPFE_AMi-SgqI_ahwjB9utI,151
|
|
239
|
+
castor_extractor/visualization/powerbi/client/authentication.py,sha256=cTohunKr1nUDfvxB0sejJSyfE2BdCtwT1WMPecWlbyU,1045
|
|
240
|
+
castor_extractor/visualization/powerbi/client/client.py,sha256=MbqqUF4yadjbAQ_I0iwOmdqR0qC1L3yfbDGgIZqZ0hQ,7348
|
|
241
|
+
castor_extractor/visualization/powerbi/client/client_test.py,sha256=MhQfg6kj__zpARXfv9-VrJXt1fXj6Eri91y8KA9Sn9E,5694
|
|
239
242
|
castor_extractor/visualization/powerbi/client/constants.py,sha256=88R_aGachNNUZh6OSH2fkDwZtY4KTStzKm_g7HNCqqo,387
|
|
240
|
-
castor_extractor/visualization/powerbi/client/credentials.py,sha256=
|
|
243
|
+
castor_extractor/visualization/powerbi/client/credentials.py,sha256=OVWdhZSNODzTdLysY-sbpBZ3uUkLokeayQZnbJAqt2I,1386
|
|
241
244
|
castor_extractor/visualization/powerbi/client/credentials_test.py,sha256=TzFqxsWVQ3sXR_n0bJsexK9Uz7ceXCEPVqDGWTJzW60,993
|
|
242
|
-
castor_extractor/visualization/powerbi/client/endpoints.py,sha256=
|
|
245
|
+
castor_extractor/visualization/powerbi/client/endpoints.py,sha256=38ZETzSSnNq3vA9O6nLZQ8T1BVE01R9CjMC03-PRXsM,1911
|
|
243
246
|
castor_extractor/visualization/powerbi/client/pagination.py,sha256=OZMjoDQPRGMoWd9QcKKrPh3aErJR20SHlrTqY_siLkk,755
|
|
244
247
|
castor_extractor/visualization/powerbi/extract.py,sha256=Z5KbqMhMnqjWcnzged2G1-Gf6GYWJobTL9_TpAdgb8o,1309
|
|
245
248
|
castor_extractor/visualization/qlik/__init__.py,sha256=u6lIfm_WOykBwt6SlaB7C0Dtx37XBliUbM5oWv26gC8,177
|
|
@@ -276,13 +279,13 @@ castor_extractor/visualization/sigma/extract.py,sha256=XIT1qsj6g6dgBWP8HPfj_medZ
|
|
|
276
279
|
castor_extractor/visualization/strategy/__init__.py,sha256=HOMv4JxqF5ZmViWi-pDE-PSXJRLTdXal_jtpHG_rlR8,123
|
|
277
280
|
castor_extractor/visualization/strategy/assets.py,sha256=tqB3GOtp-r7IOnYO8UxZgrldoSMImJnv5KeIwDFxg68,302
|
|
278
281
|
castor_extractor/visualization/strategy/client/__init__.py,sha256=XWP0yF5j6JefDJkDfX-RSJn3HF2ceQ0Yx1PLCfB3BBo,80
|
|
279
|
-
castor_extractor/visualization/strategy/client/client.py,sha256=
|
|
282
|
+
castor_extractor/visualization/strategy/client/client.py,sha256=_K7JkatG0DYtbQOJULTNYKHWuBZ11KMR_rQjx8LiR5c,10242
|
|
280
283
|
castor_extractor/visualization/strategy/client/credentials.py,sha256=urFfNxWX1JG6wwFMYImufQzHa5g-sgjdlVGzi63owwg,1113
|
|
281
284
|
castor_extractor/visualization/strategy/extract.py,sha256=2fBuvS2xiOGXRpxXnZsE_C3en6t1-BlM5TbusjHyEkg,1166
|
|
282
285
|
castor_extractor/visualization/tableau/__init__.py,sha256=eFI_1hjdkxyUiAYiy3szwyuwn3yJ5C_KbpBU0ySJDcQ,138
|
|
283
286
|
castor_extractor/visualization/tableau/assets.py,sha256=HbCRd8VCj1WBEeqg9jwnygnT7xOFJ6PQD7Lq7sV-XR0,635
|
|
284
287
|
castor_extractor/visualization/tableau/client/__init__.py,sha256=P8RKFKOC63WkH5hdEytJOwHS9vzQ8GXreLfXZetmMP8,78
|
|
285
|
-
castor_extractor/visualization/tableau/client/client.py,sha256=
|
|
288
|
+
castor_extractor/visualization/tableau/client/client.py,sha256=iJ3Y-vwPvmPyAUTs1PqFJEZelPGiLvsiwXpTI3b5THc,7867
|
|
286
289
|
castor_extractor/visualization/tableau/client/client_metadata_api.py,sha256=ryRq4_qUok8vvWGhj5CNWXtwR2JlUsu1qjsov2KhQTE,6286
|
|
287
290
|
castor_extractor/visualization/tableau/client/client_metadata_api_test.py,sha256=rikyQKDLFYHLJhHJTF3LwWhKJ80svtTsYp5n7n9oTU8,2665
|
|
288
291
|
castor_extractor/visualization/tableau/client/client_rest_api.py,sha256=x4dNw4PPJdalTlGowwkANwqiS2ZhGxzpQytkHq3KbpY,3988
|
|
@@ -292,7 +295,7 @@ castor_extractor/visualization/tableau/client/errors.py,sha256=ecT8Tit5VtzrOBB9y
|
|
|
292
295
|
castor_extractor/visualization/tableau/client/gql_queries.py,sha256=XJAfhpMZ5S7-AhfpOaoHMHCAdil-l5e5xB-CH4NC38M,2177
|
|
293
296
|
castor_extractor/visualization/tableau/client/rest_fields.py,sha256=ZKYYuMxg9PXhczVXaD4rXNk7dYyWJ1_bVM8FLEXju7s,888
|
|
294
297
|
castor_extractor/visualization/tableau/constants.py,sha256=lHGB50FgVNO2nXeIhkvQKivD8ZFBIjDrflgD5cTXKJw,104
|
|
295
|
-
castor_extractor/visualization/tableau/extract.py,sha256=
|
|
298
|
+
castor_extractor/visualization/tableau/extract.py,sha256=hGVr1BZVsHlIgNXOFusRN2YwUUhXvF3reOeN8g1CTEo,1508
|
|
296
299
|
castor_extractor/visualization/thoughtspot/__init__.py,sha256=NhTGUk5Kdt54oCjHYoAt0cLBmVLys5lFYiRANL6wCmI,150
|
|
297
300
|
castor_extractor/visualization/thoughtspot/assets.py,sha256=SAQWPKaD2NTSDg7-GSkcRSSEkKSws0MJfOVcHkdeTSg,276
|
|
298
301
|
castor_extractor/visualization/thoughtspot/client/__init__.py,sha256=svrE2rMxR-OXctjPeAHMEPePlfcra-9KDevTMcHunAA,86
|
|
@@ -423,8 +426,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
|
|
|
423
426
|
castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
|
|
424
427
|
castor_extractor/warehouse/sqlserver/query.py,sha256=g0hPT-RmeGi2DyenAi3o72cTlQsLToXIFYojqc8E5fQ,533
|
|
425
428
|
castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
|
|
426
|
-
castor_extractor-0.24.
|
|
427
|
-
castor_extractor-0.24.
|
|
428
|
-
castor_extractor-0.24.
|
|
429
|
-
castor_extractor-0.24.
|
|
430
|
-
castor_extractor-0.24.
|
|
429
|
+
castor_extractor-0.24.20.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
430
|
+
castor_extractor-0.24.20.dist-info/METADATA,sha256=DvgjbhUmiYXzV9e3MXsEGuSfgOog38LrEBNmqFIAcyI,25213
|
|
431
|
+
castor_extractor-0.24.20.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
|
432
|
+
castor_extractor-0.24.20.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
|
|
433
|
+
castor_extractor-0.24.20.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|