castor-extractor 0.24.36__py3-none-any.whl → 0.24.38__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of castor-extractor might be problematic.
- CHANGELOG.md +8 -0
- castor_extractor/commands/upload.py +10 -0
- castor_extractor/uploader/__init__.py +1 -0
- castor_extractor/uploader/constant.py +5 -1
- castor_extractor/uploader/enums.py +8 -0
- castor_extractor/uploader/settings.py +2 -0
- castor_extractor/uploader/upload.py +26 -5
- castor_extractor/uploader/upload_test.py +5 -3
- castor_extractor/visualization/sigma/assets.py +3 -0
- castor_extractor/visualization/sigma/client/client.py +42 -5
- castor_extractor/visualization/sigma/client/endpoints.py +17 -0
- castor_extractor/visualization/sigma/client/sources_transformer.py +94 -0
- castor_extractor/visualization/sigma/client/sources_transformer_test.py +101 -0
- castor_extractor/visualization/sigma/extract.py +17 -1
- {castor_extractor-0.24.36.dist-info → castor_extractor-0.24.38.dist-info}/METADATA +9 -1
- {castor_extractor-0.24.36.dist-info → castor_extractor-0.24.38.dist-info}/RECORD +19 -16
- {castor_extractor-0.24.36.dist-info → castor_extractor-0.24.38.dist-info}/LICENCE +0 -0
- {castor_extractor-0.24.36.dist-info → castor_extractor-0.24.38.dist-info}/WHEEL +0 -0
- {castor_extractor-0.24.36.dist-info → castor_extractor-0.24.38.dist-info}/entry_points.txt +0 -0
CHANGELOG.md CHANGED
castor_extractor/commands/upload.py CHANGED

@@ -3,6 +3,7 @@ from argparse import ArgumentParser
 
 from castor_extractor.uploader import ( # type: ignore
     FileType,
+    Zone,
     upload_any,
 )
 from castor_extractor.utils import parse_filled_arguments # type: ignore
@@ -40,6 +41,15 @@ def _args() -> ArgumentParser:
         ),
         choices=supported_file_type,
     )
+    supported_zones = [zone.value for zone in Zone]
+    parser.add_argument(
+        "-z",
+        "--zone",
+        help="geographic zone to upload, currently supported are {}, defaults to EU".format(
+            supported_zones,
+        ),
+        choices=supported_zones,
+    )
     return parser
 
 
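The diff body for castor_extractor/uploader/enums.py (+8 -0) is not shown above. A minimal sketch of what it plausibly contains, inferred from the Zone.EU / Zone.US references elsewhere in this diff; the string values "EU" / "US" are assumptions:

from enum import Enum


class Zone(Enum):
    # Member names EU/US are confirmed by the diff (Zone.EU / Zone.US below);
    # the string values are assumptions.
    EU = "EU"
    US = "US"


# The new --zone flag exposes the enum values as its CLI choices:
supported_zones = [zone.value for zone in Zone]  # == ["EU", "US"] under this sketch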
castor_extractor/uploader/constant.py CHANGED

@@ -1,9 +1,13 @@
 from enum import Enum
 
 from ..utils import RetryStrategy
+from .enums import Zone
 
 # url of the gcs proxy
-
+INGEST_URLS = {
+    Zone.EU: "https://ingest.castordoc.com",
+    Zone.US: "https://ingest.us.castordoc.com",
+}
 
 RETRY_BASE_MS = 10_000
 RETRY_JITTER_MS = 1_000
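With this mapping in place, choosing the ingestion proxy becomes a dictionary lookup keyed by zone, as _path_and_url does later in this diff; a minimal sketch reusing the hypothetical Zone enum above:

path = "visualization-399a8b22-3187-11ec-8d3d-0242ac130003/1754500000-summary.json"  # illustrative
url = f"{INGEST_URLS[Zone.US]}/{path}"  # -> "https://ingest.us.castordoc.com/visualization-..."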
castor_extractor/uploader/settings.py CHANGED

@@ -4,6 +4,7 @@ from pydantic import UUID4, Field
 from pydantic_settings import BaseSettings, SettingsConfigDict
 
 from .constant import FileType
+from .enums import Zone
 
 UPLOADER_ENV_PREFIX = "CASTOR_UPLOADER_"
 
@@ -22,3 +23,4 @@ class UploaderSettings(BaseSettings):
     file_type: FileType
     source_id: UUID4
     token: str = Field(repr=False)
+    zone: Optional[Zone] = Zone.EU
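Since UploaderSettings is a pydantic-settings model with the CASTOR_UPLOADER_ prefix shown above, the new field can presumably also be supplied through the environment; a hedged sketch (the accepted string depends on the Zone enum's values, assumed here to be "EU"/"US"):

import os

# Hypothetical invocation: pydantic-settings conventionally binds fields to
# environment variables named <env_prefix><FIELD>, so with the prefix above:
os.environ["CASTOR_UPLOADER_ZONE"] = "US"  # assumed value; when unset, zone defaults to Zone.EU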
castor_extractor/uploader/upload.py CHANGED

@@ -10,13 +10,14 @@ import requests
 
 from ..utils.retry import retry
 from .constant import (
-
+    INGEST_URLS,
     PATH_TEMPLATES,
     RETRY_BASE_MS,
     RETRY_JITTER_MS,
     RETRY_STRATEGY,
     FileType,
 )
+from .enums import Zone
 from .env import get_blob_env
 from .settings import UploaderSettings
 from .utils import iter_files
@@ -33,6 +34,7 @@ def _path_and_url(
     source_id: UUID,
     file_type: FileType,
     file_path: str,
+    zone: Zone,
 ) -> tuple[str, str]:
     now = datetime.utcnow()
     timestamp = int(now.timestamp())
@@ -44,7 +46,7 @@ def _path_and_url(
         filename=filename,
     )
 
-    url = f"{
+    url = f"{INGEST_URLS[zone]}/{path}"
 
     return path, url
 
@@ -61,13 +63,16 @@ def _upload(
     source_id: UUID,
     file_path: str,
     file_type: FileType,
+    zone: Optional[Zone] = Zone.EU,
 ) -> None:
     """
     Upload the given file to Google Cloud Storage (GCS)
     - Don't call GCS API directly
     - Call the ingestion proxy which handles authorisation and uploading
     """
-    path, url = _path_and_url(source_id, file_type, file_path)
+    if not zone:
+        zone = Zone.EU
+    path, url = _path_and_url(source_id, file_type, file_path, zone)
     headers = _headers(token)
     timeout, max_retries = get_blob_env()
 
@@ -97,6 +102,7 @@ def _upload(
 def upload_manifest(
     token: str,
     source_id: UUID,
+    zone: Optional[Zone],
     file_path: Optional[str] = None,
 ) -> None:
     """
@@ -106,13 +112,20 @@ def upload_manifest(
     """
     if not file_path:
         raise ValueError("file path is needed to upload a manifest")
-    _upload(
+    _upload(
+        token=token,
+        source_id=source_id,
+        file_path=file_path,
+        file_type=FileType.DBT,
+        zone=zone,
+    )
 
 
 def upload(
     token: str,
     source_id: UUID,
     file_type: FileType,
+    zone: Optional[Zone],
     file_path: Optional[str] = None,
     directory_path: Optional[str] = None,
 ) -> None:
@@ -133,7 +146,13 @@ def upload(
         raise ValueError(message)
 
     for file_ in files:
-        _upload(
+        _upload(
+            token=token,
+            source_id=source_id,
+            file_path=file_,
+            file_type=file_type,
+            zone=zone,
+        )
 
 
 def upload_any(**kwargs) -> None:
@@ -156,6 +175,7 @@ def upload_any(**kwargs) -> None:
         token=settings.token,
         source_id=settings.source_id,
         file_path=settings.file_path,
+        zone=settings.zone,
     )
     return None
 
@@ -165,4 +185,5 @@ def upload_any(**kwargs) -> None:
         file_type=file_type,
         file_path=settings.file_path,
         directory_path=settings.directory_path,
+        zone=settings.zone,
     )
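A hedged usage sketch of the updated entry point: upload_any(**kwargs) builds an UploaderSettings from its keyword arguments, so the field names from settings.py above should apply; the token and file path are placeholders:

from uuid import UUID

from castor_extractor.uploader import FileType, Zone, upload_any  # type: ignore

upload_any(
    token="<api-token>",
    source_id=UUID("399a8b22-3187-11ec-8d3d-0242ac130003"),  # sample UUID reused from the tests
    file_type=FileType.VIZ,
    file_path="extracted/summary.json",  # hypothetical local file
    zone=Zone.US,  # new in 0.24.38; None (or omitting the zone) falls back to Zone.EU
)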
castor_extractor/uploader/upload_test.py CHANGED

@@ -1,6 +1,7 @@
 from uuid import UUID
 
-from .constant import
+from .constant import INGEST_URLS, FileType
+from .enums import Zone
 from .upload import _path_and_url
 
 
@@ -8,7 +9,8 @@ def test__path():
     source_id = UUID("399a8b22-3187-11ec-8d3d-0242ac130003")
     file_type = FileType.VIZ
     file_path = "filename"
+    zone = Zone.EU
 
-    path, url = _path_and_url(source_id, file_type, file_path)
+    path, url = _path_and_url(source_id, file_type, file_path, zone)
     assert path == f"visualization-{source_id}/{file_path}"
-    assert url == f"{
+    assert url == f"{INGEST_URLS[Zone.EU]}/{path}"
castor_extractor/visualization/sigma/assets.py CHANGED

@@ -4,10 +4,13 @@ from ...types import ExternalAsset
 class SigmaAsset(ExternalAsset):
     """Sigma assets"""
 
+    DATAMODELS = "datamodels"
     DATASETS = "datasets"
+    DATASET_SOURCES = "dataset_sources"
     ELEMENTS = "elements"
     FILES = "files"
     LINEAGES = "lineages"
     MEMBERS = "members"
     QUERIES = "queries"
     WORKBOOKS = "workbooks"
+    WORKBOOK_SOURCES = "workbook_sources"
castor_extractor/visualization/sigma/client/client.py CHANGED

@@ -1,3 +1,4 @@
+import logging
 from collections.abc import Iterator
 from concurrent.futures import ThreadPoolExecutor
 from functools import partial
@@ -24,6 +25,9 @@ from .pagination import (
     SIGMA_QUERIES_PAGINATION_LIMIT,
     SigmaPagination,
 )
+from .sources_transformer import SigmaSourcesTransformer
+
+logger = logging.getLogger(__name__)
 
 _CONTENT_TYPE = "application/x-www-form-urlencoded"
 
@@ -135,6 +139,12 @@ class SigmaClient(APIClient):
             params={"limit": limit},
         )
 
+    def _get_all_datamodels(self) -> Iterator[dict]:
+        request = self._get_paginated(
+            endpoint=SigmaEndpointFactory.datamodels()
+        )
+        yield from fetch_all_pages(request, SigmaPagination)
+
     def _get_all_datasets(self) -> Iterator[dict]:
         request = self._get_paginated(endpoint=SigmaEndpointFactory.datasets())
         yield from fetch_all_pages(request, SigmaPagination)
@@ -275,18 +285,36 @@ class SigmaClient(APIClient):
 
         yield from self._yield_deduplicated_queries(queries, workbook_id)
 
+    def _get_all_dataset_sources(self, datasets: list[dict]) -> Iterator[dict]:
+        yield from SigmaSourcesTransformer(self).get_dataset_sources(datasets)
+
+    def _get_all_workbook_sources(
+        self, workbooks: list[dict]
+    ) -> Iterator[dict]:
+        yield from SigmaSourcesTransformer(self).get_workbook_sources(workbooks)
+
     def fetch(
         self,
         asset: SigmaAsset,
-
+        datasets: Optional[list[dict]] = None,
         elements: Optional[list[dict]] = None,
+        workbooks: Optional[list[dict]] = None,
     ) -> Iterator[dict]:
         """Returns the needed metadata for the queried asset"""
-        if asset == SigmaAsset.DATASETS:
+        if asset == SigmaAsset.DATAMODELS:
+            yield from self._get_all_datamodels()
+
+        elif asset == SigmaAsset.DATASETS:
             yield from self._get_all_datasets()
 
+        elif asset == SigmaAsset.DATASET_SOURCES:
+            if datasets is None:
+                raise ValueError("Missing datasets to extract dataset sources")
+
+            yield from self._get_all_dataset_sources(datasets)
+
         elif asset == SigmaAsset.ELEMENTS:
-            if
+            if workbooks is None:
                 raise ValueError("Missing workbooks to extract elements")
 
             yield from self._get_all_elements(workbooks)
@@ -295,15 +323,16 @@ class SigmaClient(APIClient):
             yield from self._get_all_files()
 
         elif asset == SigmaAsset.LINEAGES:
-            if
+            if elements is None:
                 raise ValueError("Missing elements to extract lineage")
+
             yield from self._get_all_lineages(elements)
 
         elif asset == SigmaAsset.MEMBERS:
             yield from self._get_all_members()
 
         elif asset == SigmaAsset.QUERIES:
-            if
+            if workbooks is None:
                 raise ValueError("Missing workbooks to extract queries")
 
             yield from self._get_all_queries(workbooks)
@@ -311,5 +340,13 @@ class SigmaClient(APIClient):
         elif asset == SigmaAsset.WORKBOOKS:
             yield from self._get_all_workbooks()
 
+        elif asset == SigmaAsset.WORKBOOK_SOURCES:
+            if workbooks is None:
+                raise ValueError(
+                    "Missing workbooks to extract workbook sources"
+                )
+
+            yield from self._get_all_workbook_sources(workbooks)
+
         else:
             raise ValueError(f"This asset {asset} is unknown")
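A short sketch of the extended fetch() dispatch, following the branches above (construction of an authenticated client is elided):

# client: an authenticated SigmaClient
datasets = list(client.fetch(SigmaAsset.DATASETS))
dataset_sources = list(client.fetch(SigmaAsset.DATASET_SOURCES, datasets=datasets))

workbooks = list(client.fetch(SigmaAsset.WORKBOOKS))
workbook_sources = list(client.fetch(SigmaAsset.WORKBOOK_SOURCES, workbooks=workbooks))

# Missing prerequisites raise immediately, e.g.:
# client.fetch(SigmaAsset.DATASET_SOURCES)  ->  ValueError("Missing datasets to extract dataset sources")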
castor_extractor/visualization/sigma/client/endpoints.py CHANGED

@@ -1,6 +1,7 @@
 class SigmaEndpointFactory:
     """Wrapper class around all endpoints we're using"""
 
+    DATAMODELS = "dataModels"
     DATASETS = "datasets"
     FILES = "files"
     MEMBERS = "members"
@@ -10,10 +11,22 @@ class SigmaEndpointFactory:
     def authentication(cls) -> str:
         return "v2/auth/token"
 
+    @classmethod
+    def connection_path(cls, inode_id: str) -> str:
+        return f"v2/connections/paths/{inode_id}"
+
+    @classmethod
+    def datamodels(cls) -> str:
+        return f"v2/{cls.DATAMODELS}"
+
     @classmethod
     def datasets(cls) -> str:
         return f"v2/{cls.DATASETS}"
 
+    @classmethod
+    def dataset_sources(cls, dataset_id: str) -> str:
+        return f"v2/{cls.DATASETS}/{dataset_id}/sources"
+
     @classmethod
     def elements(cls, workbook_id: str, page_id: str) -> str:
         return f"v2/{cls.WORKBOOKS}/{workbook_id}/pages/{page_id}/elements"
@@ -41,3 +54,7 @@ class SigmaEndpointFactory:
     @classmethod
     def workbooks(cls) -> str:
         return f"v2/{cls.WORKBOOKS}"
+
+    @classmethod
+    def workbook_sources(cls, workbook_id: str) -> str:
+        return f"v2/{cls.WORKBOOKS}/{workbook_id}/sources"
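The new endpoint helpers compose into the following paths; DATAMODELS = "dataModels" is shown above, while the WORKBOOKS constant's value ("workbooks") is assumed from the other lowercase constants:

SigmaEndpointFactory.datamodels()                # "v2/dataModels"
SigmaEndpointFactory.dataset_sources("ds-1")     # "v2/datasets/ds-1/sources"
SigmaEndpointFactory.workbook_sources("wb-1")    # "v2/workbooks/wb-1/sources", assuming WORKBOOKS = "workbooks"
SigmaEndpointFactory.connection_path("inode-1")  # "v2/connections/paths/inode-1"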
castor_extractor/visualization/sigma/client/sources_transformer.py ADDED

@@ -0,0 +1,94 @@
+import logging
+from typing import TYPE_CHECKING, Callable, Iterator
+
+from .endpoints import SigmaEndpointFactory
+
+if TYPE_CHECKING:
+    from .client import SigmaClient
+
+logger = logging.getLogger(__name__)
+
+
+class SigmaSourcesTransformer:
+    """Retrieves asset sources and enhances them with additional information."""
+
+    def __init__(self, api_client: "SigmaClient"):
+        self.api_client = api_client
+
+    def _map_table_id_to_connection_path(
+        self, all_sources: list
+    ) -> dict[str, dict]:
+        """Maps a table id to its connection and path information."""
+        logger.info("Mapping table ids to connection and path information")
+
+        unique_table_ids = {
+            source["inodeId"]
+            for asset_sources in all_sources
+            for source in asset_sources["sources"]
+            if source["type"] == "table"
+        }
+
+        return {
+            table_id: self.api_client._get(
+                endpoint=SigmaEndpointFactory.connection_path(table_id)
+            )
+            for table_id in unique_table_ids
+        }
+
+    @staticmethod
+    def _enhance_table_source(source: dict, table_to_path: dict) -> dict:
+        """
+        Combines a single table source with its connection and path information.
+        """
+        if source["type"] != "table":
+            return source
+
+        path_info = table_to_path.get(source["inodeId"], {})
+        source["connectionId"] = path_info.get("connectionId")
+        source["path"] = path_info.get("path")
+        return source
+
+    def _transform_sources(
+        self, all_sources: list, table_to_path: dict
+    ) -> Iterator[dict]:
+        """
+        Yields all sources, with table sources being enhanced with additional information.
+        """
+        logger.info("Merging sources with table information")
+
+        for asset_sources in all_sources:
+            enhanced_sources = [
+                self._enhance_table_source(source, table_to_path)
+                for source in asset_sources["sources"]
+            ]
+
+            yield {
+                "asset_id": asset_sources["asset_id"],
+                "sources": enhanced_sources,
+            }
+
+    def _get_all_sources(
+        self, endpoint: Callable[[str], str], asset_ids: set[str]
+    ) -> Iterator[dict]:
+        """Returns transformed sources for the given assets"""
+        all_sources = []
+
+        for asset_id in asset_ids:
+            sources = self.api_client._get(endpoint=endpoint(asset_id))
+            all_sources.append({"asset_id": asset_id, "sources": sources})
+
+        table_to_path = self._map_table_id_to_connection_path(all_sources)
+
+        yield from self._transform_sources(all_sources, table_to_path)
+
+    def get_dataset_sources(self, datasets: list[dict]) -> Iterator[dict]:
+        asset_ids = {dataset["datasetId"] for dataset in datasets}
+        yield from self._get_all_sources(
+            endpoint=SigmaEndpointFactory.dataset_sources, asset_ids=asset_ids
+        )
+
+    def get_workbook_sources(self, workbooks: list[dict]) -> Iterator[dict]:
+        asset_ids = {workbook["workbookId"] for workbook in workbooks}
+        yield from self._get_all_sources(
+            endpoint=SigmaEndpointFactory.workbook_sources, asset_ids=asset_ids
+        )
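In outline, the transformer issues one sources call per asset id, then one connection-path call per unique table inodeId, and merges the two. A hedged usage sketch, with client and datasets as in the fetch() example earlier:

transformer = SigmaSourcesTransformer(client)  # client: an authenticated SigmaClient
for enriched in transformer.get_dataset_sources(datasets):
    # enriched == {"asset_id": "<datasetId>", "sources": [...]}, where each
    # "table" source has gained "connectionId" and "path"; other source
    # types pass through unchanged.
    print(enriched["asset_id"], len(enriched["sources"]))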
castor_extractor/visualization/sigma/client/sources_transformer_test.py ADDED

@@ -0,0 +1,101 @@
+from unittest.mock import Mock
+
+from .sources_transformer import SigmaSourcesTransformer
+
+_ALL_SOURCES = [
+    {
+        "asset_id": "asset1",
+        "sources": [
+            {"type": "dataset", "inodeId": "1234"},  # non-table source
+            {"type": "table", "inodeId": "table1"},
+            {"type": "table", "inodeId": "table2"},
+        ],
+    },
+    {
+        "asset_id": "asset2",
+        "sources": [
+            {"type": "table", "inodeId": "table1"},  # repeated source
+        ],
+    },
+]
+
+
+_TABLE_TO_PATH = {
+    "table1": {
+        "connectionId": "conn1",
+        "path": ["db", "schema", "table1"],
+    },
+    "table2": {
+        "connectionId": "conn2",
+        "path": ["db", "schema", "table2"],
+    },
+}
+
+
+def test__map_table_id_to_connection_path():
+    transformer = SigmaSourcesTransformer(api_client=Mock())
+
+    def mock_get(endpoint):
+        if "table1" in endpoint:
+            return _TABLE_TO_PATH["table1"]
+        elif "table2" in endpoint:
+            return _TABLE_TO_PATH["table2"]
+        else:
+            raise ValueError(f"Unexpected endpoint: {endpoint}")
+
+    transformer.api_client._get.side_effect = mock_get
+
+    result = transformer._map_table_id_to_connection_path(_ALL_SOURCES)
+
+    assert len(result) == 2
+    assert result["table1"] == {
+        "connectionId": "conn1",
+        "path": ["db", "schema", "table1"],
+    }
+    assert result["table2"] == {
+        "connectionId": "conn2",
+        "path": ["db", "schema", "table2"],
+    }
+    assert transformer.api_client._get.call_count == 2
+
+
+def test__transform_sources():
+    transformer = SigmaSourcesTransformer(api_client=Mock())
+
+    result = list(transformer._transform_sources(_ALL_SOURCES, _TABLE_TO_PATH))
+
+    assert len(result) == 2
+
+    asset_1_results = result[0]
+    assert len(asset_1_results["sources"]) == 3
+    actual_sources = sorted(
+        asset_1_results["sources"], key=lambda x: x["inodeId"]
+    )
+    expected_sources = [
+        {"type": "dataset", "inodeId": "1234"},
+        {
+            "type": "table",
+            "inodeId": "table1",
+            "connectionId": "conn1",
+            "path": ["db", "schema", "table1"],
+        },
+        {
+            "type": "table",
+            "inodeId": "table2",
+            "connectionId": "conn2",
+            "path": ["db", "schema", "table2"],
+        },
+    ]
+    expected_sources = sorted(expected_sources, key=lambda x: x["inodeId"])
+    assert actual_sources == expected_sources
+
+    asset_2_results = result[1]
+    assert asset_2_results["asset_id"] == "asset2"
+    assert asset_2_results["sources"] == [
+        {
+            "type": "table",
+            "inodeId": "table1",
+            "connectionId": "conn1",
+            "path": ["db", "schema", "table1"],
+        }
+    ]
castor_extractor/visualization/sigma/extract.py CHANGED

@@ -22,14 +22,30 @@ def iterate_all_data(
 ) -> Iterable[tuple[SigmaAsset, Union[list, Iterator, dict]]]:
     """Iterate over the extracted data from Sigma"""
 
+    logger.info("Extracting DATA MODELS from API")
+    datamodels = client.fetch(SigmaAsset.DATAMODELS)
+    yield SigmaAsset.DATASETS, list(deep_serialize(datamodels))
+
     logger.info("Extracting DATASETS from API")
-    datasets = client.fetch(SigmaAsset.DATASETS)
+    datasets = list(client.fetch(SigmaAsset.DATASETS))
     yield SigmaAsset.DATASETS, list(deep_serialize(datasets))
 
+    logger.info("Extracting DATASET SOURCES from API")
+    dataset_sources = client.fetch(
+        SigmaAsset.DATASET_SOURCES, datasets=datasets
+    )
+    yield SigmaAsset.DATASET_SOURCES, list(deep_serialize(dataset_sources))
+
     logger.info("Extracting WORKBOOKS from API")
     workbooks = list(client.fetch(SigmaAsset.WORKBOOKS))
     yield SigmaAsset.WORKBOOKS, list(deep_serialize(workbooks))
 
+    logger.info("Extracting WORKBOOK SOURCES from API")
+    workbook_sources = client.fetch(
+        SigmaAsset.WORKBOOK_SOURCES, workbooks=workbooks
+    )
+    yield SigmaAsset.WORKBOOKS, list(deep_serialize(workbook_sources))
+
     logger.info("Extracting FILES from API")
     files = client.fetch(SigmaAsset.FILES)
     yield SigmaAsset.FILES, list(deep_serialize(files))
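A hedged sketch of consuming the extended iteration; the full parameter list of iterate_all_data sits outside this hunk, so a single client argument is assumed. Note that, as written above, the datamodels and workbook_sources payloads are yielded under the SigmaAsset.DATASETS and SigmaAsset.WORKBOOKS keys respectively:

for asset, payload in iterate_all_data(client):
    print(asset.value, len(payload))  # each payload is already deep-serialized to a list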
{castor_extractor-0.24.36.dist-info → castor_extractor-0.24.38.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: castor-extractor
-Version: 0.24.36
+Version: 0.24.38
 Summary: Extract your metadata assets.
 Home-page: https://www.castordoc.com/
 License: EULA
@@ -215,6 +215,14 @@ For any questions or bug report, contact us at [support@coalesce.io](mailto:support@coalesce.io)
 
 # Changelog
 
+## 0.24.38 - 2025-08-07
+
+* Uploader: Support US and EU zones
+
+## 0.24.37 - 2025-08-06
+
+* Sigma: extract data models, dataset sources and workbook sources
+
 ## 0.24.36 - 2025-08-04
 
 * Sigma:
{castor_extractor-0.24.36.dist-info → castor_extractor-0.24.38.dist-info}/RECORD CHANGED

@@ -1,4 +1,4 @@
-CHANGELOG.md,sha256=
+CHANGELOG.md,sha256=cdsC0cY-q3t1K8a-kXhK3OY6y-yrF8uICKb8OqJ3SJo,19185
 Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
 DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
 LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
@@ -29,7 +29,7 @@ castor_extractor/commands/extract_strategy.py,sha256=Q-pUymatPrBFGXobhyUPzFph0-t
 castor_extractor/commands/extract_tableau.py,sha256=LNtI29LbVk1vp4RNrn89GmdW6R_7QBYunRmkowDhbco,1982
 castor_extractor/commands/extract_thoughtspot.py,sha256=caAYJlH-vK7u5IUB6OKXxcaWfLgc7d_XqnFDWK6YNS4,639
 castor_extractor/commands/file_check.py,sha256=TJx76Ymd0QCECmq35zRJMkPE8DJtSInB28MuSXWk8Ao,2644
-castor_extractor/commands/upload.py,sha256=
+castor_extractor/commands/upload.py,sha256=sqpEF_qqCNvT_niIrM6jPhzLaFVjtYwpc2iZw540F20,1633
 castor_extractor/file_checker/__init__.py,sha256=OSt6YLhUT42U_Cp3LCLHMVruwDkksL75Ij13X2UPnVk,119
 castor_extractor/file_checker/column.py,sha256=6bJhcW1snYwgHKkqlS0Ak7XLHZr4YBwO46JCIlnQNKg,3086
 castor_extractor/file_checker/column_test.py,sha256=1j8PxvmvmJgpd-mk30iMYOme32ovPSIn4yCXywFoXrg,1935
@@ -86,13 +86,14 @@ castor_extractor/transformation/dbt/client.py,sha256=BIue1DNAn2b7kHeiXBkGNosq8jZ
 castor_extractor/transformation/dbt/client_test.py,sha256=RLL7y_pLDv2QBM03qBht8yYEooeT_woRADHcb8vgBQ4,4535
 castor_extractor/transformation/dbt/credentials.py,sha256=pGq7GqFQTw9TwN1DXSHC-0yJ2H6B_wMAbHyQTLqJVh0,543
 castor_extractor/types.py,sha256=nHel2hv6NoHmdpOX_heEfO2-DnZPoYA2x0eJdbFvT0s,1276
-castor_extractor/uploader/__init__.py,sha256=
-castor_extractor/uploader/constant.py,sha256=
+castor_extractor/uploader/__init__.py,sha256=xe3QHmHb35TILEhr7__nI_0t0tDolpQuujUyd84YcjI,111
+castor_extractor/uploader/constant.py,sha256=ZmQtFx9nnR0GSLZ9k41upzV3ub4FJCUIyojIEVh-qIg,956
+castor_extractor/uploader/enums.py,sha256=s5KVeBZWRDbDu-qOnrJhTSkSqzh0gxv0W1Z4cUsXfb8,109
 castor_extractor/uploader/env.py,sha256=5KiWHV-WTHfF68T_vzI-ypKAxzy9b9fnz2y4T3lH6QY,871
 castor_extractor/uploader/env_test.py,sha256=ClCWWtwd2N-5ClIDUxVMeKkWfhhOTxpppsXUDmdjxSg,472
-castor_extractor/uploader/settings.py,sha256=
-castor_extractor/uploader/upload.py,sha256=
-castor_extractor/uploader/upload_test.py,sha256=
+castor_extractor/uploader/settings.py,sha256=sUZpg9eHemM99DMrBW8bnlMuoTmCmLCKq-D0OCuQbGA,649
+castor_extractor/uploader/upload.py,sha256=b2g9vWWjXWbt8Ms7brTc7OK_I7Z-1VSibNbppGoB2oQ,4764
+castor_extractor/uploader/upload_test.py,sha256=UgN7TnT9Chn6KVzRcAX0Tuvp7-tps3ugxGitlgb9TSY,462
 castor_extractor/uploader/utils.py,sha256=otAaySj5aeem6f0CTd0Te6ioJ6uP2J1p348j-SdIwDI,802
 castor_extractor/utils/__init__.py,sha256=z_BdKTUyuug3I5AzCuSGrAVskfLax4_olfORIjhZw_M,1691
 castor_extractor/utils/argument_parser.py,sha256=S4EcIh3wNDjs3fOrQnttCcPsAmG8m_Txl7xvEh0Q37s,283
@@ -269,14 +270,16 @@ castor_extractor/visualization/salesforce_reporting/client/rest.py,sha256=AqL1DT
 castor_extractor/visualization/salesforce_reporting/client/soql.py,sha256=ytZnX6zE-NoS_Kz12KghMcCM4ukPwhMj6U0rQZ_8Isk,1621
 castor_extractor/visualization/salesforce_reporting/extract.py,sha256=ScStilebLGf4HDTFqhVTQAvv_OrKxc8waycfBKdsVAc,1359
 castor_extractor/visualization/sigma/__init__.py,sha256=GINql4yJLtjfOJgjHaWNpE13cMtnKNytiFRomwav27Q,114
-castor_extractor/visualization/sigma/assets.py,sha256=
+castor_extractor/visualization/sigma/assets.py,sha256=uKGKDaeY1ejc7XGh4eFaNp2ygG7hgca132xsX4eCwKQ,380
 castor_extractor/visualization/sigma/client/__init__.py,sha256=YQv06FBBQHvBMFg_tN0nUcmUp2NCL2s-eFTXG8rXaBg,74
-castor_extractor/visualization/sigma/client/client.py,sha256=
+castor_extractor/visualization/sigma/client/client.py,sha256=VU0BHlug3tCpGA1je0PjEy4hU4TKhCH9UUGi8LRmNy8,11422
 castor_extractor/visualization/sigma/client/client_test.py,sha256=ae0ZOvKutCm44jnrJ-0_A5Y6ZGyDkMf9Ml3eEP8dNkY,581
 castor_extractor/visualization/sigma/client/credentials.py,sha256=XddAuQSmCKpxJ70TQgRnOj0vMPYVtiStk_lMMQ1AiNM,693
-castor_extractor/visualization/sigma/client/endpoints.py,sha256=
+castor_extractor/visualization/sigma/client/endpoints.py,sha256=i7KTKnl2Os6752CdtJl0vPSC_Z6JxmacodV_saOnce0,1662
 castor_extractor/visualization/sigma/client/pagination.py,sha256=2bFA7GiBUUasFtHJKA90516d283p7Pg50-4zw6Fwt8I,726
-castor_extractor/visualization/sigma/
+castor_extractor/visualization/sigma/client/sources_transformer.py,sha256=mRupzxjtjDqELIouHF0egBkgslDmn5Y4uqO_sbUGCNs,3244
+castor_extractor/visualization/sigma/client/sources_transformer_test.py,sha256=06yUHXyv65amXLKXhix6K3kkVc1kpBqSjIYcxbyMI4Y,2766
+castor_extractor/visualization/sigma/extract.py,sha256=poTh70Xm2D6BwbdGApLkjXy6-t4iZnOoMB5DPfaTLEI,2929
 castor_extractor/visualization/strategy/__init__.py,sha256=HOMv4JxqF5ZmViWi-pDE-PSXJRLTdXal_jtpHG_rlR8,123
 castor_extractor/visualization/strategy/assets.py,sha256=yFXF_dX01patC0HQ1eU7Jo_4DZ4m6IJEg0uCB71tMoI,480
 castor_extractor/visualization/strategy/client/__init__.py,sha256=XWP0yF5j6JefDJkDfX-RSJn3HF2ceQ0Yx1PLCfB3BBo,80
@@ -428,8 +431,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=4RgeSkHDWTWRyU2iLx
 castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
 castor_extractor/warehouse/sqlserver/query.py,sha256=7sW8cK3JzxPt6faTJ7e4lk9tE4fo_AeCymI-LqsSols,1276
 castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
-castor_extractor-0.24.
-castor_extractor-0.24.
-castor_extractor-0.24.
-castor_extractor-0.24.
-castor_extractor-0.24.
+castor_extractor-0.24.38.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
+castor_extractor-0.24.38.dist-info/METADATA,sha256=iCWUVbgDFS721szJ8kUGMA58Va3Roq3WmyGinZgnHMw,26638
+castor_extractor-0.24.38.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+castor_extractor-0.24.38.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
+castor_extractor-0.24.38.dist-info/RECORD,,
{castor_extractor-0.24.36.dist-info → castor_extractor-0.24.38.dist-info}/LICENCE (file without changes)
{castor_extractor-0.24.36.dist-info → castor_extractor-0.24.38.dist-info}/WHEEL (file without changes)
{castor_extractor-0.24.36.dist-info → castor_extractor-0.24.38.dist-info}/entry_points.txt (file without changes)