castor-extractor 0.24.35__py3-none-any.whl → 0.24.38__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between these versions as they appear in the public registry.

Potentially problematic release.


This version of castor-extractor might be problematic.

CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
  # Changelog

+ ## 0.24.38 - 2025-08-07
+
+ * Uploader: Support US and EU zones
+
+ ## 0.24.37 - 2025-08-06
+
+ * Sigma: extract data models, dataset sources and workbook sources
+
+ ## 0.24.36 - 2025-08-04
+
+ * Sigma:
+   * Refresh token before lineage extraction
+   * Disregard 403 errors during lineage extraction
+
  ## 0.24.35 - 2025-07-29

  * Coalesce - Fix pagination issue
@@ -3,6 +3,7 @@ from argparse import ArgumentParser

  from castor_extractor.uploader import ( # type: ignore
      FileType,
+     Zone,
      upload_any,
  )
  from castor_extractor.utils import parse_filled_arguments # type: ignore
@@ -40,6 +41,15 @@ def _args() -> ArgumentParser:
          ),
          choices=supported_file_type,
      )
+     supported_zones = [zone.value for zone in Zone]
+     parser.add_argument(
+         "-z",
+         "--zone",
+         help="geographic zone to upload, currently supported are {}, defaults to EU".format(
+             supported_zones,
+         ),
+         choices=supported_zones,
+     )
      return parser


@@ -1,2 +1,3 @@
  from .constant import FileType
+ from .enums import Zone
  from .upload import upload, upload_any, upload_manifest
@@ -1,9 +1,13 @@
  from enum import Enum

  from ..utils import RetryStrategy
+ from .enums import Zone

  # url of the gcs proxy
- INGEST_URL = "https://ingest.castordoc.com"
+ INGEST_URLS = {
+     Zone.EU: "https://ingest.castordoc.com",
+     Zone.US: "https://ingest.us.castordoc.com",
+ }

  RETRY_BASE_MS = 10_000
  RETRY_JITTER_MS = 1_000
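The ingest proxy URL is now looked up per zone instead of being hard-coded. A quick sanity check of the new mapping (a sketch; the module paths are taken from the RECORD listing further down, and the Zone enum comes from the new enums.py hunk that follows):

    # Sketch: INGEST_URLS keys the proxy URL by Zone; EU keeps the previous URL.
    from castor_extractor.uploader.constant import INGEST_URLS
    from castor_extractor.uploader.enums import Zone

    assert INGEST_URLS[Zone.EU] == "https://ingest.castordoc.com"
    assert INGEST_URLS[Zone.US] == "https://ingest.us.castordoc.com"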
@@ -0,0 +1,8 @@
+ from enum import Enum
+
+
+ class Zone(Enum):
+     """Geographic cluster location"""
+
+     EU = "EU"
+     US = "US"
@@ -4,6 +4,7 @@ from pydantic import UUID4, Field
  from pydantic_settings import BaseSettings, SettingsConfigDict

  from .constant import FileType
+ from .enums import Zone

  UPLOADER_ENV_PREFIX = "CASTOR_UPLOADER_"

@@ -22,3 +23,4 @@ class UploaderSettings(BaseSettings):
      file_type: FileType
      source_id: UUID4
      token: str = Field(repr=False)
+     zone: Optional[Zone] = Zone.EU
@@ -10,13 +10,14 @@ import requests

  from ..utils.retry import retry
  from .constant import (
-     INGEST_URL,
+     INGEST_URLS,
      PATH_TEMPLATES,
      RETRY_BASE_MS,
      RETRY_JITTER_MS,
      RETRY_STRATEGY,
      FileType,
  )
+ from .enums import Zone
  from .env import get_blob_env
  from .settings import UploaderSettings
  from .utils import iter_files
@@ -33,6 +34,7 @@ def _path_and_url(
      source_id: UUID,
      file_type: FileType,
      file_path: str,
+     zone: Zone,
  ) -> tuple[str, str]:
      now = datetime.utcnow()
      timestamp = int(now.timestamp())
@@ -44,7 +46,7 @@ def _path_and_url(
          filename=filename,
      )

-     url = f"{INGEST_URL}/{path}"
+     url = f"{INGEST_URLS[zone]}/{path}"

      return path, url

@@ -61,13 +63,16 @@ def _upload(
      source_id: UUID,
      file_path: str,
      file_type: FileType,
+     zone: Optional[Zone] = Zone.EU,
  ) -> None:
      """
      Upload the given file to Google Cloud Storage (GCS)
      - Don't call GCS API directly
      - Call the ingestion proxy which handles authorisation and uploading
      """
-     path, url = _path_and_url(source_id, file_type, file_path)
+     if not zone:
+         zone = Zone.EU
+     path, url = _path_and_url(source_id, file_type, file_path, zone)
      headers = _headers(token)
      timeout, max_retries = get_blob_env()

@@ -97,6 +102,7 @@ def _upload(
  def upload_manifest(
      token: str,
      source_id: UUID,
+     zone: Optional[Zone],
      file_path: Optional[str] = None,
  ) -> None:
      """
@@ -106,13 +112,20 @@
      """
      if not file_path:
          raise ValueError("file path is needed to upload a manifest")
-     _upload(token, source_id, file_path, FileType.DBT)
+     _upload(
+         token=token,
+         source_id=source_id,
+         file_path=file_path,
+         file_type=FileType.DBT,
+         zone=zone,
+     )


  def upload(
      token: str,
      source_id: UUID,
      file_type: FileType,
+     zone: Optional[Zone],
      file_path: Optional[str] = None,
      directory_path: Optional[str] = None,
  ) -> None:
@@ -133,7 +146,13 @@
          raise ValueError(message)

      for file_ in files:
-         _upload(token, source_id, file_, file_type)
+         _upload(
+             token=token,
+             source_id=source_id,
+             file_path=file_,
+             file_type=file_type,
+             zone=zone,
+         )


  def upload_any(**kwargs) -> None:
@@ -156,6 +175,7 @@ def upload_any(**kwargs) -> None:
              token=settings.token,
              source_id=settings.source_id,
              file_path=settings.file_path,
+             zone=settings.zone,
          )
          return None

@@ -165,4 +185,5 @@ def upload_any(**kwargs) -> None:
          file_type=file_type,
          file_path=settings.file_path,
          directory_path=settings.directory_path,
+         zone=settings.zone,
      )
@@ -1,6 +1,7 @@
  from uuid import UUID

- from .constant import INGEST_URL, FileType
+ from .constant import INGEST_URLS, FileType
+ from .enums import Zone
  from .upload import _path_and_url


@@ -8,7 +9,8 @@ def test__path():
      source_id = UUID("399a8b22-3187-11ec-8d3d-0242ac130003")
      file_type = FileType.VIZ
      file_path = "filename"
+     zone = Zone.EU

-     path, url = _path_and_url(source_id, file_type, file_path)
+     path, url = _path_and_url(source_id, file_type, file_path, zone)
      assert path == f"visualization-{source_id}/{file_path}"
-     assert url == f"{INGEST_URL}/{path}"
+     assert url == f"{INGEST_URLS[Zone.EU]}/{path}"
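Taken together, the uploader changes expose the zone both as a `--zone` CLI flag and as a `zone` field on UploaderSettings. A minimal programmatic sketch, assuming upload_any forwards its keyword arguments to UploaderSettings the same way the CLI command does via parse_filled_arguments; the token, source id and file path below are placeholders:

    from castor_extractor.uploader import FileType, Zone, upload_any

    upload_any(
        token="<api-token>",  # placeholder credentials
        source_id="399a8b22-3187-11ec-8d3d-0242ac130003",  # placeholder source id
        file_type=FileType.VIZ,
        file_path="./metadata_export.csv",  # placeholder file
        zone=Zone.US,  # omit to fall back to the EU ingest URL
    )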
@@ -4,10 +4,13 @@ from ...types import ExternalAsset
  class SigmaAsset(ExternalAsset):
      """Sigma assets"""

+     DATAMODELS = "datamodels"
      DATASETS = "datasets"
+     DATASET_SOURCES = "dataset_sources"
      ELEMENTS = "elements"
      FILES = "files"
      LINEAGES = "lineages"
      MEMBERS = "members"
      QUERIES = "queries"
      WORKBOOKS = "workbooks"
+     WORKBOOK_SOURCES = "workbook_sources"
@@ -1,3 +1,4 @@
+ import logging
  from collections.abc import Iterator
  from concurrent.futures import ThreadPoolExecutor
  from functools import partial
@@ -24,6 +25,9 @@ from .pagination import (
      SIGMA_QUERIES_PAGINATION_LIMIT,
      SigmaPagination,
  )
+ from .sources_transformer import SigmaSourcesTransformer
+
+ logger = logging.getLogger(__name__)

  _CONTENT_TYPE = "application/x-www-form-urlencoded"

@@ -54,6 +58,13 @@ SIGMA_SAFE_MODE = RequestSafeMode(
      max_errors=_VOLUME_IGNORED,
      status_codes=_IGNORED_ERROR_CODES,
  )
+ SIGMA_SAFE_MODE_LINEAGE = RequestSafeMode(
+     max_errors=_VOLUME_IGNORED,
+     status_codes=(
+         *_IGNORED_ERROR_CODES,
+         HTTPStatus.FORBIDDEN,
+     ),
+ )
  _THREADS_LINEAGE = 10 # empirically found; hit the rate limit with 20 workers
  _RETRY_NUMBER = 1
  _RETRY_BASE_MS = 60_000
@@ -128,6 +139,12 @@ class SigmaClient(APIClient):
              params={"limit": limit},
          )

+     def _get_all_datamodels(self) -> Iterator[dict]:
+         request = self._get_paginated(
+             endpoint=SigmaEndpointFactory.datamodels()
+         )
+         yield from fetch_all_pages(request, SigmaPagination)
+
      def _get_all_datasets(self) -> Iterator[dict]:
          request = self._get_paginated(endpoint=SigmaEndpointFactory.datasets())
          yield from fetch_all_pages(request, SigmaPagination)
@@ -210,18 +227,35 @@ class SigmaClient(APIClient):
          return contexts

      def _get_all_lineages(self, elements: list[dict]) -> Iterator[dict]:
+         """
+         The safe mode is temporarily modified to include 403 errors.
+
+         Due to concurrency issues, we force a refresh of the token in hopes that
+         the lineage extraction takes less than the token expiration time of
+         1 hour.
+         """
+         safe_mode = self._safe_mode
+         self._safe_mode = SIGMA_SAFE_MODE_LINEAGE
+
          lineage_context = self._lineage_context(elements)

+         self._auth.refresh_token()
+
          with ThreadPoolExecutor(max_workers=_THREADS_LINEAGE) as executor:
              results = executor.map(self._get_lineage, lineage_context)

          for lineage in results:
+             if not lineage.lineage:
+                 continue
+
              yield {
                  **lineage.lineage,
                  "workbook_id": lineage.context.workbook_id,
                  "element_id": lineage.context.element_id,
              }

+         self._safe_mode = safe_mode
+
      @staticmethod
      def _yield_deduplicated_queries(
          queries: Iterable[dict], workbook_id: str
@@ -251,18 +285,36 @@ class SigmaClient(APIClient):

          yield from self._yield_deduplicated_queries(queries, workbook_id)

+     def _get_all_dataset_sources(self, datasets: list[dict]) -> Iterator[dict]:
+         yield from SigmaSourcesTransformer(self).get_dataset_sources(datasets)
+
+     def _get_all_workbook_sources(
+         self, workbooks: list[dict]
+     ) -> Iterator[dict]:
+         yield from SigmaSourcesTransformer(self).get_workbook_sources(workbooks)
+
      def fetch(
          self,
          asset: SigmaAsset,
-         workbooks: Optional[list[dict]] = None,
+         datasets: Optional[list[dict]] = None,
          elements: Optional[list[dict]] = None,
+         workbooks: Optional[list[dict]] = None,
      ) -> Iterator[dict]:
          """Returns the needed metadata for the queried asset"""
-         if asset == SigmaAsset.DATASETS:
+         if asset == SigmaAsset.DATAMODELS:
+             yield from self._get_all_datamodels()
+
+         elif asset == SigmaAsset.DATASETS:
              yield from self._get_all_datasets()

+         elif asset == SigmaAsset.DATASET_SOURCES:
+             if datasets is None:
+                 raise ValueError("Missing datasets to extract dataset sources")
+
+             yield from self._get_all_dataset_sources(datasets)
+
          elif asset == SigmaAsset.ELEMENTS:
-             if not workbooks:
+             if workbooks is None:
                  raise ValueError("Missing workbooks to extract elements")

              yield from self._get_all_elements(workbooks)
@@ -271,15 +323,16 @@
              yield from self._get_all_files()

          elif asset == SigmaAsset.LINEAGES:
-             if not elements:
+             if elements is None:
                  raise ValueError("Missing elements to extract lineage")
+
              yield from self._get_all_lineages(elements)

          elif asset == SigmaAsset.MEMBERS:
              yield from self._get_all_members()

          elif asset == SigmaAsset.QUERIES:
-             if not workbooks:
+             if workbooks is None:
                  raise ValueError("Missing workbooks to extract queries")

              yield from self._get_all_queries(workbooks)
@@ -287,5 +340,13 @@
          elif asset == SigmaAsset.WORKBOOKS:
              yield from self._get_all_workbooks()

+         elif asset == SigmaAsset.WORKBOOK_SOURCES:
+             if workbooks is None:
+                 raise ValueError(
+                     "Missing workbooks to extract workbook sources"
+                 )
+
+             yield from self._get_all_workbook_sources(workbooks)
+
          else:
              raise ValueError(f"This asset {asset} is unknown")
@@ -1,6 +1,7 @@
  class SigmaEndpointFactory:
      """Wrapper class around all endpoints we're using"""

+     DATAMODELS = "dataModels"
      DATASETS = "datasets"
      FILES = "files"
      MEMBERS = "members"
@@ -10,10 +11,22 @@ class SigmaEndpointFactory:
      def authentication(cls) -> str:
          return "v2/auth/token"

+     @classmethod
+     def connection_path(cls, inode_id: str) -> str:
+         return f"v2/connections/paths/{inode_id}"
+
+     @classmethod
+     def datamodels(cls) -> str:
+         return f"v2/{cls.DATAMODELS}"
+
      @classmethod
      def datasets(cls) -> str:
          return f"v2/{cls.DATASETS}"

+     @classmethod
+     def dataset_sources(cls, dataset_id: str) -> str:
+         return f"v2/{cls.DATASETS}/{dataset_id}/sources"
+
      @classmethod
      def elements(cls, workbook_id: str, page_id: str) -> str:
          return f"v2/{cls.WORKBOOKS}/{workbook_id}/pages/{page_id}/elements"
@@ -41,3 +54,7 @@ class SigmaEndpointFactory:
      @classmethod
      def workbooks(cls) -> str:
          return f"v2/{cls.WORKBOOKS}"
+
+     @classmethod
+     def workbook_sources(cls, workbook_id: str) -> str:
+         return f"v2/{cls.WORKBOOKS}/{workbook_id}/sources"
@@ -0,0 +1,94 @@
+ import logging
+ from typing import TYPE_CHECKING, Callable, Iterator
+
+ from .endpoints import SigmaEndpointFactory
+
+ if TYPE_CHECKING:
+     from .client import SigmaClient
+
+ logger = logging.getLogger(__name__)
+
+
+ class SigmaSourcesTransformer:
+     """Retrieves asset sources and enhances them with additional information."""
+
+     def __init__(self, api_client: "SigmaClient"):
+         self.api_client = api_client
+
+     def _map_table_id_to_connection_path(
+         self, all_sources: list
+     ) -> dict[str, dict]:
+         """Maps a table id to its connection and path information."""
+         logger.info("Mapping table ids to connection and path information")
+
+         unique_table_ids = {
+             source["inodeId"]
+             for asset_sources in all_sources
+             for source in asset_sources["sources"]
+             if source["type"] == "table"
+         }
+
+         return {
+             table_id: self.api_client._get(
+                 endpoint=SigmaEndpointFactory.connection_path(table_id)
+             )
+             for table_id in unique_table_ids
+         }
+
+     @staticmethod
+     def _enhance_table_source(source: dict, table_to_path: dict) -> dict:
+         """
+         Combines a single table source with its connection and path information.
+         """
+         if source["type"] != "table":
+             return source
+
+         path_info = table_to_path.get(source["inodeId"], {})
+         source["connectionId"] = path_info.get("connectionId")
+         source["path"] = path_info.get("path")
+         return source
+
+     def _transform_sources(
+         self, all_sources: list, table_to_path: dict
+     ) -> Iterator[dict]:
+         """
+         Yields all sources, with table sources being enhanced with additional information.
+         """
+         logger.info("Merging sources with table information")
+
+         for asset_sources in all_sources:
+             enhanced_sources = [
+                 self._enhance_table_source(source, table_to_path)
+                 for source in asset_sources["sources"]
+             ]
+
+             yield {
+                 "asset_id": asset_sources["asset_id"],
+                 "sources": enhanced_sources,
+             }
+
+     def _get_all_sources(
+         self, endpoint: Callable[[str], str], asset_ids: set[str]
+     ) -> Iterator[dict]:
+         """Returns transformed sources for the given assets"""
+         all_sources = []
+
+         for asset_id in asset_ids:
+             sources = self.api_client._get(endpoint=endpoint(asset_id))
+             all_sources.append({"asset_id": asset_id, "sources": sources})
+
+         table_to_path = self._map_table_id_to_connection_path(all_sources)
+
+         yield from self._transform_sources(all_sources, table_to_path)
+
+     def get_dataset_sources(self, datasets: list[dict]) -> Iterator[dict]:
+         asset_ids = {dataset["datasetId"] for dataset in datasets}
+         yield from self._get_all_sources(
+             endpoint=SigmaEndpointFactory.dataset_sources, asset_ids=asset_ids
+         )
+
+     def get_workbook_sources(self, workbooks: list[dict]) -> Iterator[dict]:
+         asset_ids = {workbook["workbookId"] for workbook in workbooks}
+         yield from self._get_all_sources(
+             endpoint=SigmaEndpointFactory.workbook_sources, asset_ids=asset_ids
+         )
1
+ from unittest.mock import Mock
2
+
3
+ from .sources_transformer import SigmaSourcesTransformer
4
+
5
+ _ALL_SOURCES = [
6
+ {
7
+ "asset_id": "asset1",
8
+ "sources": [
9
+ {"type": "dataset", "inodeId": "1234"}, # non-table source
10
+ {"type": "table", "inodeId": "table1"},
11
+ {"type": "table", "inodeId": "table2"},
12
+ ],
13
+ },
14
+ {
15
+ "asset_id": "asset2",
16
+ "sources": [
17
+ {"type": "table", "inodeId": "table1"}, # repeated source
18
+ ],
19
+ },
20
+ ]
21
+
22
+
23
+ _TABLE_TO_PATH = {
24
+ "table1": {
25
+ "connectionId": "conn1",
26
+ "path": ["db", "schema", "table1"],
27
+ },
28
+ "table2": {
29
+ "connectionId": "conn2",
30
+ "path": ["db", "schema", "table2"],
31
+ },
32
+ }
33
+
34
+
35
+ def test__map_table_id_to_connection_path():
36
+ transformer = SigmaSourcesTransformer(api_client=Mock())
37
+
38
+ def mock_get(endpoint):
39
+ if "table1" in endpoint:
40
+ return _TABLE_TO_PATH["table1"]
41
+ elif "table2" in endpoint:
42
+ return _TABLE_TO_PATH["table2"]
43
+ else:
44
+ raise ValueError(f"Unexpected endpoint: {endpoint}")
45
+
46
+ transformer.api_client._get.side_effect = mock_get
47
+
48
+ result = transformer._map_table_id_to_connection_path(_ALL_SOURCES)
49
+
50
+ assert len(result) == 2
51
+ assert result["table1"] == {
52
+ "connectionId": "conn1",
53
+ "path": ["db", "schema", "table1"],
54
+ }
55
+ assert result["table2"] == {
56
+ "connectionId": "conn2",
57
+ "path": ["db", "schema", "table2"],
58
+ }
59
+ assert transformer.api_client._get.call_count == 2
60
+
61
+
62
+ def test__transform_sources():
63
+ transformer = SigmaSourcesTransformer(api_client=Mock())
64
+
65
+ result = list(transformer._transform_sources(_ALL_SOURCES, _TABLE_TO_PATH))
66
+
67
+ assert len(result) == 2
68
+
69
+ asset_1_results = result[0]
70
+ assert len(asset_1_results["sources"]) == 3
71
+ actual_sources = sorted(
72
+ asset_1_results["sources"], key=lambda x: x["inodeId"]
73
+ )
74
+ expected_sources = [
75
+ {"type": "dataset", "inodeId": "1234"},
76
+ {
77
+ "type": "table",
78
+ "inodeId": "table1",
79
+ "connectionId": "conn1",
80
+ "path": ["db", "schema", "table1"],
81
+ },
82
+ {
83
+ "type": "table",
84
+ "inodeId": "table2",
85
+ "connectionId": "conn2",
86
+ "path": ["db", "schema", "table2"],
87
+ },
88
+ ]
89
+ expected_sources = sorted(expected_sources, key=lambda x: x["inodeId"])
90
+ assert actual_sources == expected_sources
91
+
92
+ asset_2_results = result[1]
93
+ assert asset_2_results["asset_id"] == "asset2"
94
+ assert asset_2_results["sources"] == [
95
+ {
96
+ "type": "table",
97
+ "inodeId": "table1",
98
+ "connectionId": "conn1",
99
+ "path": ["db", "schema", "table1"],
100
+ }
101
+ ]
@@ -22,14 +22,30 @@ def iterate_all_data(
22
22
  ) -> Iterable[tuple[SigmaAsset, Union[list, Iterator, dict]]]:
23
23
  """Iterate over the extracted data from Sigma"""
24
24
 
25
+ logger.info("Extracting DATA MODELS from API")
26
+ datamodels = client.fetch(SigmaAsset.DATAMODELS)
27
+ yield SigmaAsset.DATASETS, list(deep_serialize(datamodels))
28
+
25
29
  logger.info("Extracting DATASETS from API")
26
- datasets = client.fetch(SigmaAsset.DATASETS)
30
+ datasets = list(client.fetch(SigmaAsset.DATASETS))
27
31
  yield SigmaAsset.DATASETS, list(deep_serialize(datasets))
28
32
 
33
+ logger.info("Extracting DATASET SOURCES from API")
34
+ dataset_sources = client.fetch(
35
+ SigmaAsset.DATASET_SOURCES, datasets=datasets
36
+ )
37
+ yield SigmaAsset.DATASET_SOURCES, list(deep_serialize(dataset_sources))
38
+
29
39
  logger.info("Extracting WORKBOOKS from API")
30
40
  workbooks = list(client.fetch(SigmaAsset.WORKBOOKS))
31
41
  yield SigmaAsset.WORKBOOKS, list(deep_serialize(workbooks))
32
42
 
43
+ logger.info("Extracting WORKBOOK SOURCES from API")
44
+ workbook_sources = client.fetch(
45
+ SigmaAsset.WORKBOOK_SOURCES, workbooks=workbooks
46
+ )
47
+ yield SigmaAsset.WORKBOOKS, list(deep_serialize(workbook_sources))
48
+
33
49
  logger.info("Extracting FILES from API")
34
50
  files = client.fetch(SigmaAsset.FILES)
35
51
  yield SigmaAsset.FILES, list(deep_serialize(files))
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: castor-extractor
- Version: 0.24.35
+ Version: 0.24.38
  Summary: Extract your metadata assets.
  Home-page: https://www.castordoc.com/
  License: EULA
@@ -215,6 +215,20 @@ For any questions or bug report, contact us at [support@coalesce.io](mailto:supp

  # Changelog

+ ## 0.24.38 - 2025-08-07
+
+ * Uploader: Support US and EU zones
+
+ ## 0.24.37 - 2025-08-06
+
+ * Sigma: extract data models, dataset sources and workbook sources
+
+ ## 0.24.36 - 2025-08-04
+
+ * Sigma:
+   * Refresh token before lineage extraction
+   * Disregard 403 errors during lineage extraction
+
  ## 0.24.35 - 2025-07-29

  * Coalesce - Fix pagination issue
@@ -1,4 +1,4 @@
- CHANGELOG.md,sha256=1S9O_c1LH8T4P78akRxlFS8Tv0i9Jgswy7V9zvd_UQw,18900
+ CHANGELOG.md,sha256=cdsC0cY-q3t1K8a-kXhK3OY6y-yrF8uICKb8OqJ3SJo,19185
  Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
  DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
  LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
@@ -29,7 +29,7 @@ castor_extractor/commands/extract_strategy.py,sha256=Q-pUymatPrBFGXobhyUPzFph0-t
  castor_extractor/commands/extract_tableau.py,sha256=LNtI29LbVk1vp4RNrn89GmdW6R_7QBYunRmkowDhbco,1982
  castor_extractor/commands/extract_thoughtspot.py,sha256=caAYJlH-vK7u5IUB6OKXxcaWfLgc7d_XqnFDWK6YNS4,639
  castor_extractor/commands/file_check.py,sha256=TJx76Ymd0QCECmq35zRJMkPE8DJtSInB28MuSXWk8Ao,2644
- castor_extractor/commands/upload.py,sha256=rLXp7gQ8zb1kLbho4FT87q8eJd8Gvo_TkyIynAaQ-4s,1342
+ castor_extractor/commands/upload.py,sha256=sqpEF_qqCNvT_niIrM6jPhzLaFVjtYwpc2iZw540F20,1633
  castor_extractor/file_checker/__init__.py,sha256=OSt6YLhUT42U_Cp3LCLHMVruwDkksL75Ij13X2UPnVk,119
  castor_extractor/file_checker/column.py,sha256=6bJhcW1snYwgHKkqlS0Ak7XLHZr4YBwO46JCIlnQNKg,3086
  castor_extractor/file_checker/column_test.py,sha256=1j8PxvmvmJgpd-mk30iMYOme32ovPSIn4yCXywFoXrg,1935
@@ -86,13 +86,14 @@ castor_extractor/transformation/dbt/client.py,sha256=BIue1DNAn2b7kHeiXBkGNosq8jZ
  castor_extractor/transformation/dbt/client_test.py,sha256=RLL7y_pLDv2QBM03qBht8yYEooeT_woRADHcb8vgBQ4,4535
  castor_extractor/transformation/dbt/credentials.py,sha256=pGq7GqFQTw9TwN1DXSHC-0yJ2H6B_wMAbHyQTLqJVh0,543
  castor_extractor/types.py,sha256=nHel2hv6NoHmdpOX_heEfO2-DnZPoYA2x0eJdbFvT0s,1276
- castor_extractor/uploader/__init__.py,sha256=A4bq_SrEtKAsl0r_D_duSTvL5WIQjVfsMy7tDx9IKg0,87
- castor_extractor/uploader/constant.py,sha256=yTigLHDlYwoRr6CpFIl7ReElFsQd4H-qkluMZJPWSx0,865
+ castor_extractor/uploader/__init__.py,sha256=xe3QHmHb35TILEhr7__nI_0t0tDolpQuujUyd84YcjI,111
+ castor_extractor/uploader/constant.py,sha256=ZmQtFx9nnR0GSLZ9k41upzV3ub4FJCUIyojIEVh-qIg,956
+ castor_extractor/uploader/enums.py,sha256=s5KVeBZWRDbDu-qOnrJhTSkSqzh0gxv0W1Z4cUsXfb8,109
  castor_extractor/uploader/env.py,sha256=5KiWHV-WTHfF68T_vzI-ypKAxzy9b9fnz2y4T3lH6QY,871
  castor_extractor/uploader/env_test.py,sha256=ClCWWtwd2N-5ClIDUxVMeKkWfhhOTxpppsXUDmdjxSg,472
- castor_extractor/uploader/settings.py,sha256=3MvOX-UFRqrLZoiT7wYn9jUGro7NX4RCafYzrXrLQtA,590
- castor_extractor/uploader/upload.py,sha256=PSQfkO_7LSE0WBo9Tm_hlS2ONepKeB0cBFdJXySnues,4310
- castor_extractor/uploader/upload_test.py,sha256=7fwstdQe7FjuwGilsCdFpEQr1qLoR2WTRUzyy93fISw,402
+ castor_extractor/uploader/settings.py,sha256=sUZpg9eHemM99DMrBW8bnlMuoTmCmLCKq-D0OCuQbGA,649
+ castor_extractor/uploader/upload.py,sha256=b2g9vWWjXWbt8Ms7brTc7OK_I7Z-1VSibNbppGoB2oQ,4764
+ castor_extractor/uploader/upload_test.py,sha256=UgN7TnT9Chn6KVzRcAX0Tuvp7-tps3ugxGitlgb9TSY,462
  castor_extractor/uploader/utils.py,sha256=otAaySj5aeem6f0CTd0Te6ioJ6uP2J1p348j-SdIwDI,802
  castor_extractor/utils/__init__.py,sha256=z_BdKTUyuug3I5AzCuSGrAVskfLax4_olfORIjhZw_M,1691
  castor_extractor/utils/argument_parser.py,sha256=S4EcIh3wNDjs3fOrQnttCcPsAmG8m_Txl7xvEh0Q37s,283
@@ -269,14 +270,16 @@ castor_extractor/visualization/salesforce_reporting/client/rest.py,sha256=AqL1DT
  castor_extractor/visualization/salesforce_reporting/client/soql.py,sha256=ytZnX6zE-NoS_Kz12KghMcCM4ukPwhMj6U0rQZ_8Isk,1621
  castor_extractor/visualization/salesforce_reporting/extract.py,sha256=ScStilebLGf4HDTFqhVTQAvv_OrKxc8waycfBKdsVAc,1359
  castor_extractor/visualization/sigma/__init__.py,sha256=GINql4yJLtjfOJgjHaWNpE13cMtnKNytiFRomwav27Q,114
- castor_extractor/visualization/sigma/assets.py,sha256=JZ1Cpxnml8P3mIJoTUM57hvylB18ErECQXaP5FF63O4,268
+ castor_extractor/visualization/sigma/assets.py,sha256=uKGKDaeY1ejc7XGh4eFaNp2ygG7hgca132xsX4eCwKQ,380
  castor_extractor/visualization/sigma/client/__init__.py,sha256=YQv06FBBQHvBMFg_tN0nUcmUp2NCL2s-eFTXG8rXaBg,74
- castor_extractor/visualization/sigma/client/client.py,sha256=ZE44k5klBVnc5lld3tpjuKGeSdFmlJ0wr5DOB4pEfco,9446
+ castor_extractor/visualization/sigma/client/client.py,sha256=VU0BHlug3tCpGA1je0PjEy4hU4TKhCH9UUGi8LRmNy8,11422
  castor_extractor/visualization/sigma/client/client_test.py,sha256=ae0ZOvKutCm44jnrJ-0_A5Y6ZGyDkMf9Ml3eEP8dNkY,581
  castor_extractor/visualization/sigma/client/credentials.py,sha256=XddAuQSmCKpxJ70TQgRnOj0vMPYVtiStk_lMMQ1AiNM,693
- castor_extractor/visualization/sigma/client/endpoints.py,sha256=DBFphbgoH78_MZUGM_bKBAq28Nl7LWSZ6VRsbxrxtDg,1162
+ castor_extractor/visualization/sigma/client/endpoints.py,sha256=i7KTKnl2Os6752CdtJl0vPSC_Z6JxmacodV_saOnce0,1662
  castor_extractor/visualization/sigma/client/pagination.py,sha256=2bFA7GiBUUasFtHJKA90516d283p7Pg50-4zw6Fwt8I,726
- castor_extractor/visualization/sigma/extract.py,sha256=XIT1qsj6g6dgBWP8HPfj_medZexu48EaY9tUwi14gzM,2298
+ castor_extractor/visualization/sigma/client/sources_transformer.py,sha256=mRupzxjtjDqELIouHF0egBkgslDmn5Y4uqO_sbUGCNs,3244
+ castor_extractor/visualization/sigma/client/sources_transformer_test.py,sha256=06yUHXyv65amXLKXhix6K3kkVc1kpBqSjIYcxbyMI4Y,2766
+ castor_extractor/visualization/sigma/extract.py,sha256=poTh70Xm2D6BwbdGApLkjXy6-t4iZnOoMB5DPfaTLEI,2929
  castor_extractor/visualization/strategy/__init__.py,sha256=HOMv4JxqF5ZmViWi-pDE-PSXJRLTdXal_jtpHG_rlR8,123
  castor_extractor/visualization/strategy/assets.py,sha256=yFXF_dX01patC0HQ1eU7Jo_4DZ4m6IJEg0uCB71tMoI,480
  castor_extractor/visualization/strategy/client/__init__.py,sha256=XWP0yF5j6JefDJkDfX-RSJn3HF2ceQ0Yx1PLCfB3BBo,80
@@ -428,8 +431,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=4RgeSkHDWTWRyU2iLx
  castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
  castor_extractor/warehouse/sqlserver/query.py,sha256=7sW8cK3JzxPt6faTJ7e4lk9tE4fo_AeCymI-LqsSols,1276
  castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
- castor_extractor-0.24.35.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
- castor_extractor-0.24.35.dist-info/METADATA,sha256=-vrfKzS5B3r2qL7tjFjFBR-AizzuVIexEVJHCci7Z5s,26353
- castor_extractor-0.24.35.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
- castor_extractor-0.24.35.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
- castor_extractor-0.24.35.dist-info/RECORD,,
+ castor_extractor-0.24.38.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
+ castor_extractor-0.24.38.dist-info/METADATA,sha256=iCWUVbgDFS721szJ8kUGMA58Va3Roq3WmyGinZgnHMw,26638
+ castor_extractor-0.24.38.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+ castor_extractor-0.24.38.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
+ castor_extractor-0.24.38.dist-info/RECORD,,