castor-extractor 0.24.40__py3-none-any.whl → 0.24.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +14 -0
- castor_extractor/visualization/sigma/client/sources_transformer.py +17 -3
- castor_extractor/visualization/strategy/extract.py +1 -1
- castor_extractor/warehouse/databricks/api_client.py +7 -1
- castor_extractor/warehouse/sqlserver/extract.py +2 -0
- castor_extractor/warehouse/sqlserver/queries/column.sql +1 -1
- castor_extractor/warehouse/sqlserver/queries/schema.sql +1 -1
- castor_extractor/warehouse/sqlserver/queries/table.sql +1 -1
- castor_extractor/warehouse/sqlserver/query.py +7 -0
- {castor_extractor-0.24.40.dist-info → castor_extractor-0.24.43.dist-info}/METADATA +15 -1
- {castor_extractor-0.24.40.dist-info → castor_extractor-0.24.43.dist-info}/RECORD +14 -14
- {castor_extractor-0.24.40.dist-info → castor_extractor-0.24.43.dist-info}/LICENCE +0 -0
- {castor_extractor-0.24.40.dist-info → castor_extractor-0.24.43.dist-info}/WHEEL +0 -0
- {castor_extractor-0.24.40.dist-info → castor_extractor-0.24.43.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED
|
@@ -1,5 +1,19 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.24.43 - 2025-08-20
|
|
4
|
+
|
|
5
|
+
* SQLServer:
|
|
6
|
+
* raise error when no database is left after filtering
|
|
7
|
+
* use uppercase INFORMATION_SCHEMA for case-sensitive database compatibility
|
|
8
|
+
|
|
9
|
+
## 0.24.42 - 2025-08-19
|
|
10
|
+
|
|
11
|
+
* Strategy: exclude COLUMNS from extraction
|
|
12
|
+
|
|
13
|
+
## 0.24.41 - 2025-08-19
|
|
14
|
+
|
|
15
|
+
* Sigma: retry on 429 errors when fetching connection paths
|
|
16
|
+
|
|
3
17
|
## 0.24.40 - 2025-08-18
|
|
4
18
|
|
|
5
19
|
* SQLServer: fix database allowlist/blocklist filtering
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
from http import HTTPStatus
|
|
2
3
|
from typing import TYPE_CHECKING, Callable, Iterator
|
|
3
4
|
|
|
5
|
+
from ....utils import retry_request
|
|
4
6
|
from .endpoints import SigmaEndpointFactory
|
|
5
7
|
|
|
6
8
|
if TYPE_CHECKING:
|
|
@@ -8,6 +10,9 @@ if TYPE_CHECKING:
|
|
|
8
10
|
|
|
9
11
|
logger = logging.getLogger(__name__)
|
|
10
12
|
|
|
13
|
+
SIGMA_CONNECTION_PATH_MAX_RETRY = 1
|
|
14
|
+
SIGMA_CONNECTION_PATH_SLEEP_MS = 30_000 # 30 seconds
|
|
15
|
+
|
|
11
16
|
|
|
12
17
|
class SigmaSourcesTransformer:
|
|
13
18
|
"""Retrieves asset sources and enhances them with additional information."""
|
|
@@ -15,6 +20,17 @@ class SigmaSourcesTransformer:
|
|
|
15
20
|
def __init__(self, api_client: "SigmaClient"):
|
|
16
21
|
self.api_client = api_client
|
|
17
22
|
|
|
23
|
+
@retry_request(
|
|
24
|
+
status_codes=(HTTPStatus.TOO_MANY_REQUESTS,),
|
|
25
|
+
max_retries=SIGMA_CONNECTION_PATH_MAX_RETRY,
|
|
26
|
+
base_ms=SIGMA_CONNECTION_PATH_SLEEP_MS,
|
|
27
|
+
)
|
|
28
|
+
def _get_connection_path(self, table_id: str) -> dict:
|
|
29
|
+
"""Retrieves the connection path for a given table id"""
|
|
30
|
+
return self.api_client._get(
|
|
31
|
+
endpoint=SigmaEndpointFactory.connection_path(table_id)
|
|
32
|
+
)
|
|
33
|
+
|
|
18
34
|
def _map_table_id_to_connection_path(
|
|
19
35
|
self, all_sources: list
|
|
20
36
|
) -> dict[str, dict]:
|
|
@@ -29,9 +45,7 @@ class SigmaSourcesTransformer:
|
|
|
29
45
|
}
|
|
30
46
|
|
|
31
47
|
return {
|
|
32
|
-
table_id: self.api_client._get(
|
|
33
|
-
endpoint=SigmaEndpointFactory.connection_path(table_id)
|
|
34
|
-
)
|
|
48
|
+
table_id: self._get_connection_path(table_id)
|
|
35
49
|
for table_id in unique_table_ids
|
|
36
50
|
}
|
|
37
51
|
|
|
@@ -22,7 +22,7 @@ def iterate_all_data(
|
|
|
22
22
|
) -> Iterable[tuple[str, Union[list, dict]]]:
|
|
23
23
|
"""Iterate over the extracted data from Strategy"""
|
|
24
24
|
|
|
25
|
-
for asset in StrategyAsset:
|
|
25
|
+
for asset in StrategyAsset.mandatory:
|
|
26
26
|
logger.info(f"Extracting {asset.value.upper()} from REST API")
|
|
27
27
|
data = client.fetch(asset)
|
|
28
28
|
yield asset.name.lower(), list(deep_serialize(data))
|
|
@@ -81,7 +81,13 @@ class DatabricksAPIClient(APIClient):
|
|
|
81
81
|
def databases(self) -> list[dict]:
|
|
82
82
|
content = self._get(DatabricksEndpointFactory.databases())
|
|
83
83
|
_databases = self.formatter.format_database(content.get("catalogs", []))
|
|
84
|
-
|
|
84
|
+
filtered_databases = [
|
|
85
|
+
d for d in _databases if self._keep_catalog(d["database_name"])
|
|
86
|
+
]
|
|
87
|
+
logger.info(
|
|
88
|
+
f"Available databases: {[d['database_name'] for d in filtered_databases]}"
|
|
89
|
+
)
|
|
90
|
+
return filtered_databases
|
|
85
91
|
|
|
86
92
|
def _schemas_of_database(self, database: dict) -> list[dict]:
|
|
87
93
|
payload = {"catalog_name": database["database_name"]}
|
|
@@ -90,7 +90,7 @@ meta AS (
|
|
|
90
90
|
t.table_name,
|
|
91
91
|
t.table_type
|
|
92
92
|
FROM
|
|
93
|
-
{database}.information_schema.tables AS t
|
|
93
|
+
{database}.INFORMATION_SCHEMA.TABLES AS t
|
|
94
94
|
LEFT JOIN {database}.sys.databases AS db
|
|
95
95
|
ON t.table_catalog COLLATE DATABASE_DEFAULT = db.name COLLATE DATABASE_DEFAULT
|
|
96
96
|
)
|
|
@@ -11,6 +11,11 @@ from ..abstract import (
|
|
|
11
11
|
logger = logging.getLogger(__name__)
|
|
12
12
|
|
|
13
13
|
|
|
14
|
+
_NO_DATABASE_ERROR_MSG = (
|
|
15
|
+
"No databases eligible for extraction. "
|
|
16
|
+
"If you are using the db_allow/db_block options, please make sure to use the correct case."
|
|
17
|
+
)
|
|
18
|
+
|
|
14
19
|
_DATABASE_REQUIRED = (
|
|
15
20
|
WarehouseAsset.SCHEMA,
|
|
16
21
|
WarehouseAsset.TABLE,
|
|
@@ -29,6 +34,8 @@ class MSSQLQueryBuilder(AbstractQueryBuilder):
|
|
|
29
34
|
time_filter: Optional[TimeFilter] = None,
|
|
30
35
|
):
|
|
31
36
|
super().__init__(time_filter=time_filter)
|
|
37
|
+
if not databases:
|
|
38
|
+
raise ValueError(_NO_DATABASE_ERROR_MSG)
|
|
32
39
|
self._databases = databases
|
|
33
40
|
|
|
34
41
|
@staticmethod
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: castor-extractor
|
|
3
|
-
Version: 0.24.40
|
|
3
|
+
Version: 0.24.43
|
|
4
4
|
Summary: Extract your metadata assets.
|
|
5
5
|
Home-page: https://www.castordoc.com/
|
|
6
6
|
License: EULA
|
|
@@ -215,6 +215,20 @@ For any questions or bug report, contact us at [support@coalesce.io](mailto:supp
|
|
|
215
215
|
|
|
216
216
|
# Changelog
|
|
217
217
|
|
|
218
|
+
## 0.24.43 - 2025-08-20
|
|
219
|
+
|
|
220
|
+
* SQLServer:
|
|
221
|
+
* raise error when no database is left after filtering
|
|
222
|
+
* use uppercase INFORMATION_SCHEMA for case-sensitive database compatibility
|
|
223
|
+
|
|
224
|
+
## 0.24.42 - 2025-08-19
|
|
225
|
+
|
|
226
|
+
* Strategy: exclude COLUMNS from extraction
|
|
227
|
+
|
|
228
|
+
## 0.24.41 - 2025-08-19
|
|
229
|
+
|
|
230
|
+
* Sigma: retry on 429 errors when fetching connection paths
|
|
231
|
+
|
|
218
232
|
## 0.24.40 - 2025-08-18
|
|
219
233
|
|
|
220
234
|
* SQLServer: fix database allowlist/blocklist filtering
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
CHANGELOG.md,sha256=
|
|
1
|
+
CHANGELOG.md,sha256=BP52t2sRBsw161mUbZUgLka80iFSbvhcYPju_bgXLL0,19737
|
|
2
2
|
Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
|
|
3
3
|
DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
|
|
4
4
|
LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
@@ -277,7 +277,7 @@ castor_extractor/visualization/sigma/client/client_test.py,sha256=ae0ZOvKutCm44j
|
|
|
277
277
|
castor_extractor/visualization/sigma/client/credentials.py,sha256=XddAuQSmCKpxJ70TQgRnOj0vMPYVtiStk_lMMQ1AiNM,693
|
|
278
278
|
castor_extractor/visualization/sigma/client/endpoints.py,sha256=i7KTKnl2Os6752CdtJl0vPSC_Z6JxmacodV_saOnce0,1662
|
|
279
279
|
castor_extractor/visualization/sigma/client/pagination.py,sha256=2bFA7GiBUUasFtHJKA90516d283p7Pg50-4zw6Fwt8I,726
|
|
280
|
-
castor_extractor/visualization/sigma/client/sources_transformer.py,sha256=
|
|
280
|
+
castor_extractor/visualization/sigma/client/sources_transformer.py,sha256=n-5mZWSvzfTwpM5VP_bwlcxcaAwCKEEbpMCG_1KRVP4,3748
|
|
281
281
|
castor_extractor/visualization/sigma/client/sources_transformer_test.py,sha256=06yUHXyv65amXLKXhix6K3kkVc1kpBqSjIYcxbyMI4Y,2766
|
|
282
282
|
castor_extractor/visualization/sigma/extract.py,sha256=poTh70Xm2D6BwbdGApLkjXy6-t4iZnOoMB5DPfaTLEI,2929
|
|
283
283
|
castor_extractor/visualization/strategy/__init__.py,sha256=HOMv4JxqF5ZmViWi-pDE-PSXJRLTdXal_jtpHG_rlR8,123
|
|
@@ -286,7 +286,7 @@ castor_extractor/visualization/strategy/client/__init__.py,sha256=XWP0yF5j6JefDJ
|
|
|
286
286
|
castor_extractor/visualization/strategy/client/client.py,sha256=6DJO0Fh67FXxmwY5h_X9cu5sEq3GhM19b9hwn_fvhSE,9460
|
|
287
287
|
castor_extractor/visualization/strategy/client/credentials.py,sha256=urFfNxWX1JG6wwFMYImufQzHa5g-sgjdlVGzi63owwg,1113
|
|
288
288
|
castor_extractor/visualization/strategy/client/properties.py,sha256=66oBm8Kz6HEQW_jNR5_fAI_O921R2F5yH2Ff3zjtJOk,4500
|
|
289
|
-
castor_extractor/visualization/strategy/extract.py,sha256=
|
|
289
|
+
castor_extractor/visualization/strategy/extract.py,sha256=9Ec48BwiA-5nykhcQiqkSiL_RB034_Ci-ck89YK5Nic,1176
|
|
290
290
|
castor_extractor/visualization/tableau/__init__.py,sha256=eFI_1hjdkxyUiAYiy3szwyuwn3yJ5C_KbpBU0ySJDcQ,138
|
|
291
291
|
castor_extractor/visualization/tableau/assets.py,sha256=HbCRd8VCj1WBEeqg9jwnygnT7xOFJ6PQD7Lq7sV-XR0,635
|
|
292
292
|
castor_extractor/visualization/tableau/client/__init__.py,sha256=P8RKFKOC63WkH5hdEytJOwHS9vzQ8GXreLfXZetmMP8,78
|
|
@@ -335,7 +335,7 @@ castor_extractor/warehouse/bigquery/queries/view_ddl.sql,sha256=obCm-IN9V8_YSZTw
|
|
|
335
335
|
castor_extractor/warehouse/bigquery/query.py,sha256=FEekxlkrfAXzsT8Kj1AIqYd5mURB5MlZIkbFVXVqEhU,4762
|
|
336
336
|
castor_extractor/warehouse/bigquery/types.py,sha256=rfKkKA13Et7TM4I0uVaXkLfuaBXkv51bNTp4AO0QSdw,57
|
|
337
337
|
castor_extractor/warehouse/databricks/__init__.py,sha256=YG3YSIJgCFRjjI8eExy9T7qGnfnjWhMFh8c15KTs_BA,184
|
|
338
|
-
castor_extractor/warehouse/databricks/api_client.py,sha256=
|
|
338
|
+
castor_extractor/warehouse/databricks/api_client.py,sha256=qBf2T5R1l5a1_kmTbrTYW8VNsz4h_L3zCpUuboNEunQ,6448
|
|
339
339
|
castor_extractor/warehouse/databricks/api_client_test.py,sha256=YTWC-X7L-XAfK5b39TUgTmR1ifv0QrY5tvLNoSbpmjg,466
|
|
340
340
|
castor_extractor/warehouse/databricks/client.py,sha256=LzpeVQIOYi_QTfdOHbK6SB4SgxhZ7p9TNxh0Iwfz850,3307
|
|
341
341
|
castor_extractor/warehouse/databricks/client_test.py,sha256=dqEdEAt-6e8CtQ7M2L5vDYkn4JvOjqyqZSFEpQ55WRc,1432
|
|
@@ -422,17 +422,17 @@ castor_extractor/warehouse/snowflake/queries/view_ddl.sql,sha256=eWsci_50cxiYIv3
|
|
|
422
422
|
castor_extractor/warehouse/snowflake/query.py,sha256=C2LTdPwBzMQ_zMncg0Kq4_WkoY7K9as5tvxBDrIOlwI,1763
|
|
423
423
|
castor_extractor/warehouse/sqlserver/__init__.py,sha256=PdOuYznmvKAbfWAm8UdN47MfEsd9jqPi_dDi3WEo1KY,116
|
|
424
424
|
castor_extractor/warehouse/sqlserver/client.py,sha256=Bjfpw96IKAQfWPiU5SZYEDfetwfkqZrnKbQYoStcnZc,2007
|
|
425
|
-
castor_extractor/warehouse/sqlserver/extract.py,sha256=
|
|
425
|
+
castor_extractor/warehouse/sqlserver/extract.py,sha256=C8MubArUUq5r8UyalXzmQycqgQOv_0DYKXmxDYeGpHg,2350
|
|
426
426
|
castor_extractor/warehouse/sqlserver/queries/.sqlfluff,sha256=yy0KQdz8I_67vnXyX8eeWwOWkxTXvHyVKSVwhURktd8,48
|
|
427
|
-
castor_extractor/warehouse/sqlserver/queries/column.sql,sha256=
|
|
427
|
+
castor_extractor/warehouse/sqlserver/queries/column.sql,sha256=MpIpUGP-oRJEDv6baO34RKg0KtZqK9CHH7e818-_qvY,2913
|
|
428
428
|
castor_extractor/warehouse/sqlserver/queries/database.sql,sha256=4dPeBCn85MEOXr1f-DPXxiI3RvvoE_1n8lsbTs26E0I,150
|
|
429
|
-
castor_extractor/warehouse/sqlserver/queries/schema.sql,sha256=
|
|
430
|
-
castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=
|
|
429
|
+
castor_extractor/warehouse/sqlserver/queries/schema.sql,sha256=sMOrMC2UCqTzRMHiQqQFWozaujrefLBSkwMrpbo-rO4,881
|
|
430
|
+
castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=liLvzp6NyrW8vF7TrP5_4Asmw2cyMDu8FCvhEV6Z1jc,2837
|
|
431
431
|
castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
|
|
432
|
-
castor_extractor/warehouse/sqlserver/query.py,sha256=
|
|
432
|
+
castor_extractor/warehouse/sqlserver/query.py,sha256=QcGJ3lYbWpGCx-qMytTiEnTqGeEsUZrBLIdg_9jSm8A,1526
|
|
433
433
|
castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
|
|
434
|
-
castor_extractor-0.24.40.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
435
|
-
castor_extractor-0.24.
|
|
436
|
-
castor_extractor-0.24.40.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
|
437
|
-
castor_extractor-0.24.40.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
|
|
438
|
-
castor_extractor-0.24.40.dist-info/RECORD,,
|
|
434
|
+
castor_extractor-0.24.43.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
435
|
+
castor_extractor-0.24.43.dist-info/METADATA,sha256=05SXgWB7i36oBtLtNbik3yDxf2vliZuYuVYvXHP7bqg,27190
|
|
436
|
+
castor_extractor-0.24.43.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
|
437
|
+
castor_extractor-0.24.43.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
|
|
438
|
+
castor_extractor-0.24.43.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|