castor-extractor 0.24.40__py3-none-any.whl → 0.24.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of castor-extractor might be problematic. Click here for more details.

CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.24.43 - 2025-08-20
4
+
5
+ * SQLServer:
6
+   * raise error when no database is left after filtering
7
+   * use uppercase INFORMATION_SCHEMA for case-sensitive database compatibility
8
+
9
+ ## 0.24.42 - 2025-08-19
10
+
11
+ * Strategy: exclude COLUMNS from extraction
12
+
13
+ ## 0.24.41 - 2025-08-19
14
+
15
+ * Sigma: retry on 429 errors when fetching connection paths
16
+
3
17
  ## 0.24.40 - 2025-08-18
4
18
 
5
19
  * SQLServer: fix database allowlist/blocklist filtering
@@ -1,6 +1,8 @@
1
1
  import logging
2
+ from http import HTTPStatus
2
3
  from typing import TYPE_CHECKING, Callable, Iterator
3
4
 
5
+ from ....utils import retry_request
4
6
  from .endpoints import SigmaEndpointFactory
5
7
 
6
8
  if TYPE_CHECKING:
@@ -8,6 +10,9 @@ if TYPE_CHECKING:
8
10
 
9
11
  logger = logging.getLogger(__name__)
10
12
 
13
+ SIGMA_CONNECTION_PATH_MAX_RETRY = 1
14
+ SIGMA_CONNECTION_PATH_SLEEP_MS = 30_000 # 30 seconds
15
+
11
16
 
12
17
  class SigmaSourcesTransformer:
13
18
  """Retrieves asset sources and enhances them with additional information."""
@@ -15,6 +20,17 @@ class SigmaSourcesTransformer:
15
20
  def __init__(self, api_client: "SigmaClient"):
16
21
  self.api_client = api_client
17
22
 
23
+ @retry_request(
24
+ status_codes=(HTTPStatus.TOO_MANY_REQUESTS,),
25
+ max_retries=SIGMA_CONNECTION_PATH_MAX_RETRY,
26
+ base_ms=SIGMA_CONNECTION_PATH_SLEEP_MS,
27
+ )
28
+ def _get_connection_path(self, table_id: str) -> dict:
29
+ """Retrieves the connection path for a given table id"""
30
+ return self.api_client._get(
31
+ endpoint=SigmaEndpointFactory.connection_path(table_id)
32
+ )
33
+
18
34
  def _map_table_id_to_connection_path(
19
35
  self, all_sources: list
20
36
  ) -> dict[str, dict]:
@@ -29,9 +45,7 @@ class SigmaSourcesTransformer:
29
45
  }
30
46
 
31
47
  return {
32
- table_id: self.api_client._get(
33
- endpoint=SigmaEndpointFactory.connection_path(table_id)
34
- )
48
+ table_id: self._get_connection_path(table_id)
35
49
  for table_id in unique_table_ids
36
50
  }
37
51
 
@@ -22,7 +22,7 @@ def iterate_all_data(
22
22
  ) -> Iterable[tuple[str, Union[list, dict]]]:
23
23
  """Iterate over the extracted data from Strategy"""
24
24
 
25
- for asset in StrategyAsset:
25
+ for asset in StrategyAsset.mandatory:
26
26
  logger.info(f"Extracting {asset.value.upper()} from REST API")
27
27
  data = client.fetch(asset)
28
28
  yield asset.name.lower(), list(deep_serialize(data))
@@ -81,7 +81,13 @@ class DatabricksAPIClient(APIClient):
81
81
  def databases(self) -> list[dict]:
82
82
  content = self._get(DatabricksEndpointFactory.databases())
83
83
  _databases = self.formatter.format_database(content.get("catalogs", []))
84
- return [d for d in _databases if self._keep_catalog(d["database_name"])]
84
+ filtered_databases = [
85
+ d for d in _databases if self._keep_catalog(d["database_name"])
86
+ ]
87
+ logger.info(
88
+ f"Available databases: {[d['database_name'] for d in filtered_databases]}"
89
+ )
90
+ return filtered_databases
85
91
 
86
92
  def _schemas_of_database(self, database: dict) -> list[dict]:
87
93
  payload = {"catalog_name": database["database_name"]}
@@ -57,6 +57,8 @@ def extract_all(**kwargs) -> None:
57
57
  blocked=kwargs.get("db_blocked"),
58
58
  )
59
59
 
60
+ logger.info(f"Available databases: {databases}\n")
61
+
60
62
  query_builder = MSSQLQueryBuilder(
61
63
  databases=databases,
62
64
  )
@@ -87,7 +87,7 @@ columns AS (
87
87
  i.comment,
88
88
  column_id = CONCAT(i.table_id, '.', c.column_name)
89
89
  FROM
90
- {database}.information_schema.columns AS c
90
+ {database}.INFORMATION_SCHEMA.COLUMNS AS c
91
91
  LEFT JOIN column_ids AS i
92
92
  ON
93
93
  (
@@ -8,7 +8,7 @@ WITH ids AS (
8
8
  SELECT DISTINCT
9
9
  table_catalog,
10
10
  table_schema
11
- FROM {database}.information_schema.tables
11
+ FROM {database}.INFORMATION_SCHEMA.TABLES
12
12
  )
13
13
 
14
14
  SELECT
@@ -90,7 +90,7 @@ meta AS (
90
90
  t.table_name,
91
91
  t.table_type
92
92
  FROM
93
- {database}.information_schema.tables AS t
93
+ {database}.INFORMATION_SCHEMA.TABLES AS t
94
94
  LEFT JOIN {database}.sys.databases AS db
95
95
  ON t.table_catalog COLLATE DATABASE_DEFAULT = db.name COLLATE DATABASE_DEFAULT
96
96
  )
@@ -11,6 +11,11 @@ from ..abstract import (
11
11
  logger = logging.getLogger(__name__)
12
12
 
13
13
 
14
+ _NO_DATABASE_ERROR_MSG = (
15
+ "No databases eligible for extraction. "
16
+ "If you are using the db_allow/db_block options, please make sure to use the correct case."
17
+ )
18
+
14
19
  _DATABASE_REQUIRED = (
15
20
  WarehouseAsset.SCHEMA,
16
21
  WarehouseAsset.TABLE,
@@ -29,6 +34,8 @@ class MSSQLQueryBuilder(AbstractQueryBuilder):
29
34
  time_filter: Optional[TimeFilter] = None,
30
35
  ):
31
36
  super().__init__(time_filter=time_filter)
37
+ if not databases:
38
+ raise ValueError(_NO_DATABASE_ERROR_MSG)
32
39
  self._databases = databases
33
40
 
34
41
  @staticmethod
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: castor-extractor
3
- Version: 0.24.40
3
+ Version: 0.24.43
4
4
  Summary: Extract your metadata assets.
5
5
  Home-page: https://www.castordoc.com/
6
6
  License: EULA
@@ -215,6 +215,20 @@ For any questions or bug report, contact us at [support@coalesce.io](mailto:supp
215
215
 
216
216
  # Changelog
217
217
 
218
+ ## 0.24.43 - 2025-08-20
219
+
220
+ * SQLServer:
221
+   * raise error when no database is left after filtering
222
+   * use uppercase INFORMATION_SCHEMA for case-sensitive database compatibility
223
+
224
+ ## 0.24.42 - 2025-08-19
225
+
226
+ * Strategy: exclude COLUMNS from extraction
227
+
228
+ ## 0.24.41 - 2025-08-19
229
+
230
+ * Sigma: retry on 429 errors when fetching connection paths
231
+
218
232
  ## 0.24.40 - 2025-08-18
219
233
 
220
234
  * SQLServer: fix database allowlist/blocklist filtering
@@ -1,4 +1,4 @@
1
- CHANGELOG.md,sha256=tgZkN-SNTMCro37DG0nW91MaD6ZnHM9VWWZG2-7TP68,19406
1
+ CHANGELOG.md,sha256=BP52t2sRBsw161mUbZUgLka80iFSbvhcYPju_bgXLL0,19737
2
2
  Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
3
3
  DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
4
4
  LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
@@ -277,7 +277,7 @@ castor_extractor/visualization/sigma/client/client_test.py,sha256=ae0ZOvKutCm44j
277
277
  castor_extractor/visualization/sigma/client/credentials.py,sha256=XddAuQSmCKpxJ70TQgRnOj0vMPYVtiStk_lMMQ1AiNM,693
278
278
  castor_extractor/visualization/sigma/client/endpoints.py,sha256=i7KTKnl2Os6752CdtJl0vPSC_Z6JxmacodV_saOnce0,1662
279
279
  castor_extractor/visualization/sigma/client/pagination.py,sha256=2bFA7GiBUUasFtHJKA90516d283p7Pg50-4zw6Fwt8I,726
280
- castor_extractor/visualization/sigma/client/sources_transformer.py,sha256=mRupzxjtjDqELIouHF0egBkgslDmn5Y4uqO_sbUGCNs,3244
280
+ castor_extractor/visualization/sigma/client/sources_transformer.py,sha256=n-5mZWSvzfTwpM5VP_bwlcxcaAwCKEEbpMCG_1KRVP4,3748
281
281
  castor_extractor/visualization/sigma/client/sources_transformer_test.py,sha256=06yUHXyv65amXLKXhix6K3kkVc1kpBqSjIYcxbyMI4Y,2766
282
282
  castor_extractor/visualization/sigma/extract.py,sha256=poTh70Xm2D6BwbdGApLkjXy6-t4iZnOoMB5DPfaTLEI,2929
283
283
  castor_extractor/visualization/strategy/__init__.py,sha256=HOMv4JxqF5ZmViWi-pDE-PSXJRLTdXal_jtpHG_rlR8,123
@@ -286,7 +286,7 @@ castor_extractor/visualization/strategy/client/__init__.py,sha256=XWP0yF5j6JefDJ
286
286
  castor_extractor/visualization/strategy/client/client.py,sha256=6DJO0Fh67FXxmwY5h_X9cu5sEq3GhM19b9hwn_fvhSE,9460
287
287
  castor_extractor/visualization/strategy/client/credentials.py,sha256=urFfNxWX1JG6wwFMYImufQzHa5g-sgjdlVGzi63owwg,1113
288
288
  castor_extractor/visualization/strategy/client/properties.py,sha256=66oBm8Kz6HEQW_jNR5_fAI_O921R2F5yH2Ff3zjtJOk,4500
289
- castor_extractor/visualization/strategy/extract.py,sha256=2fBuvS2xiOGXRpxXnZsE_C3en6t1-BlM5TbusjHyEkg,1166
289
+ castor_extractor/visualization/strategy/extract.py,sha256=9Ec48BwiA-5nykhcQiqkSiL_RB034_Ci-ck89YK5Nic,1176
290
290
  castor_extractor/visualization/tableau/__init__.py,sha256=eFI_1hjdkxyUiAYiy3szwyuwn3yJ5C_KbpBU0ySJDcQ,138
291
291
  castor_extractor/visualization/tableau/assets.py,sha256=HbCRd8VCj1WBEeqg9jwnygnT7xOFJ6PQD7Lq7sV-XR0,635
292
292
  castor_extractor/visualization/tableau/client/__init__.py,sha256=P8RKFKOC63WkH5hdEytJOwHS9vzQ8GXreLfXZetmMP8,78
@@ -335,7 +335,7 @@ castor_extractor/warehouse/bigquery/queries/view_ddl.sql,sha256=obCm-IN9V8_YSZTw
335
335
  castor_extractor/warehouse/bigquery/query.py,sha256=FEekxlkrfAXzsT8Kj1AIqYd5mURB5MlZIkbFVXVqEhU,4762
336
336
  castor_extractor/warehouse/bigquery/types.py,sha256=rfKkKA13Et7TM4I0uVaXkLfuaBXkv51bNTp4AO0QSdw,57
337
337
  castor_extractor/warehouse/databricks/__init__.py,sha256=YG3YSIJgCFRjjI8eExy9T7qGnfnjWhMFh8c15KTs_BA,184
338
- castor_extractor/warehouse/databricks/api_client.py,sha256=kLcUGSgrfybZUrpt0tE7qe2OoSSN7IK4myyB7c0czOY,6260
338
+ castor_extractor/warehouse/databricks/api_client.py,sha256=qBf2T5R1l5a1_kmTbrTYW8VNsz4h_L3zCpUuboNEunQ,6448
339
339
  castor_extractor/warehouse/databricks/api_client_test.py,sha256=YTWC-X7L-XAfK5b39TUgTmR1ifv0QrY5tvLNoSbpmjg,466
340
340
  castor_extractor/warehouse/databricks/client.py,sha256=LzpeVQIOYi_QTfdOHbK6SB4SgxhZ7p9TNxh0Iwfz850,3307
341
341
  castor_extractor/warehouse/databricks/client_test.py,sha256=dqEdEAt-6e8CtQ7M2L5vDYkn4JvOjqyqZSFEpQ55WRc,1432
@@ -422,17 +422,17 @@ castor_extractor/warehouse/snowflake/queries/view_ddl.sql,sha256=eWsci_50cxiYIv3
422
422
  castor_extractor/warehouse/snowflake/query.py,sha256=C2LTdPwBzMQ_zMncg0Kq4_WkoY7K9as5tvxBDrIOlwI,1763
423
423
  castor_extractor/warehouse/sqlserver/__init__.py,sha256=PdOuYznmvKAbfWAm8UdN47MfEsd9jqPi_dDi3WEo1KY,116
424
424
  castor_extractor/warehouse/sqlserver/client.py,sha256=Bjfpw96IKAQfWPiU5SZYEDfetwfkqZrnKbQYoStcnZc,2007
425
- castor_extractor/warehouse/sqlserver/extract.py,sha256=GbOlSq8JR6HaJZunkfiRxaSt0pbgazQjF8GpgqWWIcU,2294
425
+ castor_extractor/warehouse/sqlserver/extract.py,sha256=C8MubArUUq5r8UyalXzmQycqgQOv_0DYKXmxDYeGpHg,2350
426
426
  castor_extractor/warehouse/sqlserver/queries/.sqlfluff,sha256=yy0KQdz8I_67vnXyX8eeWwOWkxTXvHyVKSVwhURktd8,48
427
- castor_extractor/warehouse/sqlserver/queries/column.sql,sha256=eRILCgdygYRvtfSdxaswIiIYKW-PiJXW2qi3yHtrfns,2913
427
+ castor_extractor/warehouse/sqlserver/queries/column.sql,sha256=MpIpUGP-oRJEDv6baO34RKg0KtZqK9CHH7e818-_qvY,2913
428
428
  castor_extractor/warehouse/sqlserver/queries/database.sql,sha256=4dPeBCn85MEOXr1f-DPXxiI3RvvoE_1n8lsbTs26E0I,150
429
- castor_extractor/warehouse/sqlserver/queries/schema.sql,sha256=Zp4G86FJ_Be8Zqvdlu7K8DqmsUL62kxbwaUk5asZ0V4,881
430
- castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=YwFhHc6rGbszqQt7Izh7EngVwrrBoEZ9kniuWXNtGco,2837
429
+ castor_extractor/warehouse/sqlserver/queries/schema.sql,sha256=sMOrMC2UCqTzRMHiQqQFWozaujrefLBSkwMrpbo-rO4,881
430
+ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=liLvzp6NyrW8vF7TrP5_4Asmw2cyMDu8FCvhEV6Z1jc,2837
431
431
  castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
432
- castor_extractor/warehouse/sqlserver/query.py,sha256=7sW8cK3JzxPt6faTJ7e4lk9tE4fo_AeCymI-LqsSols,1276
432
+ castor_extractor/warehouse/sqlserver/query.py,sha256=QcGJ3lYbWpGCx-qMytTiEnTqGeEsUZrBLIdg_9jSm8A,1526
433
433
  castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
434
- castor_extractor-0.24.40.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
435
- castor_extractor-0.24.40.dist-info/METADATA,sha256=ONg1SCc3gcrOJqBE92EtyfQctf-hRxI_u2VUbBpvgVA,26859
436
- castor_extractor-0.24.40.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
437
- castor_extractor-0.24.40.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
438
- castor_extractor-0.24.40.dist-info/RECORD,,
434
+ castor_extractor-0.24.43.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
435
+ castor_extractor-0.24.43.dist-info/METADATA,sha256=05SXgWB7i36oBtLtNbik3yDxf2vliZuYuVYvXHP7bqg,27190
436
+ castor_extractor-0.24.43.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
437
+ castor_extractor-0.24.43.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
438
+ castor_extractor-0.24.43.dist-info/RECORD,,