castor-extractor 0.20.4__py3-none-any.whl → 0.20.6__py3-none-any.whl

This diff shows the differences between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of castor-extractor might be problematic. Click here for more details.

CHANGELOG.md CHANGED
@@ -1,6 +1,14 @@
1
1
 
2
2
  # Changelog
3
3
 
4
+ ## 0.20.6 - 2024-10-15
5
+
6
+ * Tableau: include `site_id` in **workbooks** to build url
7
+
8
+ ## 0.20.5 - 2024-10-09
9
+
10
+ * Redshift: enable extraction from a Redshift Serverless instance
11
+
4
12
  ## 0.20.4 - 2024-10-09
5
13
 
6
14
  * Salesforce warehouse: `Labels` instead of `api_names` for columns
Dockerfile CHANGED
@@ -1,4 +1,4 @@
1
- # syntax=docker/dockerfile:1.5
1
+ # syntax=docker/dockerfile:1
2
2
 
3
3
  FROM --platform=linux/amd64 python:3.11-slim
4
4
 
@@ -23,6 +23,11 @@ def main():
23
23
  action="store_true",
24
24
  help="Skips files already extracted instead of replacing them",
25
25
  )
26
+ parser.add_argument(
27
+ "--serverless",
28
+ action="store_true",
29
+ help="Enables extraction for Redshift Serverless",
30
+ )
26
31
  parser.set_defaults(skip_existing=False)
27
32
 
28
33
  args = parser.parse_args()
@@ -34,5 +39,6 @@ def main():
34
39
  user=args.user,
35
40
  password=args.password,
36
41
  output_directory=args.output,
42
+ serverless=args.serverless,
37
43
  skip_existing=args.skip_existing,
38
44
  )
@@ -4,7 +4,7 @@ import tableauserverclient as TSC # type: ignore
4
4
 
5
5
  from ....utils import SerializedAsset
6
6
  from ..assets import TABLEAU_PULSE_ASSETS, TableauRevampAsset
7
- from ..constants import DEFAULT_TIMEOUT_SECONDS
7
+ from ..constants import CREDENTIALS_SITE_ID_KEY, DEFAULT_TIMEOUT_SECONDS
8
8
  from .client_metadata_api import TableauClientMetadataApi
9
9
  from .client_rest_api import TableauClientRestApi
10
10
  from .client_tsc import TableauClientTSC
@@ -33,6 +33,15 @@ _REST_API_ASSETS = (
33
33
  logging.getLogger("tableau.endpoint").setLevel(logging.WARNING)
34
34
 
35
35
 
36
+ def _add_site_id(workbooks: SerializedAsset, site_id: str) -> SerializedAsset:
37
+ """
38
+ Add site_id from credentials: it's necessary to compute workbook's url
39
+ """
40
+ for workbook in workbooks:
41
+ workbook[CREDENTIALS_SITE_ID_KEY] = site_id
42
+ return workbooks
43
+
44
+
36
45
  def _merge_datasources(
37
46
  datasources: SerializedAsset,
38
47
  tsc_datasources: SerializedAsset,
@@ -182,7 +191,10 @@ class TableauRevampClient:
182
191
  def _fetch_workbooks(self) -> SerializedAsset:
183
192
  asset = TableauRevampAsset.WORKBOOK
184
193
 
194
+ site_id = self._credentials.site_id
185
195
  workbooks = self._client_metadata.fetch(asset)
196
+ workbooks = _add_site_id(workbooks, site_id)
197
+
186
198
  workbook_projects = self._client_tsc.fetch(asset)
187
199
 
188
200
  return _merge_workbooks(workbooks, workbook_projects)
@@ -1,3 +1,5 @@
1
1
  DEFAULT_PAGE_SIZE = 100
2
2
 
3
3
  DEFAULT_TIMEOUT_SECONDS = 100
4
+
5
+ CREDENTIALS_SITE_ID_KEY = "credentials_site_id"
@@ -34,6 +34,7 @@ REDSHIFT_PASSWORD = "CASTOR_REDSHIFT_PASSWORD" # noqa: S105
34
34
  REDSHIFT_HOST = "CASTOR_REDSHIFT_HOST"
35
35
  REDSHIFT_PORT = "CASTOR_REDSHIFT_PORT"
36
36
  REDSHIFT_DATABASE = "CASTOR_REDSHIFT_DATABASE"
37
+ REDSHIFT_SERVERLESS = "CASTOR_REDSHIFT_SERVERLESS"
37
38
 
38
39
 
39
40
  def _credentials(params: dict) -> dict:
@@ -48,6 +49,14 @@ def _credentials(params: dict) -> dict:
48
49
  }
49
50
 
50
51
 
52
+ def _query_builder(params: dict) -> RedshiftQueryBuilder:
53
+ env_parameter = from_env(REDSHIFT_SERVERLESS, allow_missing=True)
54
+ from_env_ = str(env_parameter).lower() == "true"
55
+ from_params_ = params.get("serverless", False)
56
+ is_serverless = from_params_ or from_env_
57
+ return RedshiftQueryBuilder(is_serverless=is_serverless)
58
+
59
+
51
60
  def extract_all(**kwargs) -> None:
52
61
  """
53
62
  Extract all assets from Redshift and store the results in CSV files
@@ -56,7 +65,7 @@ def extract_all(**kwargs) -> None:
56
65
 
57
66
  client = RedshiftClient(credentials=_credentials(kwargs))
58
67
 
59
- query_builder = RedshiftQueryBuilder()
68
+ query_builder = _query_builder(kwargs)
60
69
 
61
70
  storage = LocalStorage(directory=output_directory)
62
71
 
@@ -0,0 +1,26 @@
1
+ import pytest
2
+
3
+ from .extract import (
4
+ REDSHIFT_SERVERLESS,
5
+ _query_builder,
6
+ )
7
+
8
+
9
+ @pytest.mark.parametrize(
10
+ "serverless_param,env_param,expected",
11
+ [
12
+ (True, "False", True),
13
+ (False, "True", True),
14
+ (None, "TRUE", True),
15
+ (None, "TrUe", True),
16
+ (None, "FAlSE", False),
17
+ (None, "False", False),
18
+ (None, None, False),
19
+ (True, None, True),
20
+ ],
21
+ )
22
+ def test__query_builder(serverless_param, env_param, expected, monkeypatch):
23
+ params = {"serverless": serverless_param}
24
+ monkeypatch.setenv(REDSHIFT_SERVERLESS, env_param)
25
+
26
+ assert _query_builder(params).is_serverless == expected
@@ -0,0 +1,69 @@
1
+ WITH parameters AS (
2
+ SELECT
3
+ :day AS day_start,
4
+ :hour_min AS hour_min,
5
+ :hour_max AS hour_max
6
+ ),
7
+
8
+ queries_deduplicated AS (
9
+ SELECT DISTINCT q.query_id
10
+ FROM SYS_QUERY_HISTORY AS q
11
+ CROSS JOIN parameters AS p
12
+ WHERE TRUE
13
+ AND DATE(q.start_time) = p.day_start
14
+ AND EXTRACT('hour' FROM q.start_time) BETWEEN p.hour_min AND p.hour_max
15
+ ),
16
+
17
+ query AS (
18
+ SELECT
19
+ q.query_id,
20
+ qt.text,
21
+ qt.sequence,
22
+ COUNT(*) OVER(PARTITION BY q.query_id) AS sequence_count
23
+ FROM queries_deduplicated AS q
24
+ INNER JOIN SYS_QUERY_TEXT AS qt ON q.query_id = qt.query_id
25
+ ),
26
+
27
+ raw_query_text AS
28
+ (
29
+ SELECT
30
+ q.query_id,
31
+ LISTAGG(q.text, '') WITHIN GROUP (ORDER BY q.sequence) AS agg_text
32
+ FROM query AS q
33
+ WHERE TRUE
34
+ -- LISTAGG raises an error when total length >= 65535
35
+ -- each query text contains 4000 char max
36
+ AND q.sequence_count < (65535 / 4000)
37
+ GROUP BY q.query_id
38
+ ),
39
+
40
+ query_text AS (
41
+ SELECT
42
+ query_id,
43
+ CASE
44
+ WHEN agg_text ILIKE 'INSERT INTO%%'
45
+ THEN REGEXP_REPLACE(agg_text, 'VALUES (.*)', 'DEFAULT VALUES')
46
+ ELSE agg_text
47
+ END AS agg_text
48
+ FROM raw_query_text
49
+ )
50
+ SELECT
51
+ q.query_id::VARCHAR(256) AS query_id,
52
+ qt.agg_text::VARCHAR(60000) AS query_text,
53
+ q.database_name AS database_id,
54
+ q.database_name AS database_name,
55
+ q.session_id AS process_id,
56
+ 0 as aborted,
57
+ q.start_time AS start_time,
58
+ q.end_time AS end_time,
59
+ q.user_id AS user_id,
60
+ q.query_label,
61
+ u.usename AS user_name
62
+ FROM SYS_QUERY_HISTORY AS q
63
+ JOIN query_text AS qt ON q.query_id = qt.query_id
64
+ JOIN pg_catalog.pg_user AS u ON u.usesysid = q.user_id
65
+ CROSS JOIN parameters AS p
66
+ WHERE TRUE
67
+ AND DATE(q.start_time) = p.day_start
68
+ AND EXTRACT('hour' FROM q.start_time) BETWEEN p.hour_min AND p.hour_max
69
+ AND q.status = 'success'
@@ -15,10 +15,21 @@ class RedshiftQueryBuilder(AbstractQueryBuilder):
15
15
 
16
16
  def __init__(
17
17
  self,
18
+ is_serverless: bool = False,
18
19
  time_filter: Optional[TimeFilter] = None,
19
20
  ):
20
21
  super().__init__(time_filter=time_filter)
22
+ self.is_serverless = is_serverless
23
+
24
+ def build_query_serverless(self) -> ExtractionQuery:
25
+ """To get the query history in Redshift Serverless, we cannot use STL tables."""
26
+ statement = self._load_from_file("query_serverless.sql")
27
+ params = self._time_filter.to_dict()
28
+ return ExtractionQuery(statement, params)
21
29
 
22
30
  def build(self, asset: WarehouseAsset) -> List[ExtractionQuery]:
23
- query = self.build_default(asset)
31
+ if asset == WarehouseAsset.QUERY and self.is_serverless:
32
+ query = self.build_query_serverless()
33
+ else:
34
+ query = self.build_default(asset)
24
35
  return [query]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: castor-extractor
3
- Version: 0.20.4
3
+ Version: 0.20.6
4
4
  Summary: Extract your metadata assets.
5
5
  Home-page: https://www.castordoc.com/
6
6
  License: EULA
@@ -208,6 +208,14 @@ For any questions or bug report, contact us at [support@castordoc.com](mailto:su
208
208
 
209
209
  # Changelog
210
210
 
211
+ ## 0.20.6 - 2024-10-15
212
+
213
+ * Tableau: include `site_id` in **workbooks** to build url
214
+
215
+ ## 0.20.5 - 2024-10-09
216
+
217
+ * Redshift: enable extraction from a Redshift Serverless instance
218
+
211
219
  ## 0.20.4 - 2024-10-09
212
220
 
213
221
  * Salesforce warehouse: `Labels` instead of `api_names` for columns
@@ -1,5 +1,5 @@
1
- CHANGELOG.md,sha256=CzVaQbFAS2hlZE2ak7DTYHWBNjMaC59e8UK7Q9p10tw,13905
2
- Dockerfile,sha256=HcX5z8OpeSvkScQsN-Y7CNMUig_UB6vTMDl7uqzuLGE,303
1
+ CHANGELOG.md,sha256=Q6RN08q5JpWqwJjuR6cSm2k9vyeHbaNuufiVlS07vDo,14080
2
+ Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
3
3
  DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
4
4
  LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
5
5
  README.md,sha256=uF6PXm9ocPITlKVSh9afTakHmpLx3TvawLf-CbMP3wM,3578
@@ -17,7 +17,7 @@ castor_extractor/commands/extract_notion.py,sha256=uaxcF3_bT7D_-JxnIW0F7VVDphI_Z
17
17
  castor_extractor/commands/extract_postgres.py,sha256=pX0RnCPi4nw6QQ6wiAuZ_Xt3ZbDuMUG9aQKuqFgJtAU,1154
18
18
  castor_extractor/commands/extract_powerbi.py,sha256=f0G5w61KXExJ6Sw39_mJIwqQNpLorE5-LKmZXlUqvKI,783
19
19
  castor_extractor/commands/extract_qlik.py,sha256=VBe_xFKh_nR0QSFFIncAaC8yDqBeMa6VunBAga7AeGg,891
20
- castor_extractor/commands/extract_redshift.py,sha256=bdLp7d7ImZoKCkWc3f3NXF1imIzMVT43_KPI-x4UVac,1155
20
+ castor_extractor/commands/extract_redshift.py,sha256=zRBg2D_ft4GLdPSdmetRcgQVAA80DXtdRSYsQhAWIik,1334
21
21
  castor_extractor/commands/extract_salesforce.py,sha256=3j3YTmMkPAwocR-B1ozJQai0UIZPtpmAyWj-hHvdWn4,1226
22
22
  castor_extractor/commands/extract_salesforce_reporting.py,sha256=FdANTNiLkIPdm80XMYxWReHjdycLsIa61pyeCD-sUDk,962
23
23
  castor_extractor/commands/extract_sigma.py,sha256=sxewHcZ1Doq35V2qnpX_zCKKXkrb1_9bYjUMg7BOW-k,643
@@ -274,7 +274,7 @@ castor_extractor/visualization/tableau/usage.py,sha256=LlFwlbEr-EnYUJjKZha99CRCR
274
274
  castor_extractor/visualization/tableau_revamp/__init__.py,sha256=a3DGjQhaz17gBqW-E84TAgupKbqLC40y5Ajo1yn-ot4,156
275
275
  castor_extractor/visualization/tableau_revamp/assets.py,sha256=8sJsK6Qixao6xVmVaO1usvs16SjNub9sIx7o-adYV14,659
276
276
  castor_extractor/visualization/tableau_revamp/client/__init__.py,sha256=wmS9uLtUiqNYVloi0-DgD8d2qzu3RVZEAtWiaDp6G_M,90
277
- castor_extractor/visualization/tableau_revamp/client/client.py,sha256=8pATeVTKslF5HCbN1c_DusysP7lTU51yXPp8uhzWwZg,7165
277
+ castor_extractor/visualization/tableau_revamp/client/client.py,sha256=oaxvPsCccAcTWooXmDQNcJ6RFUVsCUzl6HxaHIwh5kU,7564
278
278
  castor_extractor/visualization/tableau_revamp/client/client_metadata_api.py,sha256=yNnGR3Tk32TUmaDejaz5fkw2p9DtmMeCv5rsZNOHUfY,3047
279
279
  castor_extractor/visualization/tableau_revamp/client/client_rest_api.py,sha256=0g8AddrhzirRCUWR2jrudPR02mk4Of5YilWth7zJO-g,4016
280
280
  castor_extractor/visualization/tableau_revamp/client/client_tsc.py,sha256=BBwIOqK2zU66udFRmLGmB_3J1ILGhVOY5Hq4nmsonF0,1853
@@ -282,7 +282,7 @@ castor_extractor/visualization/tableau_revamp/client/credentials.py,sha256=qA-Ea
282
282
  castor_extractor/visualization/tableau_revamp/client/errors.py,sha256=dTe1shqmWmAXpDpCz-E24m8dGYjt6rvIGV9qQb4jnvI,150
283
283
  castor_extractor/visualization/tableau_revamp/client/gql_queries.py,sha256=-V3ToD5Gi7nmfVB2OxTOZw8dcOiF7_ciSWjjW2UdvvI,2270
284
284
  castor_extractor/visualization/tableau_revamp/client/rest_fields.py,sha256=gx39X1zMfRVpjmFbgvbgbvtlE0QwxOtk8rZFsIqeGRI,978
285
- castor_extractor/visualization/tableau_revamp/constants.py,sha256=thS935pJyuZkdciM2EFHbIuTqSFYfB3YGCJYJ_Ls294,55
285
+ castor_extractor/visualization/tableau_revamp/constants.py,sha256=lHGB50FgVNO2nXeIhkvQKivD8ZFBIjDrflgD5cTXKJw,104
286
286
  castor_extractor/visualization/tableau_revamp/extract.py,sha256=BPy38rFjGG6Nh1eDFeCckE4RHaO-bWW2uhXh7wm8mKk,1368
287
287
  castor_extractor/visualization/thoughtspot/__init__.py,sha256=NhTGUk5Kdt54oCjHYoAt0cLBmVLys5lFYiRANL6wCmI,150
288
288
  castor_extractor/visualization/thoughtspot/assets.py,sha256=lPRvXk0PKybgLv1AcDVxg-ssf4XLTs0biRqLrqC2TzU,196
@@ -362,18 +362,20 @@ castor_extractor/warehouse/postgres/query.py,sha256=5QmI79BP_EjqxeABNg56rxuM9Xuu
362
362
  castor_extractor/warehouse/redshift/__init__.py,sha256=CC82SejYDlwYhZhhn40ln-oTsRx7AJ1Km61cxPkymjE,125
363
363
  castor_extractor/warehouse/redshift/client.py,sha256=My7003HGBhTpS6X5NgYcKwntR7h45scLaGr-LSY0tIc,2172
364
364
  castor_extractor/warehouse/redshift/client_test.py,sha256=74lZfna71qs80EKAuitJ8_ZjAGtpYHf5tChySinVPoQ,1023
365
- castor_extractor/warehouse/redshift/extract.py,sha256=XnAnBSapzXYUWVQKMhVaLlloA-uXEqseOhSt9flVsdw,2331
365
+ castor_extractor/warehouse/redshift/extract.py,sha256=pblUQ2XafVTpyHrDLrmwFKy55mUNF03dZvgPweihYUc,2723
366
+ castor_extractor/warehouse/redshift/extract_test.py,sha256=-8eWOsFEv4DFvBmalaE_TzQD6YdgwnGRPCkKsycJuxg,653
366
367
  castor_extractor/warehouse/redshift/queries/.sqlfluff,sha256=W4pFQiY8KMtXwn3WguYQJA8cj78VR7K-iokPoZoy5aM,30
367
368
  castor_extractor/warehouse/redshift/queries/column.sql,sha256=ZXdurmaJRD2fejDksU5eh37Q4srmnVrEjSzsrtg_il8,6840
368
369
  castor_extractor/warehouse/redshift/queries/database.sql,sha256=_C0knW159YDfReGuWLjIdvxHzefo1Xg2xw2dJKJzNk8,299
369
370
  castor_extractor/warehouse/redshift/queries/group.sql,sha256=8p0wlqllnwOTiAgiV237DvFYHGOEcYwaHdyqVQg3F6E,101
370
371
  castor_extractor/warehouse/redshift/queries/query.sql,sha256=yZNGnUdebvvDx0J0KMSJ2hNgkK4gPduyOfPM_7-DIfo,3465
372
+ castor_extractor/warehouse/redshift/queries/query_serverless.sql,sha256=QlYYFLJ2gInVczuXDxTGColM3-_zLSpPD0tBuLVFMyQ,1925
371
373
  castor_extractor/warehouse/redshift/queries/schema.sql,sha256=Mf6nooi2w2PhGxM2_kDAf3oQ8QnR-hpT5Y0AmUzghGg,585
372
374
  castor_extractor/warehouse/redshift/queries/table.sql,sha256=y8CGOwPHH_Mr8g1Zvuz2U5ldL8zuPm5v3M5RPZqIhsE,2645
373
375
  castor_extractor/warehouse/redshift/queries/table_freshness.sql,sha256=l61_ysmTEtuMwK9RmYmD5cu0HmD1RXwTEhX0ytBeyxg,726
374
376
  castor_extractor/warehouse/redshift/queries/user.sql,sha256=sEXveJAuNvZacvpI6WfwsX6VavoMb2VqYA32f6Dt-_Y,170
375
377
  castor_extractor/warehouse/redshift/queries/view_ddl.sql,sha256=Pkyh_QT6d4rhTeyiVcqw6O8CRl7NEhk2p7eM5YIn5kg,719
376
- castor_extractor/warehouse/redshift/query.py,sha256=0C81rkt2cpkWrJIxxwALDyqr-49vlqQM04y_N6wwStc,540
378
+ castor_extractor/warehouse/redshift/query.py,sha256=F2MiFqPRNGfBrCtkXNRs28Q_i9DfIEKh93yDUVb8Yjw,1060
377
379
  castor_extractor/warehouse/salesforce/__init__.py,sha256=NR4aNea5jeE1xYqeZ_29deeN84CkN0_D_Z7CLQdJvFY,137
378
380
  castor_extractor/warehouse/salesforce/client.py,sha256=-9WHcQwEMrpGRQ9CN-bsRSR2Tnx9d-f_FtV4ntsf71w,3287
379
381
  castor_extractor/warehouse/salesforce/constants.py,sha256=GusduVBCPvwpk_Im6F3bDvXeNQ7hRnCMdIAjIg65RnE,52
@@ -413,8 +415,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
413
415
  castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
414
416
  castor_extractor/warehouse/sqlserver/query.py,sha256=j_d5-HMnzBouwGfywVZMRSSwbXzPvzDWlFCZmvxcoGQ,539
415
417
  castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
416
- castor_extractor-0.20.4.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
417
- castor_extractor-0.20.4.dist-info/METADATA,sha256=YcFx5O-gccq_JevTWl9xfeE5LGf5baiUKHfPrG1QX28,21123
418
- castor_extractor-0.20.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
419
- castor_extractor-0.20.4.dist-info/entry_points.txt,sha256=IVGy_oM8VjzADMAxzmiNJTYYidTCsI98MpO_mkXjkqE,1573
420
- castor_extractor-0.20.4.dist-info/RECORD,,
418
+ castor_extractor-0.20.6.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
419
+ castor_extractor-0.20.6.dist-info/METADATA,sha256=Vgkknt_--nGtd3FcSAsG5JxA6zvBVlc-lwSOqlXoeTE,21298
420
+ castor_extractor-0.20.6.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
421
+ castor_extractor-0.20.6.dist-info/entry_points.txt,sha256=IVGy_oM8VjzADMAxzmiNJTYYidTCsI98MpO_mkXjkqE,1573
422
+ castor_extractor-0.20.6.dist-info/RECORD,,