castor-extractor 0.20.4__py3-none-any.whl → 0.20.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +4 -0
- castor_extractor/commands/extract_redshift.py +6 -0
- castor_extractor/warehouse/redshift/extract.py +10 -1
- castor_extractor/warehouse/redshift/extract_test.py +26 -0
- castor_extractor/warehouse/redshift/queries/query_serverless.sql +69 -0
- castor_extractor/warehouse/redshift/query.py +12 -1
- {castor_extractor-0.20.4.dist-info → castor_extractor-0.20.5.dist-info}/METADATA +5 -1
- {castor_extractor-0.20.4.dist-info → castor_extractor-0.20.5.dist-info}/RECORD +11 -9
- {castor_extractor-0.20.4.dist-info → castor_extractor-0.20.5.dist-info}/LICENCE +0 -0
- {castor_extractor-0.20.4.dist-info → castor_extractor-0.20.5.dist-info}/WHEEL +0 -0
- {castor_extractor-0.20.4.dist-info → castor_extractor-0.20.5.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED
|
@@ -23,6 +23,11 @@ def main():
|
|
|
23
23
|
action="store_true",
|
|
24
24
|
help="Skips files already extracted instead of replacing them",
|
|
25
25
|
)
|
|
26
|
+
parser.add_argument(
|
|
27
|
+
"--serverless",
|
|
28
|
+
action="store_true",
|
|
29
|
+
help="Enables extraction for Redshift Serverless",
|
|
30
|
+
)
|
|
26
31
|
parser.set_defaults(skip_existing=False)
|
|
27
32
|
|
|
28
33
|
args = parser.parse_args()
|
|
@@ -34,5 +39,6 @@ def main():
|
|
|
34
39
|
user=args.user,
|
|
35
40
|
password=args.password,
|
|
36
41
|
output_directory=args.output,
|
|
42
|
+
serverless=args.serverless,
|
|
37
43
|
skip_existing=args.skip_existing,
|
|
38
44
|
)
|
|
@@ -34,6 +34,7 @@ REDSHIFT_PASSWORD = "CASTOR_REDSHIFT_PASSWORD" # noqa: S105
|
|
|
34
34
|
REDSHIFT_HOST = "CASTOR_REDSHIFT_HOST"
|
|
35
35
|
REDSHIFT_PORT = "CASTOR_REDSHIFT_PORT"
|
|
36
36
|
REDSHIFT_DATABASE = "CASTOR_REDSHIFT_DATABASE"
|
|
37
|
+
REDSHIFT_SERVERLESS = "CASTOR_REDSHIFT_SERVERLESS"
|
|
37
38
|
|
|
38
39
|
|
|
39
40
|
def _credentials(params: dict) -> dict:
|
|
@@ -48,6 +49,14 @@ def _credentials(params: dict) -> dict:
|
|
|
48
49
|
}
|
|
49
50
|
|
|
50
51
|
|
|
52
|
+
def _query_builder(params: dict) -> RedshiftQueryBuilder:
|
|
53
|
+
env_parameter = from_env(REDSHIFT_SERVERLESS, allow_missing=True)
|
|
54
|
+
from_env_ = str(env_parameter).lower() == "true"
|
|
55
|
+
from_params_ = params.get("serverless", False)
|
|
56
|
+
is_serverless = from_params_ or from_env_
|
|
57
|
+
return RedshiftQueryBuilder(is_serverless=is_serverless)
|
|
58
|
+
|
|
59
|
+
|
|
51
60
|
def extract_all(**kwargs) -> None:
|
|
52
61
|
"""
|
|
53
62
|
Extract all assets from Redshift and store the results in CSV files
|
|
@@ -56,7 +65,7 @@ def extract_all(**kwargs) -> None:
|
|
|
56
65
|
|
|
57
66
|
client = RedshiftClient(credentials=_credentials(kwargs))
|
|
58
67
|
|
|
59
|
-
query_builder = RedshiftQueryBuilder()
|
|
68
|
+
query_builder = _query_builder(kwargs)
|
|
60
69
|
|
|
61
70
|
storage = LocalStorage(directory=output_directory)
|
|
62
71
|
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from .extract import (
|
|
4
|
+
REDSHIFT_SERVERLESS,
|
|
5
|
+
_query_builder,
|
|
6
|
+
)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@pytest.mark.parametrize(
|
|
10
|
+
"serverless_param,env_param,expected",
|
|
11
|
+
[
|
|
12
|
+
(True, "False", True),
|
|
13
|
+
(False, "True", True),
|
|
14
|
+
(None, "TRUE", True),
|
|
15
|
+
(None, "TrUe", True),
|
|
16
|
+
(None, "FAlSE", False),
|
|
17
|
+
(None, "False", False),
|
|
18
|
+
(None, None, False),
|
|
19
|
+
(True, None, True),
|
|
20
|
+
],
|
|
21
|
+
)
|
|
22
|
+
def test__query_builder(serverless_param, env_param, expected, monkeypatch):
|
|
23
|
+
params = {"serverless": serverless_param}
|
|
24
|
+
monkeypatch.setenv(REDSHIFT_SERVERLESS, env_param)
|
|
25
|
+
|
|
26
|
+
assert _query_builder(params).is_serverless == expected
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
WITH parameters AS (
|
|
2
|
+
SELECT
|
|
3
|
+
:day AS day_start,
|
|
4
|
+
:hour_min AS hour_min,
|
|
5
|
+
:hour_max AS hour_max
|
|
6
|
+
),
|
|
7
|
+
|
|
8
|
+
queries_deduplicated AS (
|
|
9
|
+
SELECT DISTINCT q.query_id
|
|
10
|
+
FROM SYS_QUERY_HISTORY AS q
|
|
11
|
+
CROSS JOIN parameters AS p
|
|
12
|
+
WHERE TRUE
|
|
13
|
+
AND DATE(q.start_time) = p.day_start
|
|
14
|
+
AND EXTRACT('hour' FROM q.start_time) BETWEEN p.hour_min AND p.hour_max
|
|
15
|
+
),
|
|
16
|
+
|
|
17
|
+
query AS (
|
|
18
|
+
SELECT
|
|
19
|
+
q.query_id,
|
|
20
|
+
qt.text,
|
|
21
|
+
qt.sequence,
|
|
22
|
+
COUNT(*) OVER(PARTITION BY q.query_id) AS sequence_count
|
|
23
|
+
FROM queries_deduplicated AS q
|
|
24
|
+
INNER JOIN SYS_QUERY_TEXT AS qt ON q.query_id = qt.query_id
|
|
25
|
+
),
|
|
26
|
+
|
|
27
|
+
raw_query_text AS
|
|
28
|
+
(
|
|
29
|
+
SELECT
|
|
30
|
+
q.query_id,
|
|
31
|
+
LISTAGG(q.text, '') WITHIN GROUP (ORDER BY q.sequence) AS agg_text
|
|
32
|
+
FROM query AS q
|
|
33
|
+
WHERE TRUE
|
|
34
|
+
-- LISTAGG raises an error when total length >= 65535
|
|
35
|
+
-- each query text contains 4000 char max
|
|
36
|
+
AND q.sequence_count < (65535 / 4000)
|
|
37
|
+
GROUP BY q.query_id
|
|
38
|
+
),
|
|
39
|
+
|
|
40
|
+
query_text AS (
|
|
41
|
+
SELECT
|
|
42
|
+
query_id,
|
|
43
|
+
CASE
|
|
44
|
+
WHEN agg_text ILIKE 'INSERT INTO%%'
|
|
45
|
+
THEN REGEXP_REPLACE(agg_text, 'VALUES (.*)', 'DEFAULT VALUES')
|
|
46
|
+
ELSE agg_text
|
|
47
|
+
END AS agg_text
|
|
48
|
+
FROM raw_query_text
|
|
49
|
+
)
|
|
50
|
+
SELECT
|
|
51
|
+
q.query_id::VARCHAR(256) AS query_id,
|
|
52
|
+
qt.agg_text::VARCHAR(60000) AS query_text,
|
|
53
|
+
q.database_name AS database_id,
|
|
54
|
+
q.database_name AS database_name,
|
|
55
|
+
q.session_id AS process_id,
|
|
56
|
+
0 as aborted,
|
|
57
|
+
q.start_time AS start_time,
|
|
58
|
+
q.end_time AS end_time,
|
|
59
|
+
q.user_id AS user_id,
|
|
60
|
+
q.query_label,
|
|
61
|
+
u.usename AS user_name
|
|
62
|
+
FROM SYS_QUERY_HISTORY AS q
|
|
63
|
+
JOIN query_text AS qt ON q.query_id = qt.query_id
|
|
64
|
+
JOIN pg_catalog.pg_user AS u ON u.usesysid = q.user_id
|
|
65
|
+
CROSS JOIN parameters AS p
|
|
66
|
+
WHERE TRUE
|
|
67
|
+
AND DATE(q.start_time) = p.day_start
|
|
68
|
+
AND EXTRACT('hour' FROM q.start_time) BETWEEN p.hour_min AND p.hour_max
|
|
69
|
+
AND q.status = 'success'
|
|
@@ -15,10 +15,21 @@ class RedshiftQueryBuilder(AbstractQueryBuilder):
|
|
|
15
15
|
|
|
16
16
|
def __init__(
|
|
17
17
|
self,
|
|
18
|
+
is_serverless: bool = False,
|
|
18
19
|
time_filter: Optional[TimeFilter] = None,
|
|
19
20
|
):
|
|
20
21
|
super().__init__(time_filter=time_filter)
|
|
22
|
+
self.is_serverless = is_serverless
|
|
23
|
+
|
|
24
|
+
def build_query_serverless(self) -> ExtractionQuery:
|
|
25
|
+
"""To get the query history in Redshift Serverless, we cannot use STL tables."""
|
|
26
|
+
statement = self._load_from_file("query_serverless.sql")
|
|
27
|
+
params = self._time_filter.to_dict()
|
|
28
|
+
return ExtractionQuery(statement, params)
|
|
21
29
|
|
|
22
30
|
def build(self, asset: WarehouseAsset) -> List[ExtractionQuery]:
|
|
23
|
-
|
|
31
|
+
if asset == WarehouseAsset.QUERY and self.is_serverless:
|
|
32
|
+
query = self.build_query_serverless()
|
|
33
|
+
else:
|
|
34
|
+
query = self.build_default(asset)
|
|
24
35
|
return [query]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: castor-extractor
|
|
3
|
-
Version: 0.20.4
|
|
3
|
+
Version: 0.20.5
|
|
4
4
|
Summary: Extract your metadata assets.
|
|
5
5
|
Home-page: https://www.castordoc.com/
|
|
6
6
|
License: EULA
|
|
@@ -208,6 +208,10 @@ For any questions or bug report, contact us at [support@castordoc.com](mailto:su
|
|
|
208
208
|
|
|
209
209
|
# Changelog
|
|
210
210
|
|
|
211
|
+
## 0.20.5 - 2024-10-09
|
|
212
|
+
|
|
213
|
+
* Redshift: enable extraction from a Redshift Serverless instance
|
|
214
|
+
|
|
211
215
|
## 0.20.4 - 2024-10-09
|
|
212
216
|
|
|
213
217
|
* Salesforce warehouse: `Labels` instead of `api_names` for columns
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
CHANGELOG.md,sha256=
|
|
1
|
+
CHANGELOG.md,sha256=gqprCxUMpvdGZdxp3AxdsRKA3JK-Q9NM72m839G5CeQ,13996
|
|
2
2
|
Dockerfile,sha256=HcX5z8OpeSvkScQsN-Y7CNMUig_UB6vTMDl7uqzuLGE,303
|
|
3
3
|
DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
|
|
4
4
|
LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
@@ -17,7 +17,7 @@ castor_extractor/commands/extract_notion.py,sha256=uaxcF3_bT7D_-JxnIW0F7VVDphI_Z
|
|
|
17
17
|
castor_extractor/commands/extract_postgres.py,sha256=pX0RnCPi4nw6QQ6wiAuZ_Xt3ZbDuMUG9aQKuqFgJtAU,1154
|
|
18
18
|
castor_extractor/commands/extract_powerbi.py,sha256=f0G5w61KXExJ6Sw39_mJIwqQNpLorE5-LKmZXlUqvKI,783
|
|
19
19
|
castor_extractor/commands/extract_qlik.py,sha256=VBe_xFKh_nR0QSFFIncAaC8yDqBeMa6VunBAga7AeGg,891
|
|
20
|
-
castor_extractor/commands/extract_redshift.py,sha256=
|
|
20
|
+
castor_extractor/commands/extract_redshift.py,sha256=zRBg2D_ft4GLdPSdmetRcgQVAA80DXtdRSYsQhAWIik,1334
|
|
21
21
|
castor_extractor/commands/extract_salesforce.py,sha256=3j3YTmMkPAwocR-B1ozJQai0UIZPtpmAyWj-hHvdWn4,1226
|
|
22
22
|
castor_extractor/commands/extract_salesforce_reporting.py,sha256=FdANTNiLkIPdm80XMYxWReHjdycLsIa61pyeCD-sUDk,962
|
|
23
23
|
castor_extractor/commands/extract_sigma.py,sha256=sxewHcZ1Doq35V2qnpX_zCKKXkrb1_9bYjUMg7BOW-k,643
|
|
@@ -362,18 +362,20 @@ castor_extractor/warehouse/postgres/query.py,sha256=5QmI79BP_EjqxeABNg56rxuM9Xuu
|
|
|
362
362
|
castor_extractor/warehouse/redshift/__init__.py,sha256=CC82SejYDlwYhZhhn40ln-oTsRx7AJ1Km61cxPkymjE,125
|
|
363
363
|
castor_extractor/warehouse/redshift/client.py,sha256=My7003HGBhTpS6X5NgYcKwntR7h45scLaGr-LSY0tIc,2172
|
|
364
364
|
castor_extractor/warehouse/redshift/client_test.py,sha256=74lZfna71qs80EKAuitJ8_ZjAGtpYHf5tChySinVPoQ,1023
|
|
365
|
-
castor_extractor/warehouse/redshift/extract.py,sha256=
|
|
365
|
+
castor_extractor/warehouse/redshift/extract.py,sha256=pblUQ2XafVTpyHrDLrmwFKy55mUNF03dZvgPweihYUc,2723
|
|
366
|
+
castor_extractor/warehouse/redshift/extract_test.py,sha256=-8eWOsFEv4DFvBmalaE_TzQD6YdgwnGRPCkKsycJuxg,653
|
|
366
367
|
castor_extractor/warehouse/redshift/queries/.sqlfluff,sha256=W4pFQiY8KMtXwn3WguYQJA8cj78VR7K-iokPoZoy5aM,30
|
|
367
368
|
castor_extractor/warehouse/redshift/queries/column.sql,sha256=ZXdurmaJRD2fejDksU5eh37Q4srmnVrEjSzsrtg_il8,6840
|
|
368
369
|
castor_extractor/warehouse/redshift/queries/database.sql,sha256=_C0knW159YDfReGuWLjIdvxHzefo1Xg2xw2dJKJzNk8,299
|
|
369
370
|
castor_extractor/warehouse/redshift/queries/group.sql,sha256=8p0wlqllnwOTiAgiV237DvFYHGOEcYwaHdyqVQg3F6E,101
|
|
370
371
|
castor_extractor/warehouse/redshift/queries/query.sql,sha256=yZNGnUdebvvDx0J0KMSJ2hNgkK4gPduyOfPM_7-DIfo,3465
|
|
372
|
+
castor_extractor/warehouse/redshift/queries/query_serverless.sql,sha256=QlYYFLJ2gInVczuXDxTGColM3-_zLSpPD0tBuLVFMyQ,1925
|
|
371
373
|
castor_extractor/warehouse/redshift/queries/schema.sql,sha256=Mf6nooi2w2PhGxM2_kDAf3oQ8QnR-hpT5Y0AmUzghGg,585
|
|
372
374
|
castor_extractor/warehouse/redshift/queries/table.sql,sha256=y8CGOwPHH_Mr8g1Zvuz2U5ldL8zuPm5v3M5RPZqIhsE,2645
|
|
373
375
|
castor_extractor/warehouse/redshift/queries/table_freshness.sql,sha256=l61_ysmTEtuMwK9RmYmD5cu0HmD1RXwTEhX0ytBeyxg,726
|
|
374
376
|
castor_extractor/warehouse/redshift/queries/user.sql,sha256=sEXveJAuNvZacvpI6WfwsX6VavoMb2VqYA32f6Dt-_Y,170
|
|
375
377
|
castor_extractor/warehouse/redshift/queries/view_ddl.sql,sha256=Pkyh_QT6d4rhTeyiVcqw6O8CRl7NEhk2p7eM5YIn5kg,719
|
|
376
|
-
castor_extractor/warehouse/redshift/query.py,sha256=
|
|
378
|
+
castor_extractor/warehouse/redshift/query.py,sha256=F2MiFqPRNGfBrCtkXNRs28Q_i9DfIEKh93yDUVb8Yjw,1060
|
|
377
379
|
castor_extractor/warehouse/salesforce/__init__.py,sha256=NR4aNea5jeE1xYqeZ_29deeN84CkN0_D_Z7CLQdJvFY,137
|
|
378
380
|
castor_extractor/warehouse/salesforce/client.py,sha256=-9WHcQwEMrpGRQ9CN-bsRSR2Tnx9d-f_FtV4ntsf71w,3287
|
|
379
381
|
castor_extractor/warehouse/salesforce/constants.py,sha256=GusduVBCPvwpk_Im6F3bDvXeNQ7hRnCMdIAjIg65RnE,52
|
|
@@ -413,8 +415,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
|
|
|
413
415
|
castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
|
|
414
416
|
castor_extractor/warehouse/sqlserver/query.py,sha256=j_d5-HMnzBouwGfywVZMRSSwbXzPvzDWlFCZmvxcoGQ,539
|
|
415
417
|
castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
|
|
416
|
-
castor_extractor-0.20.4.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
417
|
-
castor_extractor-0.20.
|
|
418
|
-
castor_extractor-0.20.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
419
|
-
castor_extractor-0.20.4.dist-info/entry_points.txt,sha256=IVGy_oM8VjzADMAxzmiNJTYYidTCsI98MpO_mkXjkqE,1573
|
|
420
|
-
castor_extractor-0.20.4.dist-info/RECORD,,
|
|
418
|
+
castor_extractor-0.20.5.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
419
|
+
castor_extractor-0.20.5.dist-info/METADATA,sha256=wmQH2GPtnBGHq8a5CdjKZ6yPMNIrS4gkYrTpbV0T9yg,21214
|
|
420
|
+
castor_extractor-0.20.5.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
421
|
+
castor_extractor-0.20.5.dist-info/entry_points.txt,sha256=IVGy_oM8VjzADMAxzmiNJTYYidTCsI98MpO_mkXjkqE,1573
|
|
422
|
+
castor_extractor-0.20.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|