castor-extractor 0.5.3__py3-none-any.whl → 0.5.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +15 -0
- castor_extractor/commands/extract_bigquery.py +3 -1
- castor_extractor/commands/extract_looker.py +4 -1
- castor_extractor/commands/extract_metabase_api.py +3 -1
- castor_extractor/commands/extract_metabase_db.py +6 -2
- castor_extractor/commands/extract_mode.py +6 -2
- castor_extractor/commands/extract_powerbi.py +4 -1
- castor_extractor/commands/extract_snowflake.py +4 -2
- castor_extractor/commands/extract_tableau.py +4 -1
- castor_extractor/commands/file_check.py +3 -1
- castor_extractor/commands/upload.py +5 -2
- castor_extractor/file_checker/file_test.py +6 -3
- castor_extractor/file_checker/templates/generic_warehouse.py +4 -2
- castor_extractor/transformation/dbt/client/credentials.py +1 -1
- castor_extractor/types.py +2 -1
- castor_extractor/uploader/upload.py +4 -2
- castor_extractor/uploader/upload_test.py +0 -1
- castor_extractor/utils/deprecate.py +1 -1
- castor_extractor/utils/files_test.py +2 -2
- castor_extractor/utils/formatter_test.py +0 -1
- castor_extractor/utils/pager.py +4 -2
- castor_extractor/utils/pager_test.py +1 -1
- castor_extractor/utils/retry.py +1 -1
- castor_extractor/utils/safe.py +1 -1
- castor_extractor/utils/string_test.py +0 -1
- castor_extractor/utils/validation.py +4 -3
- castor_extractor/visualization/looker/api/client.py +26 -9
- castor_extractor/visualization/looker/api/client_test.py +3 -2
- castor_extractor/visualization/looker/api/constants.py +3 -1
- castor_extractor/visualization/looker/api/utils.py +3 -2
- castor_extractor/visualization/looker/assets.py +1 -0
- castor_extractor/visualization/looker/constant.py +1 -1
- castor_extractor/visualization/looker/extract.py +6 -1
- castor_extractor/visualization/metabase/client/api/client.py +2 -1
- castor_extractor/visualization/metabase/client/api/credentials.py +1 -1
- castor_extractor/visualization/metabase/client/db/client.py +4 -3
- castor_extractor/visualization/metabase/client/db/credentials.py +2 -2
- castor_extractor/visualization/metabase/client/decryption_test.py +0 -1
- castor_extractor/visualization/metabase/extract.py +4 -4
- castor_extractor/visualization/mode/client/client.py +2 -1
- castor_extractor/visualization/mode/client/client_test.py +4 -3
- castor_extractor/visualization/mode/client/credentials.py +2 -2
- castor_extractor/visualization/powerbi/client/constants.py +1 -1
- castor_extractor/visualization/powerbi/client/credentials.py +0 -1
- castor_extractor/visualization/powerbi/client/credentials_test.py +11 -3
- castor_extractor/visualization/powerbi/client/rest.py +15 -5
- castor_extractor/visualization/powerbi/client/rest_test.py +40 -13
- castor_extractor/visualization/powerbi/extract.py +4 -3
- castor_extractor/visualization/qlik/client/engine/client.py +3 -1
- castor_extractor/visualization/qlik/client/engine/json_rpc.py +4 -1
- castor_extractor/visualization/qlik/client/engine/json_rpc_test.py +0 -1
- castor_extractor/visualization/qlik/client/master.py +11 -4
- castor_extractor/visualization/qlik/client/rest_test.py +3 -2
- castor_extractor/visualization/sigma/client/client.py +7 -3
- castor_extractor/visualization/sigma/client/client_test.py +4 -2
- castor_extractor/visualization/sigma/client/credentials.py +2 -2
- castor_extractor/visualization/sigma/constants.py +1 -1
- castor_extractor/visualization/sigma/extract.py +3 -1
- castor_extractor/visualization/tableau/client/client.py +7 -5
- castor_extractor/visualization/tableau/client/client_utils.py +6 -3
- castor_extractor/visualization/tableau/client/credentials.py +6 -4
- castor_extractor/visualization/tableau/client/project.py +3 -1
- castor_extractor/visualization/tableau/client/safe_mode.py +2 -1
- castor_extractor/visualization/tableau/extract.py +7 -7
- castor_extractor/visualization/tableau/gql_fields.py +4 -4
- castor_extractor/visualization/tableau/tests/unit/graphql/paginated_object_test.py +2 -1
- castor_extractor/visualization/tableau/tests/unit/rest_api/auth_test.py +6 -3
- castor_extractor/visualization/tableau/tests/unit/rest_api/credentials_test.py +1 -1
- castor_extractor/visualization/tableau/tests/unit/rest_api/usages_test.py +2 -1
- castor_extractor/warehouse/abstract/extract.py +3 -2
- castor_extractor/warehouse/abstract/time_filter_test.py +0 -1
- castor_extractor/warehouse/bigquery/client_test.py +1 -1
- castor_extractor/warehouse/bigquery/extract.py +3 -2
- castor_extractor/warehouse/bigquery/query.py +4 -3
- castor_extractor/warehouse/postgres/extract.py +5 -3
- castor_extractor/warehouse/redshift/client_test.py +0 -1
- castor_extractor/warehouse/redshift/extract.py +5 -3
- castor_extractor/warehouse/snowflake/client.py +1 -1
- castor_extractor/warehouse/snowflake/client_test.py +1 -1
- castor_extractor/warehouse/snowflake/extract.py +5 -3
- castor_extractor/warehouse/synapse/extract.py +1 -1
- {castor_extractor-0.5.3.dist-info → castor_extractor-0.5.6.dist-info}/METADATA +2 -2
- {castor_extractor-0.5.3.dist-info → castor_extractor-0.5.6.dist-info}/RECORD +85 -85
- {castor_extractor-0.5.3.dist-info → castor_extractor-0.5.6.dist-info}/WHEEL +0 -0
- {castor_extractor-0.5.3.dist-info → castor_extractor-0.5.6.dist-info}/entry_points.txt +0 -0
|
@@ -27,17 +27,17 @@ def iterate_all_data(
|
|
|
27
27
|
|
|
28
28
|
logger.info("Extracting WORKBOOK from Tableau API")
|
|
29
29
|
yield TableauAsset.WORKBOOK, deep_serialize(
|
|
30
|
-
client.fetch(TableauAsset.WORKBOOK)
|
|
30
|
+
client.fetch(TableauAsset.WORKBOOK),
|
|
31
31
|
)
|
|
32
32
|
|
|
33
33
|
logger.info("Extracting PUBLISHED DATASOURCE from Tableau API")
|
|
34
34
|
yield TableauAsset.PUBLISHED_DATASOURCE, deep_serialize(
|
|
35
|
-
client.fetch(TableauAsset.PUBLISHED_DATASOURCE)
|
|
35
|
+
client.fetch(TableauAsset.PUBLISHED_DATASOURCE),
|
|
36
36
|
)
|
|
37
37
|
|
|
38
38
|
logger.info("Extracting PROJECT from Tableau API")
|
|
39
39
|
yield TableauAsset.PROJECT, deep_serialize(
|
|
40
|
-
client.fetch(TableauAsset.PROJECT)
|
|
40
|
+
client.fetch(TableauAsset.PROJECT),
|
|
41
41
|
)
|
|
42
42
|
|
|
43
43
|
logger.info("Extracting USAGE from Tableau API")
|
|
@@ -45,22 +45,22 @@ def iterate_all_data(
|
|
|
45
45
|
|
|
46
46
|
logger.info("Extracting WORKBOOK_TO_DATASOURCE from Tableau API")
|
|
47
47
|
yield TableauAsset.WORKBOOK_TO_DATASOURCE, deep_serialize(
|
|
48
|
-
client.fetch(TableauAsset.WORKBOOK_TO_DATASOURCE)
|
|
48
|
+
client.fetch(TableauAsset.WORKBOOK_TO_DATASOURCE),
|
|
49
49
|
)
|
|
50
50
|
|
|
51
51
|
logger.info("Extracting DATASOURCE from Tableau API")
|
|
52
52
|
yield TableauAsset.DATASOURCE, deep_serialize(
|
|
53
|
-
client.fetch(TableauAsset.DATASOURCE)
|
|
53
|
+
client.fetch(TableauAsset.DATASOURCE),
|
|
54
54
|
)
|
|
55
55
|
|
|
56
56
|
logger.info("Extracting CUSTOM_SQL_TABLE from Tableau API")
|
|
57
57
|
yield TableauAsset.CUSTOM_SQL_TABLE, deep_serialize(
|
|
58
|
-
client.fetch(TableauAsset.CUSTOM_SQL_TABLE)
|
|
58
|
+
client.fetch(TableauAsset.CUSTOM_SQL_TABLE),
|
|
59
59
|
)
|
|
60
60
|
|
|
61
61
|
logger.info("Extracting CUSTOM_SQL_QUERY from Tableau API")
|
|
62
62
|
yield TableauAsset.CUSTOM_SQL_QUERY, deep_serialize(
|
|
63
|
-
client.fetch(TableauAsset.CUSTOM_SQL_QUERY)
|
|
63
|
+
client.fetch(TableauAsset.CUSTOM_SQL_QUERY),
|
|
64
64
|
)
|
|
65
65
|
|
|
66
66
|
logger.info("Extracting FIELD from Tableau API")
|
|
@@ -169,19 +169,19 @@ QUERY_FIELDS: Dict[TableauAsset, QueryInfo] = {
|
|
|
169
169
|
{
|
|
170
170
|
FIELDS: GQLQueryFields.CUSTOM_SQL_TABLE,
|
|
171
171
|
OBJECT_TYPE: TableauGraphqlAsset.CUSTOM_SQL,
|
|
172
|
-
}
|
|
172
|
+
},
|
|
173
173
|
],
|
|
174
174
|
TableauAsset.CUSTOM_SQL_QUERY: [
|
|
175
175
|
{
|
|
176
176
|
FIELDS: GQLQueryFields.CUSTOM_SQL_QUERY,
|
|
177
177
|
OBJECT_TYPE: TableauGraphqlAsset.CUSTOM_SQL,
|
|
178
|
-
}
|
|
178
|
+
},
|
|
179
179
|
],
|
|
180
180
|
TableauAsset.DATASOURCE: [
|
|
181
181
|
{
|
|
182
182
|
FIELDS: GQLQueryFields.DATASOURCE,
|
|
183
183
|
OBJECT_TYPE: TableauGraphqlAsset.DATASOURCE,
|
|
184
|
-
}
|
|
184
|
+
},
|
|
185
185
|
],
|
|
186
186
|
TableauAsset.FIELD: [
|
|
187
187
|
{
|
|
@@ -205,6 +205,6 @@ QUERY_FIELDS: Dict[TableauAsset, QueryInfo] = {
|
|
|
205
205
|
{
|
|
206
206
|
FIELDS: GQLQueryFields.WORKBOOK_TO_DATASOURCE,
|
|
207
207
|
OBJECT_TYPE: TableauGraphqlAsset.WORKBOOK_TO_DATASOURCE,
|
|
208
|
-
}
|
|
208
|
+
},
|
|
209
209
|
],
|
|
210
210
|
}
|
|
@@ -11,7 +11,8 @@ from ....constants import TABLEAU_SERVER_VERSION
|
|
|
11
11
|
from ..utils import KEYS
|
|
12
12
|
|
|
13
13
|
TEST_ASSET_DIR = os.path.join(
|
|
14
|
-
os.path.dirname(__file__), "../assets/graphql/metadata"
|
|
14
|
+
os.path.dirname(__file__),
|
|
15
|
+
"../assets/graphql/metadata",
|
|
15
16
|
)
|
|
16
17
|
|
|
17
18
|
METADATA_1 = os.path.join(TEST_ASSET_DIR, "metadata_1_get.json")
|
|
@@ -27,11 +27,14 @@ class AuthTests(unittest.TestCase):
|
|
|
27
27
|
self._client.login()
|
|
28
28
|
|
|
29
29
|
self.assertEqual(
|
|
30
|
-
"eIX6mvFsqyansa4KqEI1UwOpS8ggRs2l", self._client._server.auth_token
|
|
30
|
+
"eIX6mvFsqyansa4KqEI1UwOpS8ggRs2l",
|
|
31
|
+
self._client._server.auth_token,
|
|
31
32
|
)
|
|
32
33
|
self.assertEqual(
|
|
33
|
-
"6b7179ba-b82b-4f0f-91ed-812074ac5da6", self._client._server.site_id
|
|
34
|
+
"6b7179ba-b82b-4f0f-91ed-812074ac5da6",
|
|
35
|
+
self._client._server.site_id,
|
|
34
36
|
)
|
|
35
37
|
self.assertEqual(
|
|
36
|
-
"1a96d216-e9b8-497b-a82a-0b899a965e01", self._client._server.user_id
|
|
38
|
+
"1a96d216-e9b8-497b-a82a-0b899a965e01",
|
|
39
|
+
self._client._server.user_id,
|
|
37
40
|
)
|
|
@@ -41,7 +41,8 @@ class UsageTests(unittest.TestCase):
|
|
|
41
41
|
response_xml = f.read().decode("utf-8")
|
|
42
42
|
with requests_mock.mock() as m:
|
|
43
43
|
m.get(
|
|
44
|
-
self.baseurl + "?includeUsageStatistics=true", text=response_xml
|
|
44
|
+
self.baseurl + "?includeUsageStatistics=true",
|
|
45
|
+
text=response_xml,
|
|
45
46
|
)
|
|
46
47
|
results = self._client._fetch_usages(False)
|
|
47
48
|
|
|
@@ -48,7 +48,6 @@ class ExtractionProcessor:
|
|
|
48
48
|
return decorated_execute(query)
|
|
49
49
|
|
|
50
50
|
def _results(self, asset: WarehouseAsset) -> Iterator[dict]:
|
|
51
|
-
|
|
52
51
|
data: Iterator[dict] = iter([])
|
|
53
52
|
queries = self._query_builder.build(asset)
|
|
54
53
|
total = len(queries)
|
|
@@ -65,7 +64,9 @@ class ExtractionProcessor:
|
|
|
65
64
|
return data
|
|
66
65
|
|
|
67
66
|
def extract(
|
|
68
|
-
self, asset: WarehouseAsset, skip_existing: bool = False
|
|
67
|
+
self,
|
|
68
|
+
asset: WarehouseAsset,
|
|
69
|
+
skip_existing: bool = False,
|
|
69
70
|
) -> str:
|
|
70
71
|
"""
|
|
71
72
|
Process extraction for the given asset and returns the location of extracted data
|
|
@@ -30,7 +30,7 @@ BIGQUERY_ASSETS: SupportedAssets = OrderedDict(
|
|
|
30
30
|
WarehouseAssetGroup.QUERY: QUERIES_ASSETS,
|
|
31
31
|
WarehouseAssetGroup.VIEW_DDL: VIEWS_ASSETS,
|
|
32
32
|
WarehouseAssetGroup.ROLE: (WarehouseAsset.USER,),
|
|
33
|
-
}
|
|
33
|
+
},
|
|
34
34
|
)
|
|
35
35
|
|
|
36
36
|
|
|
@@ -66,7 +66,8 @@ def extract_all(**kwargs) -> None:
|
|
|
66
66
|
logger.info(f"Available projects: {client.get_projects()}\n")
|
|
67
67
|
|
|
68
68
|
query_builder = BigQueryQueryBuilder(
|
|
69
|
-
regions=client.get_regions(), datasets=client.get_datasets()
|
|
69
|
+
regions=client.get_regions(),
|
|
70
|
+
datasets=client.get_datasets(),
|
|
70
71
|
)
|
|
71
72
|
|
|
72
73
|
storage = LocalStorage(directory=output_directory)
|
|
@@ -61,7 +61,8 @@ class BigQueryQueryBuilder(AbstractQueryBuilder):
|
|
|
61
61
|
sync_tags: Optional[bool] = False,
|
|
62
62
|
):
|
|
63
63
|
super().__init__(
|
|
64
|
-
time_filter=time_filter, duplicated=BIGQUERY_DUPLICATES
|
|
64
|
+
time_filter=time_filter,
|
|
65
|
+
duplicated=BIGQUERY_DUPLICATES,
|
|
65
66
|
)
|
|
66
67
|
self._regions = regions
|
|
67
68
|
self._datasets = datasets
|
|
@@ -110,7 +111,7 @@ class BigQueryQueryBuilder(AbstractQueryBuilder):
|
|
|
110
111
|
|
|
111
112
|
if asset in REGION_REQUIRED:
|
|
112
113
|
logger.info(
|
|
113
|
-
f"\tWill run queries with following region params: {self._regions}"
|
|
114
|
+
f"\tWill run queries with following region params: {self._regions}",
|
|
114
115
|
)
|
|
115
116
|
return [
|
|
116
117
|
self._format(query, {"project": project, "region": region})
|
|
@@ -119,7 +120,7 @@ class BigQueryQueryBuilder(AbstractQueryBuilder):
|
|
|
119
120
|
|
|
120
121
|
if asset in DATASET_REQUIRED:
|
|
121
122
|
logger.info(
|
|
122
|
-
f"\tWill run queries with following dataset params: {self._datasets}"
|
|
123
|
+
f"\tWill run queries with following dataset params: {self._datasets}",
|
|
123
124
|
)
|
|
124
125
|
return [
|
|
125
126
|
self._format(query, {"project": project, "dataset": dataset})
|
|
@@ -23,12 +23,12 @@ POSTGRES_ASSETS: SupportedAssets = OrderedDict(
|
|
|
23
23
|
WarehouseAsset.GROUP,
|
|
24
24
|
WarehouseAsset.USER,
|
|
25
25
|
),
|
|
26
|
-
}
|
|
26
|
+
},
|
|
27
27
|
)
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
POSTGRES_USER = "CASTOR_POSTGRES_USER"
|
|
31
|
-
POSTGRES_PASSWORD = "CASTOR_POSTGRES_PASSWORD"
|
|
31
|
+
POSTGRES_PASSWORD = "CASTOR_POSTGRES_PASSWORD" # noqa: S105
|
|
32
32
|
POSTGRES_HOST = "CASTOR_POSTGRES_HOST"
|
|
33
33
|
POSTGRES_PORT = "CASTOR_POSTGRES_PORT"
|
|
34
34
|
POSTGRES_DATABASE = "CASTOR_POSTGRES_DATABASE"
|
|
@@ -59,7 +59,9 @@ def extract_all(**kwargs) -> None:
|
|
|
59
59
|
storage = LocalStorage(directory=output_directory)
|
|
60
60
|
|
|
61
61
|
extractor = ExtractionProcessor(
|
|
62
|
-
client=client, query_builder=query_builder, storage=storage
|
|
62
|
+
client=client,
|
|
63
|
+
query_builder=query_builder,
|
|
64
|
+
storage=storage,
|
|
63
65
|
)
|
|
64
66
|
|
|
65
67
|
for group in POSTGRES_ASSETS.values():
|
|
@@ -26,11 +26,11 @@ REDSHIFT_ASSETS: SupportedAssets = OrderedDict(
|
|
|
26
26
|
WarehouseAsset.USER,
|
|
27
27
|
WarehouseAsset.GROUP,
|
|
28
28
|
),
|
|
29
|
-
}
|
|
29
|
+
},
|
|
30
30
|
)
|
|
31
31
|
|
|
32
32
|
REDSHIFT_USER = "CASTOR_REDSHIFT_USER"
|
|
33
|
-
REDSHIFT_PASSWORD = "CASTOR_REDSHIFT_PASSWORD"
|
|
33
|
+
REDSHIFT_PASSWORD = "CASTOR_REDSHIFT_PASSWORD" # noqa: S105
|
|
34
34
|
REDSHIFT_HOST = "CASTOR_REDSHIFT_HOST"
|
|
35
35
|
REDSHIFT_PORT = "CASTOR_REDSHIFT_PORT"
|
|
36
36
|
REDSHIFT_DATABASE = "CASTOR_REDSHIFT_DATABASE"
|
|
@@ -61,7 +61,9 @@ def extract_all(**kwargs) -> None:
|
|
|
61
61
|
storage = LocalStorage(directory=output_directory)
|
|
62
62
|
|
|
63
63
|
extractor = ExtractionProcessor(
|
|
64
|
-
client=client, query_builder=query_builder, storage=storage
|
|
64
|
+
client=client,
|
|
65
|
+
query_builder=query_builder,
|
|
66
|
+
storage=storage,
|
|
65
67
|
)
|
|
66
68
|
|
|
67
69
|
for group in REDSHIFT_ASSETS.values():
|
|
@@ -104,7 +104,7 @@ class SnowflakeClient(SqlalchemyClient):
|
|
|
104
104
|
"""check and set warehouse"""
|
|
105
105
|
if self._warehouse:
|
|
106
106
|
logger.info(
|
|
107
|
-
f"Warehouse was provided in arguments: {self._warehouse}"
|
|
107
|
+
f"Warehouse was provided in arguments: {self._warehouse}",
|
|
108
108
|
)
|
|
109
109
|
_use(connection, UseResource.WAREHOUSE, self._warehouse)
|
|
110
110
|
return
|
|
@@ -48,7 +48,7 @@ def test_build_uri(_):
|
|
|
48
48
|
|
|
49
49
|
@patch.object(SqlalchemyClient, "__init__")
|
|
50
50
|
@patch(
|
|
51
|
-
"source.packages.extractor.castor_extractor.warehouse.snowflake.client._use"
|
|
51
|
+
"source.packages.extractor.castor_extractor.warehouse.snowflake.client._use",
|
|
52
52
|
)
|
|
53
53
|
def test_role(mocked_used, _):
|
|
54
54
|
client = get_snowflake_connection()
|
|
@@ -30,12 +30,12 @@ SNOWFLAKE_ASSETS: SupportedAssets = OrderedDict(
|
|
|
30
30
|
WarehouseAsset.USER,
|
|
31
31
|
),
|
|
32
32
|
WarehouseAssetGroup.LINEAGE: LINEAGE_ASSETS,
|
|
33
|
-
}
|
|
33
|
+
},
|
|
34
34
|
)
|
|
35
35
|
|
|
36
36
|
SNOWFLAKE_ACCOUNT = "CASTOR_SNOWFLAKE_ACCOUNT"
|
|
37
37
|
SNOWFLAKE_USER = "CASTOR_SNOWFLAKE_USER"
|
|
38
|
-
SNOWFLAKE_PASSWORD = "CASTOR_SNOWFLAKE_PASSWORD"
|
|
38
|
+
SNOWFLAKE_PASSWORD = "CASTOR_SNOWFLAKE_PASSWORD" # noqa: S105
|
|
39
39
|
|
|
40
40
|
|
|
41
41
|
def _credentials(params: dict) -> dict:
|
|
@@ -69,7 +69,9 @@ def extract_all(**kwargs) -> None:
|
|
|
69
69
|
storage = LocalStorage(directory=output_directory)
|
|
70
70
|
|
|
71
71
|
extractor = ExtractionProcessor(
|
|
72
|
-
client=client, query_builder=query_builder, storage=storage
|
|
72
|
+
client=client,
|
|
73
|
+
query_builder=query_builder,
|
|
74
|
+
storage=storage,
|
|
73
75
|
)
|
|
74
76
|
|
|
75
77
|
for group in SNOWFLAKE_ASSETS.values():
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: castor-extractor
|
|
3
|
-
Version: 0.5.3
|
|
3
|
+
Version: 0.5.6
|
|
4
4
|
Summary: Extract your metadata assets.
|
|
5
5
|
Home-page: https://www.castordoc.com/
|
|
6
6
|
License: EULA
|
|
@@ -22,7 +22,7 @@ Provides-Extra: redshift
|
|
|
22
22
|
Provides-Extra: snowflake
|
|
23
23
|
Provides-Extra: tableau
|
|
24
24
|
Requires-Dist: cachetools (>=4.2.4,<5.0.0)
|
|
25
|
-
Requires-Dist: certifi (==
|
|
25
|
+
Requires-Dist: certifi (==2023.7.22)
|
|
26
26
|
Requires-Dist: charset-normalizer (>=2.0.7,<3.0.0)
|
|
27
27
|
Requires-Dist: click (>=8.0,<8.1)
|
|
28
28
|
Requires-Dist: google-api-core (>=2.1.1,<3.0.0)
|