quollio-core 0.6.5__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {quollio_core-0.6.5 → quollio_core-0.8.0}/PKG-INFO +3 -4
- {quollio_core-0.6.5 → quollio_core-0.8.0}/pyproject.toml +2 -3
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/__init__.py +1 -1
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/bigquery.py +47 -2
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/bricks.py +26 -4
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/profilers/bigquery.py +86 -26
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/profilers/redshift.py +74 -38
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/profilers/stats.py +37 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/redshift.py +48 -5
- quollio_core-0.8.0/quollio_core/repository/cloud_resource_manager.py +25 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/snowflake.py +4 -1
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/teradata.py +25 -2
- {quollio_core-0.6.5 → quollio_core-0.8.0}/LICENSE +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/README.md +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/.gitignore +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/README.md +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/analyses/.gitkeep +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/dbt_project.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/macros/.gitkeep +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.sql +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.sql +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/models/sources.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/packages_hub.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/packages_local.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/profiles/profiles_template.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/seeds/.gitkeep +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/snapshots/.gitkeep +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/README.md +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/analyses/.gitkeep +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/dbt_project.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/macros/.gitkeep +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/macros/materialization/divided_view.sql +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/models/quollio_lineage_table_level.sql +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/models/quollio_lineage_table_level.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/models/quollio_lineage_view_level.sql +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/models/quollio_lineage_view_level.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/models/quollio_sqllineage_sources.sql +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/models/quollio_sqllineage_sources.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/models/quollio_stats_columns.sql +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/models/quollio_stats_columns.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/models/quollio_stats_profiling_columns.sql +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/models/quollio_stats_profiling_columns.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/models/sources.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/packages_hub.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/packages_local.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/profiles/profiles_template.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/seeds/.gitkeep +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/snapshots/.gitkeep +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/seeds/.gitkeep +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/README.md +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/analyses/.gitkeep +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/dbt_project.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/macros/.gitkeep +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/macros/materialization/get_imported_databases.sql +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_column_level.sql +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_column_level.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_table_level.sql +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_table_level.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.sql +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.sql +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.sql +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/models/sources.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/packages_hub.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/packages_local.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/profiles/profiles_template.yml +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/seeds/.gitkeep +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/snapshots/.gitkeep +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/helper/__init__.py +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/helper/core.py +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/helper/env_default.py +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/helper/log.py +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/helper/log_utils.py +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/models/avroasset.py +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/models/qdc.py +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/profilers/__init__.py +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/profilers/databricks.py +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/profilers/lineage.py +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/profilers/qdc.py +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/profilers/snowflake.py +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/profilers/sqllineage.py +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/profilers/teradata/lineage.py +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/profilers/teradata/stats.py +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/repository/__init__.py +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/repository/bigquery.py +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/repository/databricks.py +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/repository/dbt.py +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/repository/qdc.py +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/repository/redshift.py +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/repository/snowflake.py +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/repository/ssm.py +0 -0
- {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/repository/teradata.py +0 -0
@@ -1,14 +1,13 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: quollio-core
|
3
|
-
Version: 0.6.5
|
3
|
+
Version: 0.8.0
|
4
4
|
Summary: Quollio Core
|
5
5
|
Author-email: quollio-dev <qt.dev@quollio.com>
|
6
6
|
Maintainer-email: RyoAriyama <ryo.arym@gmail.com>, tharuta <35373297+TakumiHaruta@users.noreply.github.com>
|
7
|
-
Requires-Python: >=3.8
|
7
|
+
Requires-Python: >=3.9
|
8
8
|
Description-Content-Type: text/markdown
|
9
9
|
Classifier: Programming Language :: Python
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
11
|
-
Classifier: Programming Language :: Python :: 3.8
|
12
11
|
Classifier: Programming Language :: Python :: 3.9
|
13
12
|
Classifier: Programming Language :: Python :: 3.10
|
14
13
|
Classifier: Programming Language :: Python :: 3.11
|
@@ -29,7 +28,7 @@ Requires-Dist: PyYAML==6.0.1
|
|
29
28
|
Requires-Dist: requests==2.31.0
|
30
29
|
Requires-Dist: pyjwt==2.8.0
|
31
30
|
Requires-Dist: redshift-connector==2.0.915
|
32
|
-
Requires-Dist: snowflake-connector-python==3.
|
31
|
+
Requires-Dist: snowflake-connector-python==3.15.0
|
33
32
|
Requires-Dist: databricks-sdk==0.17.0
|
34
33
|
Requires-Dist: databricks-sql-connector==2.9.5
|
35
34
|
Requires-Dist: sqlglot==20.8.0
|
@@ -5,7 +5,7 @@ build-backend = "flit_core.buildapi"
|
|
5
5
|
[project]
|
6
6
|
name = "quollio-core"
|
7
7
|
readme = "README.md"
|
8
|
-
requires-python = ">=3.8"
|
8
|
+
requires-python = ">=3.9"
|
9
9
|
license = {file = "LICENSE"}
|
10
10
|
authors = [
|
11
11
|
{name = "quollio-dev", email = "qt.dev@quollio.com"},
|
@@ -17,7 +17,6 @@ maintainers = [
|
|
17
17
|
classifiers = [
|
18
18
|
"Programming Language :: Python",
|
19
19
|
"Programming Language :: Python :: 3",
|
20
|
-
"Programming Language :: Python :: 3.8",
|
21
20
|
"Programming Language :: Python :: 3.9",
|
22
21
|
"Programming Language :: Python :: 3.10",
|
23
22
|
"Programming Language :: Python :: 3.11",
|
@@ -40,7 +39,7 @@ dependencies = [
|
|
40
39
|
,"requests==2.31.0"
|
41
40
|
,"pyjwt==2.8.0"
|
42
41
|
,"redshift-connector==2.0.915"
|
43
|
-
,"snowflake-connector-python==3.
|
42
|
+
,"snowflake-connector-python==3.15.0"
|
44
43
|
,"databricks-sdk==0.17.0"
|
45
44
|
,"databricks-sql-connector==2.9.5"
|
46
45
|
,"sqlglot==20.8.0"
|
@@ -3,10 +3,12 @@ import json
|
|
3
3
|
|
4
4
|
from google.auth.credentials import Credentials
|
5
5
|
|
6
|
+
from quollio_core.helper.core import is_valid_domain
|
6
7
|
from quollio_core.helper.env_default import env_default
|
7
8
|
from quollio_core.helper.log_utils import configure_logging, error_handling_decorator, logger
|
8
9
|
from quollio_core.profilers.bigquery import bigquery_table_lineage, bigquery_table_stats
|
9
|
-
from quollio_core.
|
10
|
+
from quollio_core.profilers.qdc import gen_existing_global_id_dict, get_avro_file_content
|
11
|
+
from quollio_core.repository import qdc, ssm
|
10
12
|
from quollio_core.repository.bigquery import BigQueryClient, get_credentials, get_org_id
|
11
13
|
|
12
14
|
|
@@ -30,8 +32,15 @@ def load_lineage(
|
|
30
32
|
org_id: str,
|
31
33
|
credentials: Credentials,
|
32
34
|
qdc_client: qdc.QDCExternalAPIClient,
|
35
|
+
enable_multi_projects: str,
|
33
36
|
) -> None:
|
34
37
|
logger.info("Loading lineage data.")
|
38
|
+
file_content = get_avro_file_content(
|
39
|
+
tenant_id=tenant_id,
|
40
|
+
account_id=org_id,
|
41
|
+
qdc_client=qdc_client,
|
42
|
+
)
|
43
|
+
existing_global_ids = gen_existing_global_id_dict(avro_content=file_content)
|
35
44
|
bigquery_table_lineage(
|
36
45
|
qdc_client=qdc_client,
|
37
46
|
tenant_id=tenant_id,
|
@@ -39,6 +48,8 @@ def load_lineage(
|
|
39
48
|
regions=regions,
|
40
49
|
credentials=credentials,
|
41
50
|
org_id=org_id,
|
51
|
+
existing_global_ids=existing_global_ids,
|
52
|
+
enable_multi_projects=enable_multi_projects,
|
42
53
|
)
|
43
54
|
logger.info("Lineage data loaded successfully.")
|
44
55
|
|
@@ -146,6 +157,27 @@ if __name__ == "__main__":
|
|
146
157
|
help="Comma-separated list of dataplex stats tables - <project_id>.<dataset_id>.<table_id>",
|
147
158
|
)
|
148
159
|
|
160
|
+
parser.add_argument(
|
161
|
+
"--enable_multi_projects",
|
162
|
+
type=str,
|
163
|
+
choices=["ENABLED", "DISABLED"],
|
164
|
+
action=env_default("ENABLE_MULTI_PROJECTS"),
|
165
|
+
default="DISABLED",
|
166
|
+
required=False,
|
167
|
+
help="Whether to enable multi-projects support. If set to 'true', \
|
168
|
+
the script will load lineage and stats from all projects accessible by the credentials. Default is 'false'.",
|
169
|
+
)
|
170
|
+
parser.add_argument(
|
171
|
+
"--external_api_access",
|
172
|
+
type=str,
|
173
|
+
choices=["PUBLIC", "VPC_ENDPOINT"],
|
174
|
+
action=env_default("EXTERNAL_API_ACCESS"),
|
175
|
+
default="PUBLIC",
|
176
|
+
required=False,
|
177
|
+
help="Access method to Quollio API. Default 'PUBLIC'. Choose 'VPC_ENDPOINT'\
|
178
|
+
if you use API Gateway VPC Endpoint, DefaultValue is set to PUBLIC.",
|
179
|
+
)
|
180
|
+
|
149
181
|
args = parser.parse_args()
|
150
182
|
|
151
183
|
# Validate that dataplex_stats_tables is provided if load_stats is in commands
|
@@ -154,9 +186,21 @@ if __name__ == "__main__":
|
|
154
186
|
|
155
187
|
configure_logging(args.log_level)
|
156
188
|
|
189
|
+
api_url = args.api_url
|
190
|
+
if args.external_api_access == "VPC_ENDPOINT":
|
191
|
+
logger.debug("Using VPC Endpoint for Quollio API access")
|
192
|
+
api_url, err = ssm.get_parameter_by_assume_role(args.api_url)
|
193
|
+
if err is not None:
|
194
|
+
logger.error("Fail to ssm.get_parameter_by_assume_role. {err}".format(err=err))
|
195
|
+
raise Exception("Fail to ssm.get_parameter_by_assume_role. {err}".format(err=err))
|
196
|
+
is_domain_valid = is_valid_domain(domain=api_url, domain_type=args.external_api_access)
|
197
|
+
if not is_domain_valid:
|
198
|
+
raise ValueError("The format of quollio API URL is invalid. The URL must end with `.com` or /api.")
|
199
|
+
logger.debug("API URL: %s", api_url)
|
200
|
+
|
157
201
|
credentials = initialize_credentials(args.credentials_json)
|
158
202
|
org_id = initialize_org_id(args.credentials_json)
|
159
|
-
qdc_client = qdc.initialize_qdc_client(
|
203
|
+
qdc_client = qdc.initialize_qdc_client(api_url, args.client_id, args.client_secret)
|
160
204
|
bq_client = initialize_bq_client(credentials, args.project_id)
|
161
205
|
if args.project_id is None:
|
162
206
|
args.project_id = json.loads(args.credentials_json)["project_id"]
|
@@ -170,6 +214,7 @@ if __name__ == "__main__":
|
|
170
214
|
org_id=org_id,
|
171
215
|
credentials=credentials,
|
172
216
|
qdc_client=qdc_client,
|
217
|
+
enable_multi_projects=args.enable_multi_projects,
|
173
218
|
)
|
174
219
|
|
175
220
|
if "load_stats" in args.commands:
|
@@ -3,7 +3,7 @@ import logging
|
|
3
3
|
import os
|
4
4
|
import shutil
|
5
5
|
|
6
|
-
from quollio_core.helper.core import setup_dbt_profile, trim_prefix
|
6
|
+
from quollio_core.helper.core import is_valid_domain, setup_dbt_profile, trim_prefix
|
7
7
|
from quollio_core.helper.env_default import env_default
|
8
8
|
from quollio_core.helper.log import set_log_level
|
9
9
|
from quollio_core.profilers.databricks import (
|
@@ -13,7 +13,7 @@ from quollio_core.profilers.databricks import (
|
|
13
13
|
)
|
14
14
|
from quollio_core.profilers.stats import get_column_stats_items
|
15
15
|
from quollio_core.repository import databricks as db
|
16
|
-
from quollio_core.repository import dbt, qdc
|
16
|
+
from quollio_core.repository import dbt, qdc, ssm
|
17
17
|
|
18
18
|
logger = logging.getLogger(__name__)
|
19
19
|
|
@@ -256,6 +256,16 @@ if __name__ == "__main__":
|
|
256
256
|
required=False,
|
257
257
|
help="Whether to ingest column lineage into QDIC or not. Default value is False",
|
258
258
|
)
|
259
|
+
parser.add_argument(
|
260
|
+
"--external_api_access",
|
261
|
+
type=str,
|
262
|
+
choices=["PUBLIC", "VPC_ENDPOINT"],
|
263
|
+
action=env_default("EXTERNAL_API_ACCESS"),
|
264
|
+
default="PUBLIC",
|
265
|
+
required=False,
|
266
|
+
help="Access method to Quollio API. Default 'PUBLIC'. Choose 'VPC_ENDPOINT'\
|
267
|
+
if you use API Gateway VPC Endpoint, DefaultValue is set to PUBLIC.",
|
268
|
+
)
|
259
269
|
|
260
270
|
stats_items = get_column_stats_items()
|
261
271
|
parser.add_argument(
|
@@ -294,9 +304,21 @@ if __name__ == "__main__":
|
|
294
304
|
dbt_macro_source=args.dbt_macro_source,
|
295
305
|
)
|
296
306
|
|
307
|
+
api_url = args.api_url
|
308
|
+
if args.external_api_access == "VPC_ENDPOINT":
|
309
|
+
logger.debug("Using VPC Endpoint for Quollio API access")
|
310
|
+
api_url, err = ssm.get_parameter_by_assume_role(args.api_url)
|
311
|
+
if err is not None:
|
312
|
+
logger.error("Fail to ssm.get_parameter_by_assume_role. {err}".format(err=err))
|
313
|
+
raise Exception("Fail to ssm.get_parameter_by_assume_role. {err}".format(err=err))
|
314
|
+
is_domain_valid = is_valid_domain(domain=api_url, domain_type=args.external_api_access)
|
315
|
+
if not is_domain_valid:
|
316
|
+
raise ValueError("The format of quollio API URL is invalid. The URL must end with `.com` or /api.")
|
317
|
+
logger.debug("API URL: %s", api_url)
|
318
|
+
|
297
319
|
if "load_lineage" in args.commands:
|
298
320
|
qdc_client = qdc.QDCExternalAPIClient(
|
299
|
-
base_url=
|
321
|
+
base_url=api_url, client_id=args.client_id, client_secret=args.client_secret
|
300
322
|
)
|
301
323
|
load_lineage(
|
302
324
|
conn=conn,
|
@@ -308,7 +330,7 @@ if __name__ == "__main__":
|
|
308
330
|
|
309
331
|
if "load_stats" in args.commands:
|
310
332
|
qdc_client = qdc.QDCExternalAPIClient(
|
311
|
-
base_url=
|
333
|
+
base_url=api_url, client_id=args.client_id, client_secret=args.client_secret
|
312
334
|
)
|
313
335
|
databricks_column_stats(
|
314
336
|
conn=conn,
|
@@ -1,15 +1,25 @@
|
|
1
|
+
import io
|
2
|
+
import os
|
1
3
|
from typing import Dict, List
|
2
4
|
|
5
|
+
from fastavro import writer
|
3
6
|
from google.auth.credentials import Credentials
|
4
7
|
|
8
|
+
from quollio_core.helper.core import new_global_id
|
5
9
|
from quollio_core.helper.log_utils import error_handling_decorator, logger
|
6
|
-
from quollio_core.
|
10
|
+
from quollio_core.models.avroasset import AvroAsset
|
11
|
+
from quollio_core.models.qdc import GetImportURLRequest
|
12
|
+
from quollio_core.profilers.lineage import (
|
13
|
+
gen_table_avro_lineage_payload,
|
14
|
+
gen_table_lineage_payload,
|
15
|
+
parse_bigquery_table_lineage,
|
16
|
+
)
|
7
17
|
from quollio_core.profilers.stats import gen_table_stats_payload
|
8
18
|
from quollio_core.repository import qdc
|
9
19
|
from quollio_core.repository.bigquery import BigQueryClient, GCPLineageClient, get_entitiy_reference, get_search_request
|
20
|
+
from quollio_core.repository.cloud_resource_manager import CloudResourceManagerClient
|
10
21
|
|
11
22
|
|
12
|
-
@error_handling_decorator
|
13
23
|
def bigquery_table_lineage(
|
14
24
|
qdc_client: qdc.QDCExternalAPIClient,
|
15
25
|
tenant_id: str,
|
@@ -17,34 +27,84 @@ def bigquery_table_lineage(
|
|
17
27
|
regions: list,
|
18
28
|
org_id: str,
|
19
29
|
credentials: Credentials,
|
30
|
+
existing_global_ids: Dict[str, bool],
|
31
|
+
enable_multi_projects: str = "DISABLED",
|
20
32
|
) -> None:
|
21
33
|
lineage_client = GCPLineageClient(credentials)
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
34
|
+
crm_client = CloudResourceManagerClient(credentials)
|
35
|
+
|
36
|
+
target_project_ids = []
|
37
|
+
if enable_multi_projects == "ENABLED":
|
38
|
+
try:
|
39
|
+
target_projects = crm_client.list_projects()
|
40
|
+
except Exception as e:
|
41
|
+
raise Exception(f"ListProjects by cloud resource manager failed. Err. {str(e)}")
|
42
|
+
|
43
|
+
for target_project in target_projects["projects"]:
|
44
|
+
if target_project is None:
|
45
|
+
logger.warning("projects.Projects returns None. Proceed to loop project value")
|
46
|
+
continue
|
47
|
+
|
48
|
+
target_project_id = target_project.get("projectId", "")
|
49
|
+
if target_project_id == "":
|
50
|
+
logger.warning("projects.Projects is empty string. Proceed to loop project value")
|
51
|
+
continue
|
52
|
+
|
53
|
+
target_project_ids.append(target_project_id)
|
54
|
+
else:
|
55
|
+
target_project_ids.append(project_id)
|
56
|
+
|
57
|
+
update_table_lineage_inputs = []
|
58
|
+
for target_project_id in target_project_ids:
|
59
|
+
bq_client = BigQueryClient(credentials, target_project_id)
|
60
|
+
datasets = bq_client.list_dataset_ids()
|
61
|
+
all_tables = generate_table_list(bq_client, datasets)
|
62
|
+
lineage_links = generate_lineage_links(all_tables, lineage_client, target_project_id, regions)
|
63
|
+
lineage_links = parse_bigquery_table_lineage(lineage_links)
|
64
|
+
logger.debug("The following resources will be ingested. %s", lineage_links)
|
65
|
+
|
66
|
+
update_table_lineage_input = gen_table_lineage_payload(
|
67
|
+
tenant_id=tenant_id, endpoint=org_id, tables=lineage_links
|
40
68
|
)
|
41
|
-
|
42
|
-
|
43
|
-
|
69
|
+
update_table_lineage_input = gen_table_avro_lineage_payload(
|
70
|
+
tenant_id=tenant_id,
|
71
|
+
endpoint=org_id,
|
72
|
+
tables=lineage_links,
|
73
|
+
existing_global_ids=existing_global_ids,
|
44
74
|
)
|
45
|
-
|
46
|
-
|
47
|
-
|
75
|
+
update_table_lineage_inputs.extend(update_table_lineage_input)
|
76
|
+
|
77
|
+
stack_name = os.getenv("CF_STACK")
|
78
|
+
import_req = GetImportURLRequest(
|
79
|
+
service_name="bigquery",
|
80
|
+
source_name=stack_name,
|
81
|
+
file_name="{name}.avro".format(name=stack_name),
|
82
|
+
override_logical_name="false",
|
83
|
+
update_mode="partial",
|
84
|
+
)
|
85
|
+
datasource_id = new_global_id(tenant_id=tenant_id, cluster_id=org_id, data_id="", data_type="data_source")
|
86
|
+
logger.debug("Datasource id: {dsrc_id}".format(dsrc_id=datasource_id))
|
87
|
+
|
88
|
+
import_res = qdc_client.get_import_url(datasource_id=datasource_id, payload=import_req)
|
89
|
+
if import_res is None:
|
90
|
+
logger.error("get_import_url failed. Please retry `load_lineage` again")
|
91
|
+
return
|
92
|
+
logger.debug("ImportResponse: {res}".format(res=import_res))
|
93
|
+
|
94
|
+
avro_schema = AvroAsset.avro_schema_to_python()
|
95
|
+
|
96
|
+
buffer = io.BytesIO()
|
97
|
+
writer(buffer, avro_schema, update_table_lineage_inputs)
|
98
|
+
|
99
|
+
res = qdc_client.upload_file(
|
100
|
+
url=import_res.location,
|
101
|
+
metadata=import_res.datasource_metadata_response_body,
|
102
|
+
buffer=buffer.getbuffer().tobytes(),
|
103
|
+
)
|
104
|
+
|
105
|
+
if res == 200:
|
106
|
+
logger.info("Upload table lineage is finished.")
|
107
|
+
return
|
48
108
|
|
49
109
|
|
50
110
|
@error_handling_decorator
|
@@ -1,10 +1,17 @@
|
|
1
|
+
import io
|
1
2
|
import logging
|
2
|
-
|
3
|
+
import os
|
4
|
+
from typing import Dict, List
|
3
5
|
|
4
|
-
from
|
6
|
+
from fastavro import writer
|
7
|
+
|
8
|
+
from quollio_core.helper.core import new_global_id
|
9
|
+
from quollio_core.models.avroasset import AvroAsset
|
10
|
+
from quollio_core.models.qdc import GetImportURLRequest
|
11
|
+
from quollio_core.profilers.lineage import gen_table_avro_lineage_payload, gen_table_lineage_payload_inputs
|
5
12
|
from quollio_core.profilers.sqllineage import SQLLineage
|
6
13
|
from quollio_core.profilers.stats import (
|
7
|
-
|
14
|
+
gen_table_stats_avro_payload_from_tuple,
|
8
15
|
get_is_target_stats_items,
|
9
16
|
render_sql_for_stats,
|
10
17
|
)
|
@@ -18,6 +25,7 @@ def redshift_table_level_lineage(
|
|
18
25
|
qdc_client: qdc.QDCExternalAPIClient,
|
19
26
|
tenant_id: str,
|
20
27
|
dbt_table_name: str,
|
28
|
+
existing_global_ids: Dict[str, bool],
|
21
29
|
) -> None:
|
22
30
|
with redshift.RedshiftQueryExecutor(config=conn) as redshift_executor:
|
23
31
|
results = redshift_executor.get_query_results(
|
@@ -34,28 +42,39 @@ def redshift_table_level_lineage(
|
|
34
42
|
)
|
35
43
|
lineage_payload_inputs = gen_table_lineage_payload_inputs(input_data=results)
|
36
44
|
|
37
|
-
update_table_lineage_inputs =
|
45
|
+
update_table_lineage_inputs = gen_table_avro_lineage_payload(
|
38
46
|
tenant_id=tenant_id,
|
39
47
|
endpoint=conn.host,
|
40
48
|
tables=lineage_payload_inputs,
|
49
|
+
existing_global_ids=existing_global_ids,
|
50
|
+
)
|
51
|
+
stack_name = os.getenv("CF_STACK")
|
52
|
+
import_req = GetImportURLRequest(
|
53
|
+
service_name="redshift",
|
54
|
+
source_name=stack_name,
|
55
|
+
file_name="{name}.avro".format(name=stack_name),
|
56
|
+
override_logical_name="false",
|
57
|
+
update_mode="partial",
|
41
58
|
)
|
59
|
+
datasource_id = new_global_id(tenant_id=tenant_id, cluster_id=conn.host, data_id="", data_type="data_source")
|
60
|
+
logger.debug("Datasource id: {dsrc_id}".format(dsrc_id=datasource_id))
|
61
|
+
import_res = qdc_client.get_import_url(datasource_id=datasource_id, payload=import_req)
|
62
|
+
if import_res is None:
|
63
|
+
logger.error("get_import_url failed. Please retry `load_lineage` again")
|
64
|
+
return
|
65
|
+
logger.debug("ImportResponse: {res}".format(res=import_res))
|
42
66
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
payload=update_table_lineage_input.upstreams.as_dict(),
|
55
|
-
)
|
56
|
-
if status_code == 200:
|
57
|
-
req_count += 1
|
58
|
-
logger.info(f"Generating table lineage is finished. {req_count} lineages are ingested.")
|
67
|
+
avro_schema = AvroAsset.avro_schema_to_python()
|
68
|
+
|
69
|
+
buffer = io.BytesIO()
|
70
|
+
writer(buffer, avro_schema, update_table_lineage_inputs)
|
71
|
+
res = qdc_client.upload_file(
|
72
|
+
url=import_res.location,
|
73
|
+
metadata=import_res.datasource_metadata_response_body,
|
74
|
+
buffer=buffer.getbuffer().tobytes(),
|
75
|
+
)
|
76
|
+
if res == 200:
|
77
|
+
logger.info("Upload table lineage is finished.")
|
59
78
|
return
|
60
79
|
|
61
80
|
|
@@ -82,6 +101,7 @@ def redshift_table_stats(
|
|
82
101
|
qdc_client: qdc.QDCExternalAPIClient,
|
83
102
|
tenant_id: str,
|
84
103
|
stats_items: List[str],
|
104
|
+
existing_global_ids: Dict[str, bool],
|
85
105
|
) -> None:
|
86
106
|
is_aggregate_items = get_is_target_stats_items(stats_items=stats_items)
|
87
107
|
with redshift.RedshiftQueryExecutor(config=conn) as redshift_executor:
|
@@ -92,7 +112,7 @@ def redshift_table_stats(
|
|
92
112
|
stats_views = redshift_executor.get_query_results(query=stats_query)
|
93
113
|
logger.info("Found %s for table statistics.", len(stats_views))
|
94
114
|
|
95
|
-
|
115
|
+
update_stats_inputs = list()
|
96
116
|
for stats_view in stats_views:
|
97
117
|
table_fqn = "{catalog}.{schema}.{table}".format(
|
98
118
|
catalog=stats_view[0], schema=stats_view[1], table=stats_view[2]
|
@@ -100,23 +120,39 @@ def redshift_table_stats(
|
|
100
120
|
stats_query = render_sql_for_stats(is_aggregate_items=is_aggregate_items, table_fqn=table_fqn)
|
101
121
|
logger.debug(f"The following sql will be fetched to retrieve stats values. {stats_query}")
|
102
122
|
stats_result = redshift_executor.get_query_results(query=stats_query)
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
123
|
+
payload = gen_table_stats_avro_payload_from_tuple(
|
124
|
+
tenant_id=tenant_id, endpoint=conn.host, stats=stats_result, existing_global_ids=existing_global_ids
|
125
|
+
)
|
126
|
+
update_stats_inputs += payload
|
127
|
+
|
128
|
+
stack_name = os.getenv("CF_STACK")
|
129
|
+
import_req = GetImportURLRequest(
|
130
|
+
service_name="redshift",
|
131
|
+
source_name=stack_name,
|
132
|
+
file_name="{name}.avro".format(name=stack_name),
|
133
|
+
override_logical_name="false",
|
134
|
+
update_mode="partial",
|
135
|
+
)
|
136
|
+
datasource_id = new_global_id(tenant_id=tenant_id, cluster_id=conn.host, data_id="", data_type="data_source")
|
137
|
+
logger.debug("Datasource id: {dsrc_id}".format(dsrc_id=datasource_id))
|
138
|
+
import_res = qdc_client.get_import_url(datasource_id=datasource_id, payload=import_req)
|
139
|
+
if import_res is None:
|
140
|
+
logger.error("get_import_url failed. Please retry load_stats again")
|
141
|
+
return
|
142
|
+
logger.debug("ImportResponse: {res}".format(res=import_res))
|
143
|
+
|
144
|
+
avro_schema = AvroAsset.avro_schema_to_python()
|
145
|
+
|
146
|
+
buffer = io.BytesIO()
|
147
|
+
writer(buffer, avro_schema, update_stats_inputs)
|
148
|
+
res = qdc_client.upload_file(
|
149
|
+
url=import_res.location,
|
150
|
+
metadata=import_res.datasource_metadata_response_body,
|
151
|
+
buffer=buffer.getbuffer().tobytes(),
|
152
|
+
)
|
153
|
+
if res == 200:
|
154
|
+
logger.info("Generating table stats is finished.")
|
155
|
+
|
120
156
|
return
|
121
157
|
|
122
158
|
|
@@ -147,6 +147,43 @@ def gen_table_stats_payload(tenant_id: str, endpoint: str, stats: List[Dict[str,
|
|
147
147
|
return payloads
|
148
148
|
|
149
149
|
|
150
|
+
def gen_table_stats_avro_payload_from_tuple(
|
151
|
+
tenant_id: str, endpoint: str, stats: Tuple[List[str]], existing_global_ids: Dict[str, bool]
|
152
|
+
) -> List[Dict[str, str]]:
|
153
|
+
payloads = list()
|
154
|
+
for stat in stats:
|
155
|
+
db_name, schema_name, table_name, column_name = stat[:4]
|
156
|
+
|
157
|
+
global_id_arg = "{db}{schema}{table}{column}".format(
|
158
|
+
db=db_name, schema=schema_name, table=table_name, column=column_name
|
159
|
+
)
|
160
|
+
table_global_id = new_global_id(
|
161
|
+
tenant_id=tenant_id, cluster_id=endpoint, data_id=global_id_arg, data_type="column"
|
162
|
+
)
|
163
|
+
|
164
|
+
if existing_global_ids.get(table_global_id) is not True:
|
165
|
+
continue
|
166
|
+
|
167
|
+
avro_assets = AvroAsset(
|
168
|
+
id=table_global_id,
|
169
|
+
object_type="column",
|
170
|
+
parents=[db_name, schema_name, table_name],
|
171
|
+
name=column_name,
|
172
|
+
stats_max=convert_value_type(stat[4], True),
|
173
|
+
stats_min=convert_value_type(stat[5], True),
|
174
|
+
stats_mean=convert_value_type(stat[8], True),
|
175
|
+
stats_median=convert_value_type(stat[9], True),
|
176
|
+
stats_mode=convert_value_type(stat[10], True),
|
177
|
+
stats_stddev=convert_value_type(stat[11], True),
|
178
|
+
stats_number_of_null=convert_value_type(stat[6], True),
|
179
|
+
stats_number_of_unique=convert_value_type(stat[7], True),
|
180
|
+
)
|
181
|
+
|
182
|
+
payloads.append(avro_assets.to_dict())
|
183
|
+
|
184
|
+
return payloads
|
185
|
+
|
186
|
+
|
150
187
|
def gen_table_stats_payload_from_tuple(
|
151
188
|
tenant_id: str, endpoint: str, stats: Tuple[List[str]]
|
152
189
|
) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
|
@@ -3,16 +3,17 @@ import logging
|
|
3
3
|
import os
|
4
4
|
import shutil
|
5
5
|
|
6
|
-
from quollio_core.helper.core import setup_dbt_profile
|
6
|
+
from quollio_core.helper.core import is_valid_domain, setup_dbt_profile
|
7
7
|
from quollio_core.helper.env_default import env_default
|
8
8
|
from quollio_core.helper.log import set_log_level
|
9
|
+
from quollio_core.profilers.qdc import gen_existing_global_id_dict, get_avro_file_content
|
9
10
|
from quollio_core.profilers.redshift import (
|
10
11
|
redshift_table_level_lineage,
|
11
12
|
redshift_table_level_sqllineage,
|
12
13
|
redshift_table_stats,
|
13
14
|
)
|
14
15
|
from quollio_core.profilers.stats import get_column_stats_items
|
15
|
-
from quollio_core.repository import dbt, qdc, redshift
|
16
|
+
from quollio_core.repository import dbt, qdc, redshift, ssm
|
16
17
|
|
17
18
|
logger = logging.getLogger(__name__)
|
18
19
|
|
@@ -84,11 +85,20 @@ def load_lineage(
|
|
84
85
|
tenant_id: str,
|
85
86
|
) -> None:
|
86
87
|
logger.info("Generate redshift table to table lineage.")
|
88
|
+
|
89
|
+
file_content = get_avro_file_content(
|
90
|
+
tenant_id=tenant_id,
|
91
|
+
account_id=conn.host,
|
92
|
+
qdc_client=qdc_client,
|
93
|
+
)
|
94
|
+
existing_global_ids = gen_existing_global_id_dict(avro_content=file_content)
|
95
|
+
|
87
96
|
redshift_table_level_lineage(
|
88
97
|
conn=conn,
|
89
98
|
qdc_client=qdc_client,
|
90
99
|
tenant_id=tenant_id,
|
91
100
|
dbt_table_name="quollio_lineage_table_level",
|
101
|
+
existing_global_ids=existing_global_ids,
|
92
102
|
)
|
93
103
|
|
94
104
|
logger.info("Generate redshift view level lineage.")
|
@@ -97,6 +107,7 @@ def load_lineage(
|
|
97
107
|
qdc_client=qdc_client,
|
98
108
|
tenant_id=tenant_id,
|
99
109
|
dbt_table_name="quollio_lineage_view_level",
|
110
|
+
existing_global_ids=existing_global_ids,
|
100
111
|
)
|
101
112
|
|
102
113
|
logger.info("Lineage data is successfully loaded.")
|
@@ -115,12 +126,20 @@ def load_stats(
|
|
115
126
|
if stats_items is None:
|
116
127
|
raise ValueError("No stats items are not selected. Please specify any value to `stats_items` param.")
|
117
128
|
|
129
|
+
file_content = get_avro_file_content(
|
130
|
+
tenant_id=tenant_id,
|
131
|
+
account_id=conn.host,
|
132
|
+
qdc_client=qdc_client,
|
133
|
+
)
|
134
|
+
existing_global_ids = gen_existing_global_id_dict(avro_content=file_content)
|
135
|
+
|
118
136
|
logger.info("The following values will be aggregated. {stats_items}".format(stats_items=stats_items))
|
119
137
|
redshift_table_stats(
|
120
138
|
conn=conn,
|
121
139
|
qdc_client=qdc_client,
|
122
140
|
tenant_id=tenant_id,
|
123
141
|
stats_items=stats_items,
|
142
|
+
existing_global_ids=existing_global_ids,
|
124
143
|
)
|
125
144
|
|
126
145
|
logger.info("Stats data is successfully loaded.")
|
@@ -252,6 +271,7 @@ if __name__ == "__main__":
|
|
252
271
|
type=str,
|
253
272
|
choices=["debug", "info", "warn", "error", "none"],
|
254
273
|
action=env_default("LOG_LEVEL"),
|
274
|
+
default="info",
|
255
275
|
required=False,
|
256
276
|
help="The log level for dbt commands. Default value is info",
|
257
277
|
)
|
@@ -285,6 +305,16 @@ if __name__ == "__main__":
|
|
285
305
|
required=False,
|
286
306
|
help="The client secrete that is created on Quollio console to let clients access Quollio External API",
|
287
307
|
)
|
308
|
+
parser.add_argument(
|
309
|
+
"--external_api_access",
|
310
|
+
type=str,
|
311
|
+
choices=["PUBLIC", "VPC_ENDPOINT"],
|
312
|
+
action=env_default("EXTERNAL_API_ACCESS"),
|
313
|
+
default="PUBLIC",
|
314
|
+
required=False,
|
315
|
+
help="Access method to Quollio API. Default 'PUBLIC'. Choose 'VPC_ENDPOINT'\
|
316
|
+
if you use API Gateway VPC Endpoint, DefaultValue is set to PUBLIC.",
|
317
|
+
)
|
288
318
|
|
289
319
|
stats_items = get_column_stats_items()
|
290
320
|
parser.add_argument(
|
@@ -323,11 +353,24 @@ if __name__ == "__main__":
|
|
323
353
|
log_level=args.log_level,
|
324
354
|
dbt_macro_source=args.dbt_macro_source,
|
325
355
|
)
|
356
|
+
|
357
|
+
api_url = args.api_url
|
358
|
+
if args.external_api_access == "VPC_ENDPOINT":
|
359
|
+
logger.debug("Using VPC Endpoint for Quollio API access")
|
360
|
+
api_url, err = ssm.get_parameter_by_assume_role(args.api_url)
|
361
|
+
if err is not None:
|
362
|
+
logger.error("Fail to ssm.get_parameter_by_assume_role. {err}".format(err=err))
|
363
|
+
raise Exception("Fail to ssm.get_parameter_by_assume_role. {err}".format(err=err))
|
364
|
+
is_domain_valid = is_valid_domain(domain=api_url, domain_type=args.external_api_access)
|
365
|
+
if not is_domain_valid:
|
366
|
+
raise ValueError("The format of quollio API URL is invalid. The URL must end with `.com` or /api.")
|
367
|
+
logger.debug("API URL: %s", api_url)
|
368
|
+
|
326
369
|
if "load_lineage" in args.commands:
|
327
370
|
qdc_client = qdc.QDCExternalAPIClient(
|
328
371
|
client_id=args.client_id,
|
329
372
|
client_secret=args.client_secret,
|
330
|
-
base_url=
|
373
|
+
base_url=api_url,
|
331
374
|
)
|
332
375
|
load_lineage(
|
333
376
|
conn=conn,
|
@@ -338,7 +381,7 @@ if __name__ == "__main__":
|
|
338
381
|
qdc_client = qdc.QDCExternalAPIClient(
|
339
382
|
client_id=args.client_id,
|
340
383
|
client_secret=args.client_secret,
|
341
|
-
base_url=
|
384
|
+
base_url=api_url,
|
342
385
|
)
|
343
386
|
load_stats(
|
344
387
|
conn=conn,
|
@@ -348,7 +391,7 @@ if __name__ == "__main__":
|
|
348
391
|
)
|
349
392
|
if "load_sqllineage" in args.commands:
|
350
393
|
qdc_client = qdc.QDCExternalAPIClient(
|
351
|
-
base_url=
|
394
|
+
base_url=api_url,
|
352
395
|
client_id=args.client_id,
|
353
396
|
client_secret=args.client_secret,
|
354
397
|
)
|
@@ -0,0 +1,25 @@
|
|
1
|
+
from google.oauth2.service_account import Credentials
|
2
|
+
from googleapiclient.discovery import build
|
3
|
+
|
4
|
+
|
5
|
+
class CloudResourceManagerClient:
    """Client to interact with the Cloud Resource Manager API."""

    def __init__(self, credentials: "Credentials") -> None:
        """Initialize the Cloud Resource Manager client with provided credentials.

        Args:
            credentials: Credentials object handed to the API client builder.
        """
        self.client = self.__initialize(credentials=credentials)

    def __initialize(self, credentials: "Credentials"):
        """Build the ``cloudresourcemanager`` v1 API client."""
        return build("cloudresourcemanager", "v1", credentials=credentials)

    def list_projects(self):
        """List all projects accessible with the current credentials.

        The list endpoint is paginated, so every page is fetched via the
        client's ``list_next`` helper instead of returning only the first one.

        Returns:
            The API response dictionary. When the result fits in one page the
            response is returned untouched. Otherwise a copy of the first
            page is returned whose ``projects`` entry holds the projects of
            all pages and whose ``nextPageToken`` entry is removed.
        """
        projects_api = self.client.projects()
        request = projects_api.list()
        response = request.execute()

        next_request = projects_api.list_next(previous_request=request, previous_response=response)
        if next_request is None:
            # Single page: keep the exact response callers received before.
            return response

        # Copy so the first page returned by the API client is not mutated.
        merged = dict(response)
        projects = list(response.get("projects", []))
        while next_request is not None:
            page = next_request.execute()
            projects.extend(page.get("projects", []))
            next_request = projects_api.list_next(previous_request=next_request, previous_response=page)

        merged["projects"] = projects
        # All pages are consumed, so a continuation token would be misleading.
        merged.pop("nextPageToken", None)
        return merged

    def get_project(self, project_id: str):
        """Get a specific project by project ID.

        Args:
            project_id: Sent to the API as ``projectId``.

        Returns:
            The API response for the requested project.
        """
        request = self.client.projects().get(projectId=project_id)
        response = request.execute()
        return response
|
@@ -312,6 +312,7 @@ if __name__ == "__main__":
|
|
312
312
|
type=str,
|
313
313
|
choices=["debug", "info", "warn", "error", "none"],
|
314
314
|
action=env_default("LOG_LEVEL"),
|
315
|
+
default="info",
|
315
316
|
required=False,
|
316
317
|
help="The log level for dbt commands. Default value is info",
|
317
318
|
)
|
@@ -442,6 +443,7 @@ if __name__ == "__main__":
|
|
442
443
|
)
|
443
444
|
api_url = args.api_url
|
444
445
|
if args.external_api_access == "VPC_ENDPOINT":
|
446
|
+
logger.debug("Using VPC Endpoint for Quollio API access")
|
445
447
|
api_url, err = ssm.get_parameter_by_assume_role(args.api_url)
|
446
448
|
if err is not None:
|
447
449
|
logger.error("Fail to ssm.get_parameter_by_assume_role. {err}".format(err=err))
|
@@ -449,7 +451,8 @@ if __name__ == "__main__":
|
|
449
451
|
|
450
452
|
is_domain_valid = is_valid_domain(domain=api_url, domain_type=args.external_api_access)
|
451
453
|
if not is_domain_valid:
|
452
|
-
raise ValueError("The format of quollio API URL is invalid. The URL must end with `.com`")
|
454
|
+
raise ValueError("The format of quollio API URL is invalid. The URL must end with `.com` or /api.")
|
455
|
+
logger.debug("API URL: %s", api_url)
|
453
456
|
|
454
457
|
if "load_lineage" in args.commands:
|
455
458
|
qdc_client = qdc.QDCExternalAPIClient(
|
@@ -1,12 +1,13 @@
|
|
1
1
|
import argparse
|
2
2
|
import json
|
3
3
|
|
4
|
+
from quollio_core.helper.core import is_valid_domain
|
4
5
|
from quollio_core.helper.env_default import env_default
|
5
6
|
from quollio_core.helper.log_utils import configure_logging, error_handling_decorator, logger
|
6
7
|
from quollio_core.profilers.stats import get_column_stats_items
|
7
8
|
from quollio_core.profilers.teradata.lineage import load_lineage
|
8
9
|
from quollio_core.profilers.teradata.stats import load_stats
|
9
|
-
from quollio_core.repository import qdc
|
10
|
+
from quollio_core.repository import qdc, ssm
|
10
11
|
from quollio_core.repository import teradata as teradata_repo
|
11
12
|
|
12
13
|
DEFAULT_SYSTEM_DATABASES = [
|
@@ -197,6 +198,16 @@ def main() -> None:
|
|
197
198
|
help="Name of the Teradata system database.\
|
198
199
|
Default is DBC",
|
199
200
|
)
|
201
|
+
parser.add_argument(
|
202
|
+
"--external_api_access",
|
203
|
+
type=str,
|
204
|
+
choices=["PUBLIC", "VPC_ENDPOINT"],
|
205
|
+
action=env_default("EXTERNAL_API_ACCESS"),
|
206
|
+
default="PUBLIC",
|
207
|
+
required=False,
|
208
|
+
help="Access method to Quollio API. Default 'PUBLIC'. Choose 'VPC_ENDPOINT'\
|
209
|
+
if you use API Gateway VPC Endpoint, DefaultValue is set to PUBLIC.",
|
210
|
+
)
|
200
211
|
|
201
212
|
args = parser.parse_args()
|
202
213
|
|
@@ -217,7 +228,19 @@ def main() -> None:
|
|
217
228
|
additional_params = {}
|
218
229
|
|
219
230
|
logger.info("Initializing QDC client")
|
220
|
-
|
231
|
+
api_url = args.api_url
|
232
|
+
if args.external_api_access == "VPC_ENDPOINT":
|
233
|
+
logger.debug("Using VPC Endpoint for Quollio API access")
|
234
|
+
api_url, err = ssm.get_parameter_by_assume_role(args.api_url)
|
235
|
+
if err is not None:
|
236
|
+
logger.error("Fail to ssm.get_parameter_by_assume_role. {err}".format(err=err))
|
237
|
+
raise Exception("Fail to ssm.get_parameter_by_assume_role. {err}".format(err=err))
|
238
|
+
is_domain_valid = is_valid_domain(domain=api_url, domain_type=args.external_api_access)
|
239
|
+
if not is_domain_valid:
|
240
|
+
raise ValueError("The format of quollio API URL is invalid. The URL must end with `.com` or /api.")
|
241
|
+
|
242
|
+
logger.debug("API URL: %s", api_url)
|
243
|
+
qdc_client = qdc.initialize_qdc_client(api_url, args.client_id, args.client_secret)
|
221
244
|
|
222
245
|
logger.info("Initializing Teradata client")
|
223
246
|
config = teradata_repo.TeradataConfig.from_dict(
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/analyses/.gitkeep
RENAMED
File without changes
|
{quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/dbt_project.yml
RENAMED
File without changes
|
{quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/macros/.gitkeep
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/models/sources.yml
RENAMED
File without changes
|
{quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/packages_hub.yml
RENAMED
File without changes
|
{quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/packages_local.yml
RENAMED
File without changes
|
File without changes
|
{quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/seeds/.gitkeep
RENAMED
File without changes
|
{quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/snapshots/.gitkeep
RENAMED
File without changes
|
File without changes
|
{quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/analyses/.gitkeep
RENAMED
File without changes
|
{quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/dbt_project.yml
RENAMED
File without changes
|
{quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/macros/.gitkeep
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/models/sources.yml
RENAMED
File without changes
|
{quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/packages_hub.yml
RENAMED
File without changes
|
{quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/packages_local.yml
RENAMED
File without changes
|
File without changes
|
File without changes
|
{quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/snapshots/.gitkeep
RENAMED
File without changes
|
File without changes
|
File without changes
|
{quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/analyses/.gitkeep
RENAMED
File without changes
|
{quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/dbt_project.yml
RENAMED
File without changes
|
{quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/macros/.gitkeep
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/models/sources.yml
RENAMED
File without changes
|
{quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/packages_hub.yml
RENAMED
File without changes
|
{quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/packages_local.yml
RENAMED
File without changes
|
File without changes
|
{quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/seeds/.gitkeep
RENAMED
File without changes
|
{quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/snapshots/.gitkeep
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|