quollio-core 0.6.5__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quollio_core/__init__.py +1 -1
- quollio_core/bigquery.py +22 -0
- quollio_core/profilers/bigquery.py +86 -26
- quollio_core/profilers/redshift.py +74 -38
- quollio_core/profilers/stats.py +37 -0
- quollio_core/redshift.py +46 -5
- quollio_core/repository/cloud_resource_manager.py +25 -0
- quollio_core/snowflake.py +1 -0
- {quollio_core-0.6.5.dist-info → quollio_core-0.7.0.dist-info}/METADATA +1 -1
- {quollio_core-0.6.5.dist-info → quollio_core-0.7.0.dist-info}/RECORD +12 -11
- {quollio_core-0.6.5.dist-info → quollio_core-0.7.0.dist-info}/WHEEL +0 -0
- {quollio_core-0.6.5.dist-info → quollio_core-0.7.0.dist-info}/licenses/LICENSE +0 -0
quollio_core/__init__.py
CHANGED
quollio_core/bigquery.py
CHANGED
@@ -6,6 +6,7 @@ from google.auth.credentials import Credentials
|
|
6
6
|
from quollio_core.helper.env_default import env_default
|
7
7
|
from quollio_core.helper.log_utils import configure_logging, error_handling_decorator, logger
|
8
8
|
from quollio_core.profilers.bigquery import bigquery_table_lineage, bigquery_table_stats
|
9
|
+
from quollio_core.profilers.qdc import gen_existing_global_id_dict, get_avro_file_content
|
9
10
|
from quollio_core.repository import qdc
|
10
11
|
from quollio_core.repository.bigquery import BigQueryClient, get_credentials, get_org_id
|
11
12
|
|
@@ -30,8 +31,15 @@ def load_lineage(
|
|
30
31
|
org_id: str,
|
31
32
|
credentials: Credentials,
|
32
33
|
qdc_client: qdc.QDCExternalAPIClient,
|
34
|
+
enable_multi_projects: str,
|
33
35
|
) -> None:
|
34
36
|
logger.info("Loading lineage data.")
|
37
|
+
file_content = get_avro_file_content(
|
38
|
+
tenant_id=tenant_id,
|
39
|
+
account_id=org_id,
|
40
|
+
qdc_client=qdc_client,
|
41
|
+
)
|
42
|
+
existing_global_ids = gen_existing_global_id_dict(avro_content=file_content)
|
35
43
|
bigquery_table_lineage(
|
36
44
|
qdc_client=qdc_client,
|
37
45
|
tenant_id=tenant_id,
|
@@ -39,6 +47,8 @@ def load_lineage(
|
|
39
47
|
regions=regions,
|
40
48
|
credentials=credentials,
|
41
49
|
org_id=org_id,
|
50
|
+
existing_global_ids=existing_global_ids,
|
51
|
+
enable_multi_projects=enable_multi_projects,
|
42
52
|
)
|
43
53
|
logger.info("Lineage data loaded successfully.")
|
44
54
|
|
@@ -146,6 +156,17 @@ if __name__ == "__main__":
|
|
146
156
|
help="Comma-separated list of dataplex stats tables - <project_id>.<dataset_id>.<table_id>",
|
147
157
|
)
|
148
158
|
|
159
|
+
parser.add_argument(
|
160
|
+
"--enable_multi_projects",
|
161
|
+
type=str,
|
162
|
+
choices=["ENABLED", "DISABLED"],
|
163
|
+
action=env_default("ENABLE_MULTI_PROJECTS"),
|
164
|
+
default="DISABLED",
|
165
|
+
required=False,
|
166
|
+
help="Whether to enable multi-projects support. If set to 'true', \
|
167
|
+
the script will load lineage and stats from all projects accessible by the credentials. Default is 'false'.",
|
168
|
+
)
|
169
|
+
|
149
170
|
args = parser.parse_args()
|
150
171
|
|
151
172
|
# Validate that dataplex_stats_tables is provided if load_stats is in commands
|
@@ -170,6 +191,7 @@ if __name__ == "__main__":
|
|
170
191
|
org_id=org_id,
|
171
192
|
credentials=credentials,
|
172
193
|
qdc_client=qdc_client,
|
194
|
+
enable_multi_projects=args.enable_multi_projects,
|
173
195
|
)
|
174
196
|
|
175
197
|
if "load_stats" in args.commands:
|
@@ -1,15 +1,25 @@
|
|
1
|
+
import io
|
2
|
+
import os
|
1
3
|
from typing import Dict, List
|
2
4
|
|
5
|
+
from fastavro import writer
|
3
6
|
from google.auth.credentials import Credentials
|
4
7
|
|
8
|
+
from quollio_core.helper.core import new_global_id
|
5
9
|
from quollio_core.helper.log_utils import error_handling_decorator, logger
|
6
|
-
from quollio_core.
|
10
|
+
from quollio_core.models.avroasset import AvroAsset
|
11
|
+
from quollio_core.models.qdc import GetImportURLRequest
|
12
|
+
from quollio_core.profilers.lineage import (
|
13
|
+
gen_table_avro_lineage_payload,
|
14
|
+
gen_table_lineage_payload,
|
15
|
+
parse_bigquery_table_lineage,
|
16
|
+
)
|
7
17
|
from quollio_core.profilers.stats import gen_table_stats_payload
|
8
18
|
from quollio_core.repository import qdc
|
9
19
|
from quollio_core.repository.bigquery import BigQueryClient, GCPLineageClient, get_entitiy_reference, get_search_request
|
20
|
+
from quollio_core.repository.cloud_resource_manager import CloudResourceManagerClient
|
10
21
|
|
11
22
|
|
12
|
-
@error_handling_decorator
|
13
23
|
def bigquery_table_lineage(
|
14
24
|
qdc_client: qdc.QDCExternalAPIClient,
|
15
25
|
tenant_id: str,
|
@@ -17,34 +27,84 @@ def bigquery_table_lineage(
|
|
17
27
|
regions: list,
|
18
28
|
org_id: str,
|
19
29
|
credentials: Credentials,
|
30
|
+
existing_global_ids: Dict[str, bool],
|
31
|
+
enable_multi_projects: str = "DISABLED",
|
20
32
|
) -> None:
|
21
33
|
lineage_client = GCPLineageClient(credentials)
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
34
|
+
crm_client = CloudResourceManagerClient(credentials)
|
35
|
+
|
36
|
+
target_project_ids = []
|
37
|
+
if enable_multi_projects == "ENABLED":
|
38
|
+
try:
|
39
|
+
target_projects = crm_client.list_projects()
|
40
|
+
except Exception as e:
|
41
|
+
raise Exception(f"ListProjects by cloud resource manager failed. Err. {str(e)}")
|
42
|
+
|
43
|
+
for target_project in target_projects["projects"]:
|
44
|
+
if target_project is None:
|
45
|
+
logger.warning("projects.Projects returns None. Proceed to loop project value")
|
46
|
+
continue
|
47
|
+
|
48
|
+
target_project_id = target_project.get("projectId", "")
|
49
|
+
if target_project_id == "":
|
50
|
+
logger.warning("projects.Projects is empty string. Proceed to loop project value")
|
51
|
+
continue
|
52
|
+
|
53
|
+
target_project_ids.append(target_project_id)
|
54
|
+
else:
|
55
|
+
target_project_ids.append(project_id)
|
56
|
+
|
57
|
+
update_table_lineage_inputs = []
|
58
|
+
for target_project_id in target_project_ids:
|
59
|
+
bq_client = BigQueryClient(credentials, target_project_id)
|
60
|
+
datasets = bq_client.list_dataset_ids()
|
61
|
+
all_tables = generate_table_list(bq_client, datasets)
|
62
|
+
lineage_links = generate_lineage_links(all_tables, lineage_client, target_project_id, regions)
|
63
|
+
lineage_links = parse_bigquery_table_lineage(lineage_links)
|
64
|
+
logger.debug("The following resources will be ingested. %s", lineage_links)
|
65
|
+
|
66
|
+
update_table_lineage_input = gen_table_lineage_payload(
|
67
|
+
tenant_id=tenant_id, endpoint=org_id, tables=lineage_links
|
40
68
|
)
|
41
|
-
|
42
|
-
|
43
|
-
|
69
|
+
update_table_lineage_input = gen_table_avro_lineage_payload(
|
70
|
+
tenant_id=tenant_id,
|
71
|
+
endpoint=org_id,
|
72
|
+
tables=lineage_links,
|
73
|
+
existing_global_ids=existing_global_ids,
|
44
74
|
)
|
45
|
-
|
46
|
-
|
47
|
-
|
75
|
+
update_table_lineage_inputs.extend(update_table_lineage_input)
|
76
|
+
|
77
|
+
stack_name = os.getenv("CF_STACK")
|
78
|
+
import_req = GetImportURLRequest(
|
79
|
+
service_name="bigquery",
|
80
|
+
source_name=stack_name,
|
81
|
+
file_name="{name}.avro".format(name=stack_name),
|
82
|
+
override_logical_name="false",
|
83
|
+
update_mode="partial",
|
84
|
+
)
|
85
|
+
datasource_id = new_global_id(tenant_id=tenant_id, cluster_id=org_id, data_id="", data_type="data_source")
|
86
|
+
logger.debug("Datasource id: {dsrc_id}".format(dsrc_id=datasource_id))
|
87
|
+
|
88
|
+
import_res = qdc_client.get_import_url(datasource_id=datasource_id, payload=import_req)
|
89
|
+
if import_res is None:
|
90
|
+
logger.error("get_import_url failed. Please retry `load_lineage` again")
|
91
|
+
return
|
92
|
+
logger.debug("ImportResponse: {res}".format(res=import_res))
|
93
|
+
|
94
|
+
avro_schema = AvroAsset.avro_schema_to_python()
|
95
|
+
|
96
|
+
buffer = io.BytesIO()
|
97
|
+
writer(buffer, avro_schema, update_table_lineage_inputs)
|
98
|
+
|
99
|
+
res = qdc_client.upload_file(
|
100
|
+
url=import_res.location,
|
101
|
+
metadata=import_res.datasource_metadata_response_body,
|
102
|
+
buffer=buffer.getbuffer().tobytes(),
|
103
|
+
)
|
104
|
+
|
105
|
+
if res == 200:
|
106
|
+
logger.info("Upload table lineage is finished.")
|
107
|
+
return
|
48
108
|
|
49
109
|
|
50
110
|
@error_handling_decorator
|
@@ -1,10 +1,17 @@
|
|
1
|
+
import io
|
1
2
|
import logging
|
2
|
-
|
3
|
+
import os
|
4
|
+
from typing import Dict, List
|
3
5
|
|
4
|
-
from
|
6
|
+
from fastavro import writer
|
7
|
+
|
8
|
+
from quollio_core.helper.core import new_global_id
|
9
|
+
from quollio_core.models.avroasset import AvroAsset
|
10
|
+
from quollio_core.models.qdc import GetImportURLRequest
|
11
|
+
from quollio_core.profilers.lineage import gen_table_avro_lineage_payload, gen_table_lineage_payload_inputs
|
5
12
|
from quollio_core.profilers.sqllineage import SQLLineage
|
6
13
|
from quollio_core.profilers.stats import (
|
7
|
-
|
14
|
+
gen_table_stats_avro_payload_from_tuple,
|
8
15
|
get_is_target_stats_items,
|
9
16
|
render_sql_for_stats,
|
10
17
|
)
|
@@ -18,6 +25,7 @@ def redshift_table_level_lineage(
|
|
18
25
|
qdc_client: qdc.QDCExternalAPIClient,
|
19
26
|
tenant_id: str,
|
20
27
|
dbt_table_name: str,
|
28
|
+
existing_global_ids: Dict[str, bool],
|
21
29
|
) -> None:
|
22
30
|
with redshift.RedshiftQueryExecutor(config=conn) as redshift_executor:
|
23
31
|
results = redshift_executor.get_query_results(
|
@@ -34,28 +42,39 @@ def redshift_table_level_lineage(
|
|
34
42
|
)
|
35
43
|
lineage_payload_inputs = gen_table_lineage_payload_inputs(input_data=results)
|
36
44
|
|
37
|
-
update_table_lineage_inputs =
|
45
|
+
update_table_lineage_inputs = gen_table_avro_lineage_payload(
|
38
46
|
tenant_id=tenant_id,
|
39
47
|
endpoint=conn.host,
|
40
48
|
tables=lineage_payload_inputs,
|
49
|
+
existing_global_ids=existing_global_ids,
|
50
|
+
)
|
51
|
+
stack_name = os.getenv("CF_STACK")
|
52
|
+
import_req = GetImportURLRequest(
|
53
|
+
service_name="redshift",
|
54
|
+
source_name=stack_name,
|
55
|
+
file_name="{name}.avro".format(name=stack_name),
|
56
|
+
override_logical_name="false",
|
57
|
+
update_mode="partial",
|
41
58
|
)
|
59
|
+
datasource_id = new_global_id(tenant_id=tenant_id, cluster_id=conn.host, data_id="", data_type="data_source")
|
60
|
+
logger.debug("Datasource id: {dsrc_id}".format(dsrc_id=datasource_id))
|
61
|
+
import_res = qdc_client.get_import_url(datasource_id=datasource_id, payload=import_req)
|
62
|
+
if import_res is None:
|
63
|
+
logger.error("get_import_url failed. Please retry `load_lineage` again")
|
64
|
+
return
|
65
|
+
logger.debug("ImportResponse: {res}".format(res=import_res))
|
42
66
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
payload=update_table_lineage_input.upstreams.as_dict(),
|
55
|
-
)
|
56
|
-
if status_code == 200:
|
57
|
-
req_count += 1
|
58
|
-
logger.info(f"Generating table lineage is finished. {req_count} lineages are ingested.")
|
67
|
+
avro_schema = AvroAsset.avro_schema_to_python()
|
68
|
+
|
69
|
+
buffer = io.BytesIO()
|
70
|
+
writer(buffer, avro_schema, update_table_lineage_inputs)
|
71
|
+
res = qdc_client.upload_file(
|
72
|
+
url=import_res.location,
|
73
|
+
metadata=import_res.datasource_metadata_response_body,
|
74
|
+
buffer=buffer.getbuffer().tobytes(),
|
75
|
+
)
|
76
|
+
if res == 200:
|
77
|
+
logger.info("Upload table lineage is finished.")
|
59
78
|
return
|
60
79
|
|
61
80
|
|
@@ -82,6 +101,7 @@ def redshift_table_stats(
|
|
82
101
|
qdc_client: qdc.QDCExternalAPIClient,
|
83
102
|
tenant_id: str,
|
84
103
|
stats_items: List[str],
|
104
|
+
existing_global_ids: Dict[str, bool],
|
85
105
|
) -> None:
|
86
106
|
is_aggregate_items = get_is_target_stats_items(stats_items=stats_items)
|
87
107
|
with redshift.RedshiftQueryExecutor(config=conn) as redshift_executor:
|
@@ -92,7 +112,7 @@ def redshift_table_stats(
|
|
92
112
|
stats_views = redshift_executor.get_query_results(query=stats_query)
|
93
113
|
logger.info("Found %s for table statistics.", len(stats_views))
|
94
114
|
|
95
|
-
|
115
|
+
update_stats_inputs = list()
|
96
116
|
for stats_view in stats_views:
|
97
117
|
table_fqn = "{catalog}.{schema}.{table}".format(
|
98
118
|
catalog=stats_view[0], schema=stats_view[1], table=stats_view[2]
|
@@ -100,23 +120,39 @@ def redshift_table_stats(
|
|
100
120
|
stats_query = render_sql_for_stats(is_aggregate_items=is_aggregate_items, table_fqn=table_fqn)
|
101
121
|
logger.debug(f"The following sql will be fetched to retrieve stats values. {stats_query}")
|
102
122
|
stats_result = redshift_executor.get_query_results(query=stats_query)
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
123
|
+
payload = gen_table_stats_avro_payload_from_tuple(
|
124
|
+
tenant_id=tenant_id, endpoint=conn.host, stats=stats_result, existing_global_ids=existing_global_ids
|
125
|
+
)
|
126
|
+
update_stats_inputs += payload
|
127
|
+
|
128
|
+
stack_name = os.getenv("CF_STACK")
|
129
|
+
import_req = GetImportURLRequest(
|
130
|
+
service_name="redshift",
|
131
|
+
source_name=stack_name,
|
132
|
+
file_name="{name}.avro".format(name=stack_name),
|
133
|
+
override_logical_name="false",
|
134
|
+
update_mode="partial",
|
135
|
+
)
|
136
|
+
datasource_id = new_global_id(tenant_id=tenant_id, cluster_id=conn.host, data_id="", data_type="data_source")
|
137
|
+
logger.debug("Datasource id: {dsrc_id}".format(dsrc_id=datasource_id))
|
138
|
+
import_res = qdc_client.get_import_url(datasource_id=datasource_id, payload=import_req)
|
139
|
+
if import_res is None:
|
140
|
+
logger.error("get_import_url failed. Please retry load_stats again")
|
141
|
+
return
|
142
|
+
logger.debug("ImportResponse: {res}".format(res=import_res))
|
143
|
+
|
144
|
+
avro_schema = AvroAsset.avro_schema_to_python()
|
145
|
+
|
146
|
+
buffer = io.BytesIO()
|
147
|
+
writer(buffer, avro_schema, update_stats_inputs)
|
148
|
+
res = qdc_client.upload_file(
|
149
|
+
url=import_res.location,
|
150
|
+
metadata=import_res.datasource_metadata_response_body,
|
151
|
+
buffer=buffer.getbuffer().tobytes(),
|
152
|
+
)
|
153
|
+
if res == 200:
|
154
|
+
logger.info("Generating table stats is finished.")
|
155
|
+
|
120
156
|
return
|
121
157
|
|
122
158
|
|
quollio_core/profilers/stats.py
CHANGED
@@ -147,6 +147,43 @@ def gen_table_stats_payload(tenant_id: str, endpoint: str, stats: List[Dict[str,
|
|
147
147
|
return payloads
|
148
148
|
|
149
149
|
|
150
|
+
def gen_table_stats_avro_payload_from_tuple(
    tenant_id: str, endpoint: str, stats: Tuple[List[str]], existing_global_ids: Dict[str, bool]
) -> List[Dict[str, str]]:
    """Convert column-statistics rows into Avro asset dictionaries.

    Each row in ``stats`` is read positionally:
    0-3 are database, schema, table and column names; 4 = max, 5 = min,
    6 = number of nulls, 7 = number of unique values, 8 = mean, 9 = median,
    10 = mode, 11 = stddev.

    A column global id is computed for every row from the concatenated
    database/schema/table/column names. Rows whose id is not mapped to
    ``True`` in ``existing_global_ids`` are skipped.

    Args:
        tenant_id: Tenant identifier passed to ``new_global_id``.
        endpoint: Value passed to ``new_global_id`` as ``cluster_id``.
        stats: Iterable of statistic rows with the layout described above.
        existing_global_ids: Lookup of global ids that may be emitted.

    Returns:
        A list with one ``AvroAsset.to_dict()`` result per accepted row.
    """
    results = []
    for row in stats:
        database, schema, table, column = row[:4]

        # The id seed is the four names joined without any separator.
        id_seed = f"{database}{schema}{table}{column}"
        column_global_id = new_global_id(
            tenant_id=tenant_id,
            cluster_id=endpoint,
            data_id=id_seed,
            data_type="column",
        )

        # Only ids explicitly flagged True are emitted.
        if existing_global_ids.get(column_global_id) is True:
            # NOTE(review): the meaning of the second positional argument
            # (True) of convert_value_type is not visible here - confirm.
            asset = AvroAsset(
                id=column_global_id,
                object_type="column",
                parents=[database, schema, table],
                name=column,
                stats_max=convert_value_type(row[4], True),
                stats_min=convert_value_type(row[5], True),
                stats_mean=convert_value_type(row[8], True),
                stats_median=convert_value_type(row[9], True),
                stats_mode=convert_value_type(row[10], True),
                stats_stddev=convert_value_type(row[11], True),
                stats_number_of_null=convert_value_type(row[6], True),
                stats_number_of_unique=convert_value_type(row[7], True),
            )
            results.append(asset.to_dict())

    return results
|
185
|
+
|
186
|
+
|
150
187
|
def gen_table_stats_payload_from_tuple(
|
151
188
|
tenant_id: str, endpoint: str, stats: Tuple[List[str]]
|
152
189
|
) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
|
quollio_core/redshift.py
CHANGED
@@ -3,16 +3,17 @@ import logging
|
|
3
3
|
import os
|
4
4
|
import shutil
|
5
5
|
|
6
|
-
from quollio_core.helper.core import setup_dbt_profile
|
6
|
+
from quollio_core.helper.core import is_valid_domain, setup_dbt_profile
|
7
7
|
from quollio_core.helper.env_default import env_default
|
8
8
|
from quollio_core.helper.log import set_log_level
|
9
|
+
from quollio_core.profilers.qdc import gen_existing_global_id_dict, get_avro_file_content
|
9
10
|
from quollio_core.profilers.redshift import (
|
10
11
|
redshift_table_level_lineage,
|
11
12
|
redshift_table_level_sqllineage,
|
12
13
|
redshift_table_stats,
|
13
14
|
)
|
14
15
|
from quollio_core.profilers.stats import get_column_stats_items
|
15
|
-
from quollio_core.repository import dbt, qdc, redshift
|
16
|
+
from quollio_core.repository import dbt, qdc, redshift, ssm
|
16
17
|
|
17
18
|
logger = logging.getLogger(__name__)
|
18
19
|
|
@@ -84,11 +85,20 @@ def load_lineage(
|
|
84
85
|
tenant_id: str,
|
85
86
|
) -> None:
|
86
87
|
logger.info("Generate redshift table to table lineage.")
|
88
|
+
|
89
|
+
file_content = get_avro_file_content(
|
90
|
+
tenant_id=tenant_id,
|
91
|
+
account_id=conn.host,
|
92
|
+
qdc_client=qdc_client,
|
93
|
+
)
|
94
|
+
existing_global_ids = gen_existing_global_id_dict(avro_content=file_content)
|
95
|
+
|
87
96
|
redshift_table_level_lineage(
|
88
97
|
conn=conn,
|
89
98
|
qdc_client=qdc_client,
|
90
99
|
tenant_id=tenant_id,
|
91
100
|
dbt_table_name="quollio_lineage_table_level",
|
101
|
+
existing_global_ids=existing_global_ids,
|
92
102
|
)
|
93
103
|
|
94
104
|
logger.info("Generate redshift view level lineage.")
|
@@ -97,6 +107,7 @@ def load_lineage(
|
|
97
107
|
qdc_client=qdc_client,
|
98
108
|
tenant_id=tenant_id,
|
99
109
|
dbt_table_name="quollio_lineage_view_level",
|
110
|
+
existing_global_ids=existing_global_ids,
|
100
111
|
)
|
101
112
|
|
102
113
|
logger.info("Lineage data is successfully loaded.")
|
@@ -115,12 +126,20 @@ def load_stats(
|
|
115
126
|
if stats_items is None:
|
116
127
|
raise ValueError("No stats items are not selected. Please specify any value to `stats_items` param.")
|
117
128
|
|
129
|
+
file_content = get_avro_file_content(
|
130
|
+
tenant_id=tenant_id,
|
131
|
+
account_id=conn.host,
|
132
|
+
qdc_client=qdc_client,
|
133
|
+
)
|
134
|
+
existing_global_ids = gen_existing_global_id_dict(avro_content=file_content)
|
135
|
+
|
118
136
|
logger.info("The following values will be aggregated. {stats_items}".format(stats_items=stats_items))
|
119
137
|
redshift_table_stats(
|
120
138
|
conn=conn,
|
121
139
|
qdc_client=qdc_client,
|
122
140
|
tenant_id=tenant_id,
|
123
141
|
stats_items=stats_items,
|
142
|
+
existing_global_ids=existing_global_ids,
|
124
143
|
)
|
125
144
|
|
126
145
|
logger.info("Stats data is successfully loaded.")
|
@@ -252,6 +271,7 @@ if __name__ == "__main__":
|
|
252
271
|
type=str,
|
253
272
|
choices=["debug", "info", "warn", "error", "none"],
|
254
273
|
action=env_default("LOG_LEVEL"),
|
274
|
+
default="info",
|
255
275
|
required=False,
|
256
276
|
help="The log level for dbt commands. Default value is info",
|
257
277
|
)
|
@@ -285,6 +305,16 @@ if __name__ == "__main__":
|
|
285
305
|
required=False,
|
286
306
|
help="The client secrete that is created on Quollio console to let clients access Quollio External API",
|
287
307
|
)
|
308
|
+
parser.add_argument(
|
309
|
+
"--external_api_access",
|
310
|
+
type=str,
|
311
|
+
choices=["PUBLIC", "VPC_ENDPOINT"],
|
312
|
+
action=env_default("EXTERNAL_API_ACCESS"),
|
313
|
+
default="PUBLIC",
|
314
|
+
required=False,
|
315
|
+
help="Access method to Quollio API. Default 'PUBLIC'. Choose 'VPC_ENDPOINT'\
|
316
|
+
if you use API Gateway VPC Endpoint, DefaultValue is set to PUBLIC.",
|
317
|
+
)
|
288
318
|
|
289
319
|
stats_items = get_column_stats_items()
|
290
320
|
parser.add_argument(
|
@@ -323,11 +353,22 @@ if __name__ == "__main__":
|
|
323
353
|
log_level=args.log_level,
|
324
354
|
dbt_macro_source=args.dbt_macro_source,
|
325
355
|
)
|
356
|
+
|
357
|
+
api_url = args.api_url
|
358
|
+
if args.external_api_access == "VPC_ENDPOINT":
|
359
|
+
api_url, err = ssm.get_parameter_by_assume_role(args.api_url)
|
360
|
+
if err is not None:
|
361
|
+
logger.error("Fail to ssm.get_parameter_by_assume_role. {err}".format(err=err))
|
362
|
+
raise Exception("Fail to ssm.get_parameter_by_assume_role. {err}".format(err=err))
|
363
|
+
is_domain_valid = is_valid_domain(domain=api_url, domain_type=args.external_api_access)
|
364
|
+
if not is_domain_valid:
|
365
|
+
raise ValueError("The format of quollio API URL is invalid. The URL must end with `.com`")
|
366
|
+
|
326
367
|
if "load_lineage" in args.commands:
|
327
368
|
qdc_client = qdc.QDCExternalAPIClient(
|
328
369
|
client_id=args.client_id,
|
329
370
|
client_secret=args.client_secret,
|
330
|
-
base_url=
|
371
|
+
base_url=api_url,
|
331
372
|
)
|
332
373
|
load_lineage(
|
333
374
|
conn=conn,
|
@@ -338,7 +379,7 @@ if __name__ == "__main__":
|
|
338
379
|
qdc_client = qdc.QDCExternalAPIClient(
|
339
380
|
client_id=args.client_id,
|
340
381
|
client_secret=args.client_secret,
|
341
|
-
base_url=
|
382
|
+
base_url=api_url,
|
342
383
|
)
|
343
384
|
load_stats(
|
344
385
|
conn=conn,
|
@@ -348,7 +389,7 @@ if __name__ == "__main__":
|
|
348
389
|
)
|
349
390
|
if "load_sqllineage" in args.commands:
|
350
391
|
qdc_client = qdc.QDCExternalAPIClient(
|
351
|
-
base_url=
|
392
|
+
base_url=api_url,
|
352
393
|
client_id=args.client_id,
|
353
394
|
client_secret=args.client_secret,
|
354
395
|
)
|
@@ -0,0 +1,25 @@
|
|
1
|
+
from google.oauth2.service_account import Credentials
|
2
|
+
from googleapiclient.discovery import build
|
3
|
+
|
4
|
+
|
5
|
+
class CloudResourceManagerClient:
    """Client to interact with the Cloud Resource Manager API."""

    def __init__(self, credentials: Credentials) -> None:
        """Initialize the Cloud Resource Manager client with provided credentials."""
        self.client = self.__initialize(credentials=credentials)

    def __initialize(self, credentials: Credentials):
        """Build the discovery-based ``cloudresourcemanager`` v1 service object."""
        return build("cloudresourcemanager", "v1", credentials=credentials)

    def list_projects(self):
        """List all projects accessible with the current credentials.

        The ``projects.list`` call is paginated, so a single ``execute()``
        returns only the first page. Every page is followed through
        ``list_next`` and the results are merged.

        Returns:
            A dict with a ``"projects"`` key holding the project resources
            collected from all pages. The key is always present and maps to
            an empty list when no project is returned, so callers can index
            ``["projects"]`` safely.
        """
        projects_api = self.client.projects()
        projects = []
        request = projects_api.list()
        # list_next returns None once the previous response has no further page.
        while request is not None:
            response = request.execute()
            # A page without results may omit the "projects" key entirely.
            projects.extend(response.get("projects", []))
            request = projects_api.list_next(previous_request=request, previous_response=response)
        return {"projects": projects}

    def get_project(self, project_id: str):
        """Get a specific project by project ID.

        Args:
            project_id: Value sent as ``projectId`` to ``projects.get``.

        Returns:
            The response of the executed ``projects.get`` request.
        """
        request = self.client.projects().get(projectId=project_id)
        return request.execute()
|
quollio_core/snowflake.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
quollio_core/__init__.py,sha256=
|
2
|
-
quollio_core/bigquery.py,sha256=
|
1
|
+
quollio_core/__init__.py,sha256=ZtRODvd0lC2-vqcXyfKDya8bb6FyyFWBsGNz7TSlO-s,83
|
2
|
+
quollio_core/bigquery.py,sha256=nxMWldMr14HzOeyzYM_fRrfHQ7jbm2HyGzc1r46mlco,6821
|
3
3
|
quollio_core/bricks.py,sha256=8h3kbI2b6lGH2s-56jE_Q5-R5-nIsQYMfvtRrkFOzoU,10784
|
4
|
-
quollio_core/redshift.py,sha256=
|
5
|
-
quollio_core/snowflake.py,sha256=
|
4
|
+
quollio_core/redshift.py,sha256=Yi_udcgfen2PoCkDTIhemeCFbDVUU1rYWro9CyjHCZA,13192
|
5
|
+
quollio_core/snowflake.py,sha256=aOQ8tLSbHJEx_TUGaZLjix5KWgLiRp3A1tRx5qgUtRI,17084
|
6
6
|
quollio_core/teradata.py,sha256=H2VUcJvr8W-M2wvm3710Gf1ENb-BSscrDRKNm8gdHJE,8227
|
7
7
|
quollio_core/dbt_projects/databricks/.gitignore,sha256=1jJAyXSzJ3YUm0nx3i7wUSE4RjQMX3ad6F8O88UbtzI,29
|
8
8
|
quollio_core/dbt_projects/databricks/README.md,sha256=ZpRQyhFAODAiS8dc1Kb_ndkul4cu4o4udN_EMa49CU4,440
|
@@ -71,18 +71,19 @@ quollio_core/helper/log_utils.py,sha256=QontLKETHjSAbQniJ7YqS0RY2AYvFHSjrlPiGr31
|
|
71
71
|
quollio_core/models/avroasset.py,sha256=YZHzOS62N0_sidneXI3IZ2MA8Bz1vFVgF6F9_UilC3s,603
|
72
72
|
quollio_core/models/qdc.py,sha256=UObaUpvAQ4vOhI6jfwvNFrJ3--6AX2v9yl9_d3Juy7M,739
|
73
73
|
quollio_core/profilers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
74
|
-
quollio_core/profilers/bigquery.py,sha256=
|
74
|
+
quollio_core/profilers/bigquery.py,sha256=rJ2eMZlUtEmsNCCkenC9DuL7--btcO2dF_NmIOawGJ4,7947
|
75
75
|
quollio_core/profilers/databricks.py,sha256=ik4RiR_GOeU3S7s6C6Y9SGe1D_Y_f98BDWJVlEJXL4U,7868
|
76
76
|
quollio_core/profilers/lineage.py,sha256=GMWue6lgiz7wFYnNpaHVFivprA-iqhbCHf63IsVB8Vk,11260
|
77
77
|
quollio_core/profilers/qdc.py,sha256=P0STRfe5G4d-UI7RdVbYmAfP_tAn1HbFUjeirxWipz4,995
|
78
|
-
quollio_core/profilers/redshift.py,sha256=
|
78
|
+
quollio_core/profilers/redshift.py,sha256=f5F3jnFJP2SbmPtG-PNyVgkt4mtfxuUX45pIS0mrw60,7856
|
79
79
|
quollio_core/profilers/snowflake.py,sha256=ewvULWIlcq2h0jOyRzUpedW0NS8QlkSgICS-dZDYl18,13027
|
80
80
|
quollio_core/profilers/sqllineage.py,sha256=h0FT6CYb0A20zSc68GELZ7Q8bDbaHLQnZQHsXBEXBug,5261
|
81
|
-
quollio_core/profilers/stats.py,sha256=
|
81
|
+
quollio_core/profilers/stats.py,sha256=Go1tR8IMpMZnHZJzYtAZTC89ZDkebUmGxljwy8h5KC0,10752
|
82
82
|
quollio_core/profilers/teradata/lineage.py,sha256=2wNksBQD8vC6UTQwCglPsF53YMEVIkAb2CWTmpiTHDU,7368
|
83
83
|
quollio_core/profilers/teradata/stats.py,sha256=OagvkTRFiWVbiLABwZwR3wQ7y36edwOViDetHsYiyxI,9277
|
84
84
|
quollio_core/repository/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
85
85
|
quollio_core/repository/bigquery.py,sha256=3AyGcJNYGnUyMweyc6lGm4quwrOzd-ZBS2zNnFwafII,3990
|
86
|
+
quollio_core/repository/cloud_resource_manager.py,sha256=tmHxjV3AmKwv3OJ-f40N-XQA1qmxZTSDBDS3YK69CIo,978
|
86
87
|
quollio_core/repository/databricks.py,sha256=9Cgdv8qBnVaHqu3RA-IUBieAqb69moQ-KAAMVSf5Ds4,1877
|
87
88
|
quollio_core/repository/dbt.py,sha256=cnLwJPywLi8VowVW7zfIBa9jxVwDWO7xzzNRn1vWiuw,659
|
88
89
|
quollio_core/repository/qdc.py,sha256=Ni0rk9CX8ienqM_HYLuWLBKTwycvTENC7x7wGWhzjXs,8978
|
@@ -90,7 +91,7 @@ quollio_core/repository/redshift.py,sha256=p2ouEuYcDCjx1oBhc6H1ekQsvEqHGd3bFu3PW
|
|
90
91
|
quollio_core/repository/snowflake.py,sha256=yCYXrYf4I5GL_ITNTXoggj0xNbQsdwxPSmsVvZYwUVU,3869
|
91
92
|
quollio_core/repository/ssm.py,sha256=xpm1FzbBnIsBptuYPUNnPgkKU2AH3XxI-ZL0bEetvW0,2182
|
92
93
|
quollio_core/repository/teradata.py,sha256=1AExxRBTswpSyF4OVyAUkoiZ0yVRfqt4T99FdllkTEI,3763
|
93
|
-
quollio_core-0.
|
94
|
-
quollio_core-0.
|
95
|
-
quollio_core-0.
|
96
|
-
quollio_core-0.
|
94
|
+
quollio_core-0.7.0.dist-info/licenses/LICENSE,sha256=V8j_M8nAz8PvAOZQocyRDX7keai8UJ9skgmnwqETmdY,34520
|
95
|
+
quollio_core-0.7.0.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
96
|
+
quollio_core-0.7.0.dist-info/METADATA,sha256=xrt5HSRtSF0M-2Ef09CNsB2fo6hyMifPOySYhF5U1nA,7023
|
97
|
+
quollio_core-0.7.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|