quollio-core 0.4.4__py3-none-any.whl → 0.4.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quollio_core/__init__.py +1 -1
- quollio_core/bigquery.py +123 -0
- quollio_core/bricks.py +288 -0
- quollio_core/dbt_projects/databricks/.gitignore +4 -0
- quollio_core/dbt_projects/databricks/README.md +5 -0
- quollio_core/dbt_projects/databricks/analyses/.gitkeep +0 -0
- quollio_core/dbt_projects/databricks/dbt_project.yml +21 -0
- quollio_core/dbt_projects/databricks/macros/.gitkeep +0 -0
- quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.sql +73 -0
- quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.yml +14 -0
- quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.sql +63 -0
- quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.yml +11 -0
- quollio_core/dbt_projects/databricks/models/sources.yml +84 -0
- quollio_core/dbt_projects/databricks/package-lock.yml +14 -0
- quollio_core/dbt_projects/databricks/packages.yml +13 -0
- quollio_core/dbt_projects/databricks/profiles/profiles_template.yml +14 -0
- quollio_core/dbt_projects/databricks/seeds/.gitkeep +0 -0
- quollio_core/dbt_projects/databricks/snapshots/.gitkeep +0 -0
- quollio_core/dbt_projects/redshift/dbt_project.yml +1 -1
- quollio_core/dbt_projects/redshift/macros/materialization/divided_view.sql +101 -34
- quollio_core/dbt_projects/redshift/models/quollio_stats_columns.sql +1 -2
- quollio_core/dbt_projects/redshift/package-lock.yml +1 -1
- quollio_core/dbt_projects/seeds/.gitkeep +0 -0
- quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql +50 -27
- quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.sql +1 -2
- quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.sql +57 -20
- quollio_core/helper/core.py +4 -0
- quollio_core/helper/env_default.py +28 -2
- quollio_core/helper/log.py +17 -0
- quollio_core/profilers/bigquery.py +81 -0
- quollio_core/profilers/databricks.py +198 -0
- quollio_core/profilers/lineage.py +26 -0
- quollio_core/profilers/redshift.py +41 -74
- quollio_core/profilers/snowflake.py +138 -169
- quollio_core/profilers/sqllineage.py +0 -1
- quollio_core/profilers/stats.py +0 -1
- quollio_core/redshift.py +15 -18
- quollio_core/repository/bigquery.py +61 -0
- quollio_core/repository/databricks.py +62 -0
- quollio_core/repository/dbt.py +0 -1
- quollio_core/repository/qdc.py +0 -3
- quollio_core/repository/redshift.py +0 -1
- quollio_core/repository/snowflake.py +6 -1
- quollio_core/snowflake.py +29 -16
- {quollio_core-0.4.4.dist-info → quollio_core-0.4.10.dist-info}/METADATA +11 -2
- {quollio_core-0.4.4.dist-info → quollio_core-0.4.10.dist-info}/RECORD +48 -25
- {quollio_core-0.4.4.dist-info → quollio_core-0.4.10.dist-info}/LICENSE +0 -0
- {quollio_core-0.4.4.dist-info → quollio_core-0.4.10.dist-info}/WHEEL +0 -0
```diff
--- /dev/null
+++ b/quollio_core/profilers/bigquery.py
@@ -0,0 +1,81 @@
+import logging
+from typing import Any, Dict, List
+
+from quollio_core.profilers.lineage import gen_table_lineage_payload, parse_bigquery_table_lineage
+from quollio_core.repository import qdc
+from quollio_core.repository.bigquery import BigQueryClient, GCPLineageClient, get_entitiy_reference, get_search_request
+
+logger = logging.getLogger(__name__)
+
+
+def bigquery_table_lineage(
+    qdc_client: qdc.QDCExternalAPIClient,
+    tenant_id: str,
+    project_id: str,
+    regions: list,
+    org_id: str,
+    credentials: Any,
+):
+    lineage_client = GCPLineageClient(credentials)
+    bq_client = BigQueryClient(credentials)
+
+    datasets = bq_client.list_datasets(project_id)
+    all_tables = generate_table_list(datasets, bq_client)
+    lineage_links = generate_lineage_links(all_tables, lineage_client, project_id, regions)
+    lineage_links = parse_bigquery_table_lineage(lineage_links)
+
+    update_table_lineage_inputs = gen_table_lineage_payload(tenant_id=tenant_id, endpoint=org_id, tables=lineage_links)
+
+    req_count = 0
+    for update_table_lineage_input in update_table_lineage_inputs:
+        logger.info(
+            "Generating table lineage. downstream: %s -> %s -> %s",
+            update_table_lineage_input.downstream_database_name,
+            update_table_lineage_input.downstream_schema_name,
+            update_table_lineage_input.downstream_table_name,
+        )
+        status_code = qdc_client.update_lineage_by_id(
+            global_id=update_table_lineage_input.downstream_global_id,
+            payload=update_table_lineage_input.upstreams.as_dict(),
+        )
+        if status_code == 200:
+            req_count += 1
+    logger.info("Generating table lineage is finished. %s lineages are ingested.", req_count)
+
+
+def generate_table_list(datasets: List[str], bq_client: BigQueryClient) -> List[str]:
+    all_tables = []
+    for dataset in datasets:
+        all_tables.extend(
+            [
+                table
+                for table in bq_client.list_tables(dataset.dataset_id)
+                if table.table_type in ["TABLE", "VIEW", "MATERIALIZED_VIEW"]
+            ]
+        )
+
+    all_table_names = []
+    for table in all_tables:
+        all_table_names.append(f"{table.project}.{table.dataset_id}.{table.table_id}")
+
+    return all_table_names
+
+
+def generate_lineage_links(
+    all_tables: List[str], lineage_client: GCPLineageClient, project_id: str, regions: List[str]
+) -> Dict[str, List[str]]:
+    lineage_links = {}
+    for table in all_tables:
+        downstream = get_entitiy_reference()
+        downstream.fully_qualified_name = f"bigquery:{table}"
+
+        for region in regions:
+            request = get_search_request(downstream_table=downstream, project_id=project_id, region=region)
+            response = lineage_client.get_links(request=request)
+            for lineage in response:
+                target_table = str(lineage.target.fully_qualified_name).replace("bigquery:", "")
+                if target_table not in lineage_links:
+                    lineage_links[target_table] = []
+                lineage_links[target_table].append(str(lineage.source.fully_qualified_name).replace("bigquery:", ""))
+
+    return lineage_links
```
```diff
--- /dev/null
+++ b/quollio_core/profilers/databricks.py
@@ -0,0 +1,198 @@
+import logging
+from typing import Dict, List
+
+from quollio_core.profilers.lineage import (
+    gen_column_lineage_payload,
+    gen_table_lineage_payload,
+    parse_databricks_table_lineage,
+)
+from quollio_core.profilers.stats import gen_table_stats_payload
+from quollio_core.repository import databricks, qdc
+
+logger = logging.getLogger(__name__)
+
+
+def databricks_table_level_lineage(
+    conn: databricks.DatabricksConnectionConfig,
+    endpoint: str,
+    qdc_client: qdc.QDCExternalAPIClient,
+    tenant_id: str,
+    dbt_table_name: str = "quollio_lineage_table_level",
+) -> None:
+    with databricks.DatabricksQueryExecutor(config=conn) as databricks_executor:
+        results = databricks_executor.get_query_results(
+            query=f"""
+            SELECT
+                DOWNSTREAM_TABLE_NAME,
+                UPSTREAM_TABLES
+            FROM {conn.catalog}.{conn.schema}.{dbt_table_name}
+            """
+        )
+        tables = parse_databricks_table_lineage(results)
+        update_table_lineage_inputs = gen_table_lineage_payload(
+            tenant_id=tenant_id,
+            endpoint=endpoint,
+            tables=tables,
+        )
+
+        req_count = 0
+        for update_table_lineage_input in update_table_lineage_inputs:
+            logger.info(
+                "Generating table lineage. downstream: %s -> %s -> %s",
+                update_table_lineage_input.downstream_database_name,
+                update_table_lineage_input.downstream_schema_name,
+                update_table_lineage_input.downstream_table_name,
+            )
+            status_code = qdc_client.update_lineage_by_id(
+                global_id=update_table_lineage_input.downstream_global_id,
+                payload=update_table_lineage_input.upstreams.as_dict(),
+            )
+            if status_code == 200:
+                req_count += 1
+        logger.info("Generating table lineage is finished. %s lineages are ingested.", req_count)
+    return
+
+
+def databricks_column_level_lineage(
+    conn: databricks.DatabricksConnectionConfig,
+    endpoint: str,
+    qdc_client: qdc.QDCExternalAPIClient,
+    tenant_id: str,
+    dbt_table_name: str = "quollio_lineage_column_level",
+) -> None:
+    with databricks.DatabricksQueryExecutor(config=conn) as databricks_executor:
+        results = databricks_executor.get_query_results(
+            query=f"""
+            SELECT
+                *
+            FROM
+                {conn.catalog}.{conn.schema}.{dbt_table_name}
+            """
+        )
+
+        update_column_lineage_inputs = gen_column_lineage_payload(
+            tenant_id=tenant_id,
+            endpoint=endpoint,
+            columns=results,
+        )
+
+        req_count = 0
+        for update_column_lineage_input in update_column_lineage_inputs:
+            logger.info(
+                "Generating column lineage. downstream: %s -> %s -> %s -> %s",
+                update_column_lineage_input.downstream_database_name,
+                update_column_lineage_input.downstream_schema_name,
+                update_column_lineage_input.downstream_table_name,
+                update_column_lineage_input.downstream_column_name,
+            )
+            status_code = qdc_client.update_lineage_by_id(
+                global_id=update_column_lineage_input.downstream_global_id,
+                payload=update_column_lineage_input.upstreams.as_dict(),
+            )
+            if status_code == 200:
+                req_count += 1
+        logger.info(
+            "Generating column lineage is finished. %s lineages are ingested.",
+            req_count,
+        )
+    return
+
+
+def _get_monitoring_tables(
+    conn: databricks.DatabricksConnectionConfig, monitoring_table_suffix: str = "_profile_metrics"
+) -> List[Dict[str, str]]:
+    tables = []
+    query = f"""
+    SELECT
+        table_catalog,
+        table_schema,
+        table_name,
+        CONCAT(table_catalog, '.', table_schema, '.', table_name) AS table_fqdn
+    FROM
+        system.information_schema.tables
+    WHERE
+        table_name LIKE "%{monitoring_table_suffix}"
+        AND table_name NOT LIKE ('quollio_%')
+    """
+    with databricks.DatabricksQueryExecutor(config=conn) as databricks_executor:
+        tables = databricks_executor.get_query_results(query)
+    if len(tables) > 0:
+        logger.info("Found %s monitoring tables.", len(tables))
+        return tables
+    else:
+        logger.info("No monitoring tables found.")
+        return []
+
+
+def _get_column_stats(
+    conn: databricks.DatabricksConnectionConfig, monitoring_table_suffix: str = "_profile_metrics"
+) -> List[Dict[str, str]]:
+    tables = _get_monitoring_tables(conn, monitoring_table_suffix)
+    if not tables:
+        return []
+    stats = []
+    for table in tables:
+        monitored_table = table["table_fqdn"].removesuffix("_profile_metrics")
+        monitored_table = monitored_table.split(".")
+        if len(monitored_table) != 3:
+            raise ValueError(f"Invalid table name: {table['table_fqdn']}")
+        with databricks.DatabricksQueryExecutor(config=conn) as databricks_executor:
+            query = """
+            SELECT
+                "{monitored_table_catalog}" as DB_NAME,
+                "{monitored_table_schema}" as SCHEMA_NAME,
+                "{monitored_table_name}" as TABLE_NAME,
+                t.COLUMN_NAME,
+                t.DATA_TYPE,
+                t.distinct_count as CARDINALITY,
+                t.MAX as MAX_VALUE,
+                t.MIN as MIN_VALUE,
+                t.AVG as AVG_VALUE,
+                t.MEDIAN as MEDIAN_VALUE,
+                t.STDDEV as STDDEV_VALUE,
+                t.NUM_NULLS as NULL_COUNT,
+                t.frequent_items[0].item AS MODE_VALUE,
+                MAX(t.window) AS LATEST
+            FROM
+                {monitoring_table} t
+            WHERE
+                t.column_name not in (':table')
+            GROUP BY
+                t.COLUMN_NAME,
+                t.DATA_TYPE,
+                t.distinct_count,
+                t.MAX,
+                t.MIN,
+                t.AVG,
+                t.MEDIAN,
+                t.STDDEV,
+                t.NUM_NULLS,
+                t.frequent_items
+            """.format(
+                monitoring_table=table["table_fqdn"],
+                monitored_table_catalog=monitored_table[0],
+                monitored_table_schema=monitored_table[1],
+                monitored_table_name=monitored_table[2],
+            )
+            stats.append(databricks_executor.get_query_results(query))
+    return stats
+
+
+def databricks_column_stats(
+    conn: databricks.DatabricksConnectionConfig,
+    endpoint: str,
+    qdc_client: qdc.QDCExternalAPIClient,
+    tenant_id: str,
+    monitoring_table_suffix: str = "_profile_metrics",
+) -> None:
+    table_stats = _get_column_stats(conn, monitoring_table_suffix)
+    for table in table_stats:
+        stats = gen_table_stats_payload(tenant_id=tenant_id, endpoint=endpoint, stats=table)
+        for stat in stats:
+            status_code = qdc_client.update_stats_by_id(
+                global_id=stat.global_id,
+                payload=stat.body.as_dict(),
+            )
+            if status_code == 200:
+                logger.info("Stats for %s is successfully ingested.", stat.global_id)
+    return
```
```diff
--- a/quollio_core/profilers/lineage.py
+++ b/quollio_core/profilers/lineage.py
@@ -141,3 +141,29 @@ def parse_snowflake_results(results: List[Dict[str, str]]):
         payload["UPSTREAM_TABLES"] = json.loads(result["UPSTREAM_TABLES"])
         payloads.append(payload)
     return payloads
+
+
+def parse_databricks_table_lineage(results: List) -> List[Dict[str, Dict]]:
+    # Parses rows from the Quollio Databricks lineage table into a list of
+    # dicts keyed by DOWNSTREAM_TABLE_NAME and UPSTREAM_TABLES.
+    payloads = list()
+    for result in results:
+        payload = dict()
+        payload["DOWNSTREAM_TABLE_NAME"] = result["DOWNSTREAM_TABLE_NAME"]
+        payload["UPSTREAM_TABLES"] = json.loads(result["UPSTREAM_TABLES"])
+        payloads.append(payload)
+    return payloads
+
+
+def parse_bigquery_table_lineage(tables: Dict) -> List[Dict[str, Dict]]:
+    payloads = list()
+    for downstream, upstream in tables.items():
+        payload = {
+            "DOWNSTREAM_TABLE_NAME": "",
+            "UPSTREAM_TABLES": [],
+        }
+        payload["DOWNSTREAM_TABLE_NAME"] = downstream
+        for upstream_table in upstream:
+            payload["UPSTREAM_TABLES"].append({"upstream_object_name": upstream_table})
+        payloads.append(payload)
+    return payloads
```
```diff
--- a/quollio_core/profilers/redshift.py
+++ b/quollio_core/profilers/redshift.py
@@ -14,7 +14,6 @@ def redshift_table_level_lineage(
     tenant_id: str,
     dbt_table_name: str,
 ) -> None:
-    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
     with redshift.RedshiftQueryExecutor(config=conn) as redshift_executor:
         results = redshift_executor.get_query_results(
             query="""
@@ -55,22 +54,7 @@ def redshift_table_level_lineage(
     return
 
 
-def _get_target_tables_query(db: str, schema: str) -> str:
-    query = """
-    SELECT
-        DISTINCT
-        database_name
-        , schema_name
-        , table_name
-    FROM
-        {db}.{schema}.quollio_stats_profiling_columns
-    """.format(
-        db=db, schema=schema
-    )
-    return query
-
-
-def _get_stats_tables_query(db: str, schema: str) -> str:
+def _gen_get_stats_views_query(db: str, schema: str) -> str:
     query = """
     SELECT
         DISTINCT
@@ -93,70 +77,54 @@ def redshift_table_stats(
     qdc_client: qdc.QDCExternalAPIClient,
     tenant_id: str,
 ) -> None:
-    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
 
     with redshift.RedshiftQueryExecutor(config=conn) as redshift_executor:
-
-        target_query = _get_target_tables_query(
+        stats_query = _gen_get_stats_views_query(
             db=conn.database,
             schema=conn.schema,
         )
-
+        stats_views = redshift_executor.get_query_results(query=stats_query)
 
-    [old lines 106-138: deleted code not preserved in this rendering]
-                    target_table=target_asset[2],
+        req_count = 0
+        for stats_view in stats_views:
+            stats_query = """
+            SELECT
+                db_name
+                , schema_name
+                , table_name
+                , column_name
+                , max_value
+                , min_value
+                , null_count
+                , cardinality
+                , avg_value
+                , median_value
+                , mode_value
+                , stddev_value
+            FROM
+                {db}.{schema}.{table}
+            """.format(
+                db=stats_view[0],
+                schema=stats_view[1],
+                table=stats_view[2],
+            )
+            stats_result = redshift_executor.get_query_results(query=stats_query)
+            payloads = gen_table_stats_payload_from_tuple(tenant_id=tenant_id, endpoint=conn.host, stats=stats_result)
+            for payload in payloads:
+                logger.info(
+                    "Generating table stats. asset: {db} -> {schema} -> {table} -> {column}".format(
+                        db=payload.db,
+                        schema=payload.schema,
+                        table=payload.table,
+                        column=payload.column,
+                    )
                 )
-
-
-
+                status_code = qdc_client.update_stats_by_id(
+                    global_id=payload.global_id,
+                    payload=payload.body.get_column_stats(),
                 )
-
-
-                    "Generating table stats. asset: {db} -> {schema} -> {table} -> {column}".format(
-                        db=payload.db,
-                        schema=payload.schema,
-                        table=payload.table,
-                        column=payload.column,
-                    )
-                )
-                status_code = qdc_client.update_stats_by_id(
-                    global_id=payload.global_id,
-                    payload=payload.body.get_column_stats(),
-                )
-                if status_code == 200:
-                    req_count += 1
+                if status_code == 200:
+                    req_count += 1
         logger.info(f"Generating table stats is finished. {req_count} stats are ingested.")
         return
 
@@ -166,7 +134,6 @@ def redshift_table_level_sqllineage(
     qdc_client: qdc.QDCExternalAPIClient,
     tenant_id: str,
 ) -> None:
-    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
     redshift_connector = redshift.RedshiftQueryExecutor(conn)
    results = redshift_connector.get_query_results(
        query="""
```