quollio-core 0.4.9__tar.gz → 0.4.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {quollio_core-0.4.9 → quollio_core-0.4.10}/PKG-INFO +1 -1
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/__init__.py +1 -1
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/bigquery.py +10 -1
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/bricks.py +2 -3
- quollio_core-0.4.10/quollio_core/helper/log.py +17 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/profilers/databricks.py +0 -2
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/profilers/redshift.py +41 -74
- quollio_core-0.4.10/quollio_core/profilers/snowflake.py +225 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/profilers/sqllineage.py +0 -1
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/redshift.py +3 -5
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/repository/databricks.py +3 -3
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/repository/dbt.py +0 -1
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/repository/qdc.py +0 -3
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/repository/redshift.py +0 -1
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/repository/snowflake.py +6 -1
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/snowflake.py +4 -5
- quollio_core-0.4.9/quollio_core/profilers/snowflake.py +0 -256
- {quollio_core-0.4.9 → quollio_core-0.4.10}/LICENSE +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/README.md +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/pyproject.toml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/.gitignore +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/README.md +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/analyses/.gitkeep +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/dbt_project.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/macros/.gitkeep +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.sql +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.sql +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/models/sources.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/package-lock.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/packages.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/profiles/profiles_template.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/seeds/.gitkeep +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/snapshots/.gitkeep +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/README.md +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/analyses/.gitkeep +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/dbt_project.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/macros/.gitkeep +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/macros/materialization/divided_view.sql +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/models/quollio_lineage_table_level.sql +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/models/quollio_lineage_table_level.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/models/quollio_lineage_view_level.sql +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/models/quollio_lineage_view_level.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/models/quollio_sqllineage_sources.sql +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/models/quollio_sqllineage_sources.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/models/quollio_stats_columns.sql +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/models/quollio_stats_columns.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/models/quollio_stats_profiling_columns.sql +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/models/quollio_stats_profiling_columns.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/models/sources.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/package-lock.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/packages.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/profiles/profiles_template.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/seeds/.gitkeep +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/snapshots/.gitkeep +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/seeds/.gitkeep +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/README.md +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/analyses/.gitkeep +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/dbt_project.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/macros/.gitkeep +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_column_level.sql +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_column_level.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_table_level.sql +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_table_level.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.sql +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.sql +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.sql +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/models/sources.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/package-lock.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/packages.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/profiles/profiles_template.yml +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/seeds/.gitkeep +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/snapshots/.gitkeep +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/helper/__init__.py +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/helper/core.py +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/helper/env_default.py +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/profilers/__init__.py +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/profilers/bigquery.py +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/profilers/lineage.py +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/profilers/stats.py +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/repository/__init__.py +0 -0
- {quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/repository/bigquery.py +0 -0
@@ -3,6 +3,7 @@ import json
|
|
3
3
|
import logging
|
4
4
|
|
5
5
|
from quollio_core.helper.env_default import env_default
|
6
|
+
from quollio_core.helper.log import set_log_level
|
6
7
|
from quollio_core.profilers.bigquery import bigquery_table_lineage
|
7
8
|
from quollio_core.repository import qdc
|
8
9
|
from quollio_core.repository.bigquery import get_credentials, get_org_id
|
@@ -88,14 +89,22 @@ if __name__ == "__main__":
|
|
88
89
|
help="GCP regions where the data is located. Multiple regions can be provided separated by space.",
|
89
90
|
nargs="+",
|
90
91
|
)
|
92
|
+
parser.add_argument(
|
93
|
+
"--log_level",
|
94
|
+
type=str,
|
95
|
+
choices=["debug", "info", "warn", "error", "none"],
|
96
|
+
action=env_default("LOG_LEVEL"),
|
97
|
+
required=False,
|
98
|
+
help="The log level for dbt commands. Default value is info",
|
99
|
+
)
|
91
100
|
|
92
101
|
args = parser.parse_args()
|
102
|
+
set_log_level(level=args.log_level)
|
93
103
|
|
94
104
|
if len(args.commands) == 0:
|
95
105
|
raise ValueError("No command is provided")
|
96
106
|
|
97
107
|
if "load_lineage" in args.commands:
|
98
|
-
|
99
108
|
qdc_client = qdc.QDCExternalAPIClient(
|
100
109
|
base_url=args.api_url, client_id=args.client_id, client_secret=args.client_secret
|
101
110
|
)
|
@@ -4,6 +4,7 @@ import os
|
|
4
4
|
|
5
5
|
from quollio_core.helper.core import setup_dbt_profile, trim_prefix
|
6
6
|
from quollio_core.helper.env_default import env_default
|
7
|
+
from quollio_core.helper.log import set_log_level
|
7
8
|
from quollio_core.profilers.databricks import (
|
8
9
|
databricks_column_level_lineage,
|
9
10
|
databricks_column_stats,
|
@@ -20,7 +21,6 @@ def build_view(
|
|
20
21
|
target_tables: str = "",
|
21
22
|
log_level: str = "info",
|
22
23
|
) -> None:
|
23
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
24
24
|
|
25
25
|
logger.info("Build profiler views using dbt")
|
26
26
|
# set parameters
|
@@ -64,7 +64,6 @@ def load_lineage(
|
|
64
64
|
tenant_id: str,
|
65
65
|
enable_column_lineage: bool = False,
|
66
66
|
) -> None:
|
67
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
68
67
|
|
69
68
|
logger.info("Generate Databricks table to table lineage.")
|
70
69
|
databricks_table_level_lineage(
|
@@ -99,7 +98,6 @@ def load_column_stats(
|
|
99
98
|
qdc_client: qdc.QDCExternalAPIClient,
|
100
99
|
tenant_id: str,
|
101
100
|
) -> None:
|
102
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
103
101
|
|
104
102
|
logger.info("Generate Databricks column stats.")
|
105
103
|
databricks_column_stats(
|
@@ -243,6 +241,7 @@ if __name__ == "__main__":
|
|
243
241
|
)
|
244
242
|
|
245
243
|
args = parser.parse_args()
|
244
|
+
set_log_level(level=args.log_level)
|
246
245
|
|
247
246
|
conn = db.DatabricksConnectionConfig(
|
248
247
|
# MEMO: Metadata agent allows the string 'https://' as a host name but is not allowed by intelligence agent.
|
@@ -0,0 +1,17 @@
|
|
1
|
+
import logging
|
2
|
+
|
3
|
+
|
4
|
+
def set_log_level(level: str = "info") -> None:
    """Configure root logging with the package-wide message format.

    Calls ``logging.basicConfig`` exactly once, passing the numeric level
    that corresponds to ``level`` and the shared format string.

    Args:
        level: Name of the desired level. ``debug``, ``info``, ``warn`` /
            ``warning``, ``error`` and ``critical`` are recognised, ignoring
            case and surrounding whitespace. ``None`` is treated as the
            default ``"info"``. Any other value (including ``"none"``)
            selects ``logging.NOTSET``.
    """
    fmt = "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
    levels = {
        "debug": logging.DEBUG,
        "info": logging.INFO,
        "warn": logging.WARNING,
        "warning": logging.WARNING,
        "error": logging.ERROR,
        "critical": logging.CRITICAL,
    }

    if level is None:
        # Presumably what the CLI passes when neither --log_level nor the
        # LOG_LEVEL environment variable is set (confirm against
        # env_default). Fall back to the documented default instead of
        # NOTSET, which would emit every record.
        level = "info"

    key = level.strip().lower() if isinstance(level, str) else None

    # NOTE(review): NOTSET on the root logger lets all records through, so
    # "none" does not silence output -- confirm this is the intended meaning.
    logging.basicConfig(level=levels.get(key, logging.NOTSET), format=fmt)
|
@@ -19,7 +19,6 @@ def databricks_table_level_lineage(
|
|
19
19
|
tenant_id: str,
|
20
20
|
dbt_table_name: str = "quollio_lineage_table_level",
|
21
21
|
) -> None:
|
22
|
-
logging.basicConfig(level=logging.info, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
23
22
|
with databricks.DatabricksQueryExecutor(config=conn) as databricks_executor:
|
24
23
|
results = databricks_executor.get_query_results(
|
25
24
|
query=f"""
|
@@ -61,7 +60,6 @@ def databricks_column_level_lineage(
|
|
61
60
|
tenant_id: str,
|
62
61
|
dbt_table_name: str = "quollio_lineage_column_level",
|
63
62
|
) -> None:
|
64
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
65
63
|
with databricks.DatabricksQueryExecutor(config=conn) as databricks_executor:
|
66
64
|
results = databricks_executor.get_query_results(
|
67
65
|
query=f"""
|
@@ -14,7 +14,6 @@ def redshift_table_level_lineage(
|
|
14
14
|
tenant_id: str,
|
15
15
|
dbt_table_name: str,
|
16
16
|
) -> None:
|
17
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
18
17
|
with redshift.RedshiftQueryExecutor(config=conn) as redshift_executor:
|
19
18
|
results = redshift_executor.get_query_results(
|
20
19
|
query="""
|
@@ -55,22 +54,7 @@ def redshift_table_level_lineage(
|
|
55
54
|
return
|
56
55
|
|
57
56
|
|
58
|
-
def
|
59
|
-
query = """
|
60
|
-
SELECT
|
61
|
-
DISTINCT
|
62
|
-
database_name
|
63
|
-
, schema_name
|
64
|
-
, table_name
|
65
|
-
FROM
|
66
|
-
{db}.{schema}.quollio_stats_profiling_columns
|
67
|
-
""".format(
|
68
|
-
db=db, schema=schema
|
69
|
-
)
|
70
|
-
return query
|
71
|
-
|
72
|
-
|
73
|
-
def _get_stats_tables_query(db: str, schema: str) -> str:
|
57
|
+
def _gen_get_stats_views_query(db: str, schema: str) -> str:
|
74
58
|
query = """
|
75
59
|
SELECT
|
76
60
|
DISTINCT
|
@@ -93,70 +77,54 @@ def redshift_table_stats(
|
|
93
77
|
qdc_client: qdc.QDCExternalAPIClient,
|
94
78
|
tenant_id: str,
|
95
79
|
) -> None:
|
96
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
97
80
|
|
98
81
|
with redshift.RedshiftQueryExecutor(config=conn) as redshift_executor:
|
99
|
-
|
100
|
-
target_query = _get_target_tables_query(
|
82
|
+
stats_query = _gen_get_stats_views_query(
|
101
83
|
db=conn.database,
|
102
84
|
schema=conn.schema,
|
103
85
|
)
|
104
|
-
|
86
|
+
stats_views = redshift_executor.get_query_results(query=stats_query)
|
105
87
|
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
target_table=target_asset[2],
|
88
|
+
req_count = 0
|
89
|
+
for stats_view in stats_views:
|
90
|
+
stats_query = """
|
91
|
+
SELECT
|
92
|
+
db_name
|
93
|
+
, schema_name
|
94
|
+
, table_name
|
95
|
+
, column_name
|
96
|
+
, max_value
|
97
|
+
, min_value
|
98
|
+
, null_count
|
99
|
+
, cardinality
|
100
|
+
, avg_value
|
101
|
+
, median_value
|
102
|
+
, mode_value
|
103
|
+
, stddev_value
|
104
|
+
FROM
|
105
|
+
{db}.{schema}.{table}
|
106
|
+
""".format(
|
107
|
+
db=stats_view[0],
|
108
|
+
schema=stats_view[1],
|
109
|
+
table=stats_view[2],
|
110
|
+
)
|
111
|
+
stats_result = redshift_executor.get_query_results(query=stats_query)
|
112
|
+
payloads = gen_table_stats_payload_from_tuple(tenant_id=tenant_id, endpoint=conn.host, stats=stats_result)
|
113
|
+
for payload in payloads:
|
114
|
+
logger.info(
|
115
|
+
"Generating table stats. asset: {db} -> {schema} -> {table} -> {column}".format(
|
116
|
+
db=payload.db,
|
117
|
+
schema=payload.schema,
|
118
|
+
table=payload.table,
|
119
|
+
column=payload.column,
|
120
|
+
)
|
140
121
|
)
|
141
|
-
|
142
|
-
|
143
|
-
|
122
|
+
status_code = qdc_client.update_stats_by_id(
|
123
|
+
global_id=payload.global_id,
|
124
|
+
payload=payload.body.get_column_stats(),
|
144
125
|
)
|
145
|
-
|
146
|
-
|
147
|
-
"Generating table stats. asset: {db} -> {schema} -> {table} -> {column}".format(
|
148
|
-
db=payload.db,
|
149
|
-
schema=payload.schema,
|
150
|
-
table=payload.table,
|
151
|
-
column=payload.column,
|
152
|
-
)
|
153
|
-
)
|
154
|
-
status_code = qdc_client.update_stats_by_id(
|
155
|
-
global_id=payload.global_id,
|
156
|
-
payload=payload.body.get_column_stats(),
|
157
|
-
)
|
158
|
-
if status_code == 200:
|
159
|
-
req_count += 1
|
126
|
+
if status_code == 200:
|
127
|
+
req_count += 1
|
160
128
|
logger.info(f"Generating table stats is finished. {req_count} stats are ingested.")
|
161
129
|
return
|
162
130
|
|
@@ -166,7 +134,6 @@ def redshift_table_level_sqllineage(
|
|
166
134
|
qdc_client: qdc.QDCExternalAPIClient,
|
167
135
|
tenant_id: str,
|
168
136
|
) -> None:
|
169
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
170
137
|
redshift_connector = redshift.RedshiftQueryExecutor(conn)
|
171
138
|
results = redshift_connector.get_query_results(
|
172
139
|
query="""
|
@@ -0,0 +1,225 @@
|
|
1
|
+
import logging
|
2
|
+
|
3
|
+
from quollio_core.profilers.lineage import (
|
4
|
+
gen_column_lineage_payload,
|
5
|
+
gen_table_lineage_payload,
|
6
|
+
parse_snowflake_results,
|
7
|
+
)
|
8
|
+
from quollio_core.profilers.sqllineage import SQLLineage
|
9
|
+
from quollio_core.profilers.stats import gen_table_stats_payload
|
10
|
+
from quollio_core.repository import qdc, snowflake
|
11
|
+
|
12
|
+
logger = logging.getLogger(__name__)
|
13
|
+
|
14
|
+
|
15
|
+
def snowflake_table_to_table_lineage(
    conn: snowflake.SnowflakeConnectionConfig,
    qdc_client: qdc.QDCExternalAPIClient,
    tenant_id: str,
) -> None:
    """Send table-level lineage read from Snowflake to the QDC API.

    Selects every row of ``QUOLLIO_LINEAGE_TABLE_LEVEL`` in
    ``conn.account_database`` / ``conn.account_schema``, converts the rows
    into lineage payloads and issues one ``update_lineage_by_id`` request per
    payload. Requests answered with status 200 are counted and the total is
    logged at the end.

    Args:
        conn: Snowflake connection settings; ``conn.account_id`` is passed
            to the payload generator as the endpoint.
        qdc_client: Client used to submit each lineage update.
        tenant_id: Tenant identifier forwarded to the payload generator.
    """
    with snowflake.SnowflakeQueryExecutor(conn) as executor:
        lineage_query = """
            SELECT
                *
            FROM
                {db}.{schema}.QUOLLIO_LINEAGE_TABLE_LEVEL
            """.format(
            db=conn.account_database,
            schema=conn.account_schema,
        )
        rows = executor.get_query_results(query=lineage_query)
        lineage_inputs = gen_table_lineage_payload(
            tenant_id=tenant_id,
            endpoint=conn.account_id,
            tables=parse_snowflake_results(results=rows),
        )

        ingested = 0
        for lineage_input in lineage_inputs:
            logger.info(
                "Generating table lineage. downstream: "
                f"{lineage_input.downstream_database_name} -> "
                f"{lineage_input.downstream_schema_name} -> "
                f"{lineage_input.downstream_table_name}"
            )
            response_status = qdc_client.update_lineage_by_id(
                global_id=lineage_input.downstream_global_id,
                payload=lineage_input.upstreams.as_dict(),
            )
            # Only requests answered with status 200 count as ingested.
            if response_status != 200:
                continue
            ingested += 1
        logger.info(f"Generating table lineage is finished. {ingested} lineages are ingested.")
|
56
|
+
|
57
|
+
|
58
|
+
def snowflake_column_to_column_lineage(
    conn: snowflake.SnowflakeConnectionConfig,
    qdc_client: qdc.QDCExternalAPIClient,
    tenant_id: str,
) -> None:
    """Send column-level lineage read from Snowflake to the QDC API.

    Selects every row of ``QUOLLIO_LINEAGE_COLUMN_LEVEL`` in
    ``conn.account_database`` / ``conn.account_schema``, hands the raw rows
    to ``gen_column_lineage_payload`` and issues one ``update_lineage_by_id``
    request per resulting payload. Requests answered with status 200 are
    counted and the total is logged at the end.

    Args:
        conn: Snowflake connection settings; ``conn.account_id`` is passed
            to the payload generator as the endpoint.
        qdc_client: Client used to submit each lineage update.
        tenant_id: Tenant identifier forwarded to the payload generator.
    """
    with snowflake.SnowflakeQueryExecutor(conn) as executor:
        lineage_query = """
            SELECT
                *
            FROM
                {db}.{schema}.QUOLLIO_LINEAGE_COLUMN_LEVEL
            """.format(
            db=conn.account_database,
            schema=conn.account_schema,
        )
        rows = executor.get_query_results(query=lineage_query)
        lineage_inputs = gen_column_lineage_payload(
            tenant_id=tenant_id,
            endpoint=conn.account_id,
            columns=rows,
        )

        ingested = 0
        for lineage_input in lineage_inputs:
            logger.info(
                "Generating column lineage. downstream: "
                f"{lineage_input.downstream_database_name} -> "
                f"{lineage_input.downstream_schema_name} -> "
                f"{lineage_input.downstream_table_name} -> "
                f"{lineage_input.downstream_column_name}"
            )
            response_status = qdc_client.update_lineage_by_id(
                global_id=lineage_input.downstream_global_id,
                payload=lineage_input.upstreams.as_dict(),
            )
            # Only requests answered with status 200 count as ingested.
            if response_status != 200:
                continue
            ingested += 1
        logger.info(f"Generating column lineage is finished. {ingested} lineages are ingested.")
|
99
|
+
|
100
|
+
|
101
|
+
def snowflake_table_level_sqllineage(
    conn: snowflake.SnowflakeConnectionConfig,
    qdc_client: qdc.QDCExternalAPIClient,
    tenant_id: str,
) -> None:
    """Derive table lineage from stored SQL text and send it to the QDC API.

    Reads ``database_name``, ``schema_name`` and ``query_text`` from
    ``QUOLLIO_SQLLINEAGE_SOURCES``, runs each statement through
    ``SQLLineage`` with the ``snowflake`` dialect to obtain source and
    destination tables, and builds one lineage input per row. All rows are
    analysed before the first request is sent; afterwards one
    ``update_lineage_by_id`` request is issued per input and those answered
    with status 200 are counted.

    Args:
        conn: Snowflake connection settings; ``conn.account_id`` is passed
            to ``gen_lineage_input`` as the endpoint.
        qdc_client: Client used to submit each lineage update.
        tenant_id: Tenant identifier forwarded to ``gen_lineage_input``.
    """
    with snowflake.SnowflakeQueryExecutor(conn) as executor:
        source_query = """
            SELECT
                database_name
                , schema_name
                , query_text
            FROM
                {db}.{schema}.QUOLLIO_SQLLINEAGE_SOURCES
            """.format(
            db=conn.account_database,
            schema=conn.account_schema,
        )
        rows = executor.get_query_results(query=source_query)

        analyzer = SQLLineage()
        # Lazily yields (source tables, destination table) for each row.
        parsed_lineages = (
            analyzer.get_table_level_lineage_source(
                sql=row["QUERY_TEXT"],
                dialect="snowflake",
                dest_db=row["DATABASE_NAME"],
                dest_schema=row["SCHEMA_NAME"],
            )
            for row in rows
        )
        # Materialise every input up front so requests start only after all
        # statements have been analysed.
        lineage_inputs = [
            analyzer.gen_lineage_input(
                tenant_id=tenant_id,
                endpoint=conn.account_id,
                src_tables=src_tables,
                dest_table=dest_table,
            )
            for src_tables, dest_table in parsed_lineages
        ]

        ingested = 0
        for lineage_input in lineage_inputs:
            logger.info(
                "Generating table lineage. downstream: "
                f"{lineage_input.downstream_database_name} -> "
                f"{lineage_input.downstream_schema_name} -> "
                f"{lineage_input.downstream_table_name}"
            )
            response_status = qdc_client.update_lineage_by_id(
                global_id=lineage_input.downstream_global_id,
                payload=lineage_input.upstreams.as_dict(),
            )
            # Only requests answered with status 200 count as ingested.
            if response_status != 200:
                continue
            ingested += 1
        logger.info(f"Generating table lineage is finished. {ingested} lineages are ingested.")
|
151
|
+
|
152
|
+
|
153
|
+
def snowflake_table_stats(
    conn: snowflake.SnowflakeConnectionConfig,
    qdc_client: qdc.QDCExternalAPIClient,
    tenant_id: str,
) -> None:
    """Send column statistics read from Snowflake stats views to the QDC API.

    First lists the stats views via ``_gen_get_stats_views_query``, then
    selects the statistic columns from each view, converts the rows with
    ``gen_table_stats_payload`` and issues one ``update_stats_by_id`` request
    per payload. Requests answered with status 200 are counted across all
    views and the total is logged at the end.

    Args:
        conn: Snowflake connection settings; ``conn.account_id`` is passed
            to the payload generator as the endpoint.
        qdc_client: Client used to submit each stats update.
        tenant_id: Tenant identifier forwarded to the payload generator.
    """
    with snowflake.SnowflakeQueryExecutor(conn) as executor:
        views_query = _gen_get_stats_views_query(
            db=conn.account_database,
            schema=conn.account_schema,
        )
        stats_views = executor.get_query_results(query=views_query)

        ingested = 0
        for view in stats_views:
            view_query = """
            SELECT
                db_name
                , schema_name
                , table_name
                , column_name
                , max_value
                , min_value
                , null_count
                , cardinality
                , avg_value
                , median_value
                , mode_value
                , stddev_value
            FROM
                {db}.{schema}.{table}
            """.format(
                db=view["TABLE_CATALOG"],
                schema=view["TABLE_SCHEMA"],
                table=view["TABLE_NAME"],
            )
            logger.debug(f"The following sql will be fetched to retrieve stats values. {view_query}")
            view_stats = executor.get_query_results(query=view_query)
            stats_payloads = gen_table_stats_payload(
                tenant_id=tenant_id,
                endpoint=conn.account_id,
                stats=view_stats,
            )
            for payload in stats_payloads:
                logger.info(
                    "Generating table stats. asset: "
                    f"{payload.db} -> {payload.schema} -> {payload.table} -> {payload.column}"
                )
                response_status = qdc_client.update_stats_by_id(
                    global_id=payload.global_id,
                    payload=payload.body.get_column_stats(),
                )
                # Only requests answered with status 200 count as ingested.
                if response_status != 200:
                    continue
                ingested += 1
        logger.info(f"Generating table stats is finished. {ingested} stats are ingested.")
|
208
|
+
|
209
|
+
|
210
|
+
def _gen_get_stats_views_query(db: str, schema: str) -> str:
    """Build the SQL that lists the stats views in a schema.

    The query reads ``{db}.INFORMATION_SCHEMA.TABLES`` and returns the
    distinct catalog, schema and name of every table whose name starts with
    ``QUOLLIO_STATS_COLUMNS_`` and whose schema equals the upper-cased
    ``schema``.

    Both arguments are interpolated into the SQL text as-is, so callers must
    pass trusted identifiers.

    Args:
        db: Database whose ``INFORMATION_SCHEMA`` is queried.
        schema: Schema name to match; upper-cased inside the query.

    Returns:
        The SQL statement as a string.
    """
    return f"""
        SELECT
            DISTINCT
            TABLE_CATALOG
            , TABLE_SCHEMA
            , TABLE_NAME
        FROM
            {db}.INFORMATION_SCHEMA.TABLES
        WHERE
            startswith(TABLE_NAME, 'QUOLLIO_STATS_COLUMNS_')
            AND TABLE_SCHEMA = UPPER('{schema}')
        """
|
@@ -54,7 +54,6 @@ class SQLLineage:
|
|
54
54
|
dest_db: str = None,
|
55
55
|
dest_schema: str = None,
|
56
56
|
) -> Tuple[Set[Table], Table]:
|
57
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
58
57
|
try:
|
59
58
|
statement: sqlglot.Expression = sqlglot.parse_one(sql=sql, error_level=sqlglot.ErrorLevel.RAISE)
|
60
59
|
except ParseError as e:
|
@@ -4,6 +4,7 @@ import os
|
|
4
4
|
|
5
5
|
from quollio_core.helper.core import setup_dbt_profile
|
6
6
|
from quollio_core.helper.env_default import env_default
|
7
|
+
from quollio_core.helper.log import set_log_level
|
7
8
|
from quollio_core.profilers.redshift import (
|
8
9
|
redshift_table_level_lineage,
|
9
10
|
redshift_table_level_sqllineage,
|
@@ -20,8 +21,6 @@ def build_view(
|
|
20
21
|
target_tables: str = "",
|
21
22
|
log_level: str = "info",
|
22
23
|
) -> None:
|
23
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
24
|
-
|
25
24
|
logger.info("Build profiler views using dbt")
|
26
25
|
# set parameters
|
27
26
|
dbt_client = dbt.DBTClient()
|
@@ -74,7 +73,6 @@ def load_lineage(
|
|
74
73
|
qdc_client: qdc.QDCExternalAPIClient,
|
75
74
|
tenant_id: str,
|
76
75
|
) -> None:
|
77
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
78
76
|
logger.info("Generate redshift table to table lineage.")
|
79
77
|
redshift_table_level_lineage(
|
80
78
|
conn=conn,
|
@@ -101,7 +99,6 @@ def load_stats(
|
|
101
99
|
qdc_client: qdc.QDCExternalAPIClient,
|
102
100
|
tenant_id: str,
|
103
101
|
) -> None:
|
104
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
105
102
|
|
106
103
|
logger.info("Generate redshift stats.")
|
107
104
|
redshift_table_stats(
|
@@ -119,7 +116,6 @@ def load_sqllineage(
|
|
119
116
|
qdc_client: qdc.QDCExternalAPIClient,
|
120
117
|
tenant_id: str,
|
121
118
|
) -> None:
|
122
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
123
119
|
|
124
120
|
logger.info("Generate Redshift sqllineage.")
|
125
121
|
redshift_table_level_sqllineage(
|
@@ -266,6 +262,8 @@ if __name__ == "__main__":
|
|
266
262
|
help="The client secrete that is created on Quollio console to let clients access Quollio External API",
|
267
263
|
)
|
268
264
|
args = parser.parse_args()
|
265
|
+
set_log_level(level=args.log_level)
|
266
|
+
|
269
267
|
conn = redshift.RedshiftConnectionConfig(
|
270
268
|
host=args.host,
|
271
269
|
build_user=args.build_user,
|
@@ -5,7 +5,7 @@ from typing import Dict, List, Optional
|
|
5
5
|
from databricks.sdk.core import Config, HeaderFactory, oauth_service_principal
|
6
6
|
from databricks.sql.client import Connection, connect
|
7
7
|
|
8
|
-
|
8
|
+
logger = logging.getLogger(__name__)
|
9
9
|
|
10
10
|
|
11
11
|
@dataclass
|
@@ -47,8 +47,8 @@ class DatabricksQueryExecutor:
|
|
47
47
|
cur.execute(query)
|
48
48
|
result: List[Dict[str, str]] = cur.fetchall()
|
49
49
|
except Exception as e:
|
50
|
-
|
51
|
-
|
50
|
+
logger.error(query, exc_info=True)
|
51
|
+
logger.error("databricks get_query_results failed. %s", e)
|
52
52
|
raise
|
53
53
|
|
54
54
|
for row in result:
|
@@ -11,7 +11,6 @@ class DBTClient:
|
|
11
11
|
self.dbt = dbtRunner()
|
12
12
|
|
13
13
|
def invoke(self, cmd: str, project_dir: str, profile_dir: str, options: List[str] = None) -> dbtRunnerResult:
|
14
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
15
14
|
req = [cmd, "--project-dir", project_dir, "--profiles-dir", profile_dir]
|
16
15
|
if options is not None:
|
17
16
|
req.extend(options)
|
@@ -25,7 +25,6 @@ class QDCExternalAPIClient:
|
|
25
25
|
Tried to find a package for oauth0 client credentials flow,
|
26
26
|
but any of them contains bugs or lacks of features to handle the token refresh when it's expired
|
27
27
|
"""
|
28
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
29
28
|
url = f"{self.base_url}/oauth2/token"
|
30
29
|
creds = f"{self.client_id}:{self.client_secret}"
|
31
30
|
encoded_creds = base64.b64encode(creds.encode()).decode()
|
@@ -65,7 +64,6 @@ class QDCExternalAPIClient:
|
|
65
64
|
return session
|
66
65
|
|
67
66
|
def update_stats_by_id(self, global_id: str, payload: Dict[str, List[str]]) -> int:
|
68
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
69
67
|
self._refresh_token_if_expired()
|
70
68
|
headers = {"content-type": "application/json", "authorization": f"Bearer {self.auth_token}"}
|
71
69
|
endpoint = f"{self.base_url}/v2/assets/{global_id}/stats"
|
@@ -85,7 +83,6 @@ class QDCExternalAPIClient:
|
|
85
83
|
return res.status_code
|
86
84
|
|
87
85
|
def update_lineage_by_id(self, global_id: str, payload: Dict[str, List[str]]) -> int:
|
88
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
89
86
|
self._refresh_token_if_expired()
|
90
87
|
headers = {"content-type": "application/json", "authorization": f"Bearer {self.auth_token}"}
|
91
88
|
endpoint = f"{self.base_url}/v2/lineage/{global_id}"
|
@@ -67,7 +67,6 @@ class RedshiftQueryExecutor:
|
|
67
67
|
return conn
|
68
68
|
|
69
69
|
def get_query_results(self, query: str) -> Tuple[List[str]]:
|
70
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
71
70
|
with self.conn.cursor() as cur:
|
72
71
|
try:
|
73
72
|
cur.execute(query)
|
@@ -28,6 +28,12 @@ class SnowflakeQueryExecutor:
|
|
28
28
|
def __init__(self, config: SnowflakeConnectionConfig) -> None:
|
29
29
|
self.conn = self.__initialize(config)
|
30
30
|
|
31
|
+
def __enter__(self):
|
32
|
+
return self
|
33
|
+
|
34
|
+
def __exit__(self, exc_type, exc_value, traceback):
|
35
|
+
self.conn.close()
|
36
|
+
|
31
37
|
def __initialize(self, config: SnowflakeConnectionConfig) -> SnowflakeConnection:
|
32
38
|
conn: SnowflakeConnection = connect(
|
33
39
|
user=config.account_user,
|
@@ -41,7 +47,6 @@ class SnowflakeQueryExecutor:
|
|
41
47
|
return conn
|
42
48
|
|
43
49
|
def get_query_results(self, query: str) -> List[Dict[str, str]]:
|
44
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
45
50
|
with self.conn.cursor(DictCursor) as cur:
|
46
51
|
try:
|
47
52
|
cur.execute(query)
|
@@ -4,6 +4,7 @@ import os
|
|
4
4
|
|
5
5
|
from quollio_core.helper.core import setup_dbt_profile
|
6
6
|
from quollio_core.helper.env_default import env_default
|
7
|
+
from quollio_core.helper.log import set_log_level
|
7
8
|
from quollio_core.profilers.snowflake import (
|
8
9
|
snowflake_column_to_column_lineage,
|
9
10
|
snowflake_table_level_sqllineage,
|
@@ -21,7 +22,6 @@ def build_view(
|
|
21
22
|
target_tables: str = "",
|
22
23
|
log_level: str = "info",
|
23
24
|
) -> None:
|
24
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
25
25
|
|
26
26
|
logger.info("Build profiler views using dbt")
|
27
27
|
# set parameters
|
@@ -74,9 +74,8 @@ def load_lineage(
|
|
74
74
|
tenant_id: str,
|
75
75
|
enable_column_lineage: bool = False,
|
76
76
|
) -> None:
|
77
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
78
|
-
|
79
77
|
logger.info("Generate Snowflake table to table lineage.")
|
78
|
+
|
80
79
|
snowflake_table_to_table_lineage(
|
81
80
|
conn=conn,
|
82
81
|
qdc_client=qdc_client,
|
@@ -105,7 +104,6 @@ def load_stats(
|
|
105
104
|
qdc_client: qdc.QDCExternalAPIClient,
|
106
105
|
tenant_id: str,
|
107
106
|
) -> None:
|
108
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
109
107
|
|
110
108
|
logger.info("Generate Snowflake stats.")
|
111
109
|
snowflake_table_stats(
|
@@ -124,7 +122,6 @@ def load_sqllineage(
|
|
124
122
|
qdc_client: qdc.QDCExternalAPIClient,
|
125
123
|
tenant_id: str,
|
126
124
|
) -> None:
|
127
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
128
125
|
|
129
126
|
logger.info("Generate Snowflake sqllineage.")
|
130
127
|
snowflake_table_level_sqllineage(
|
@@ -279,6 +276,8 @@ if __name__ == "__main__":
|
|
279
276
|
help="Whether to ingest column lineage into QDIC or not. Default value is False",
|
280
277
|
)
|
281
278
|
args = parser.parse_args()
|
279
|
+
set_log_level(level=args.log_level)
|
280
|
+
|
282
281
|
conn = snowflake.SnowflakeConnectionConfig(
|
283
282
|
account_id=args.account_id,
|
284
283
|
account_user=args.user,
|
@@ -1,256 +0,0 @@
|
|
1
|
-
import logging
|
2
|
-
|
3
|
-
from quollio_core.profilers.lineage import (
|
4
|
-
gen_column_lineage_payload,
|
5
|
-
gen_table_lineage_payload,
|
6
|
-
parse_snowflake_results,
|
7
|
-
)
|
8
|
-
from quollio_core.profilers.sqllineage import SQLLineage
|
9
|
-
from quollio_core.profilers.stats import gen_table_stats_payload
|
10
|
-
from quollio_core.repository import qdc, snowflake
|
11
|
-
|
12
|
-
logger = logging.getLogger(__name__)
|
13
|
-
|
14
|
-
|
15
|
-
def snowflake_table_to_table_lineage(
|
16
|
-
conn: snowflake.SnowflakeConnectionConfig,
|
17
|
-
qdc_client: qdc.QDCExternalAPIClient,
|
18
|
-
tenant_id: str,
|
19
|
-
) -> None:
|
20
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
21
|
-
sf_executor = snowflake.SnowflakeQueryExecutor(conn)
|
22
|
-
results = sf_executor.get_query_results(
|
23
|
-
query="""
|
24
|
-
SELECT
|
25
|
-
*
|
26
|
-
FROM
|
27
|
-
{db}.{schema}.QUOLLIO_LINEAGE_TABLE_LEVEL
|
28
|
-
""".format(
|
29
|
-
db=conn.account_database,
|
30
|
-
schema=conn.account_schema,
|
31
|
-
)
|
32
|
-
)
|
33
|
-
parsed_results = parse_snowflake_results(results=results)
|
34
|
-
update_table_lineage_inputs = gen_table_lineage_payload(
|
35
|
-
tenant_id=tenant_id,
|
36
|
-
endpoint=conn.account_id,
|
37
|
-
tables=parsed_results,
|
38
|
-
)
|
39
|
-
|
40
|
-
req_count = 0
|
41
|
-
for update_table_lineage_input in update_table_lineage_inputs:
|
42
|
-
logger.info(
|
43
|
-
"Generating table lineage. downstream: {db} -> {schema} -> {table}".format(
|
44
|
-
db=update_table_lineage_input.downstream_database_name,
|
45
|
-
schema=update_table_lineage_input.downstream_schema_name,
|
46
|
-
table=update_table_lineage_input.downstream_table_name,
|
47
|
-
)
|
48
|
-
)
|
49
|
-
status_code = qdc_client.update_lineage_by_id(
|
50
|
-
global_id=update_table_lineage_input.downstream_global_id,
|
51
|
-
payload=update_table_lineage_input.upstreams.as_dict(),
|
52
|
-
)
|
53
|
-
if status_code == 200:
|
54
|
-
req_count += 1
|
55
|
-
logger.info(f"Generating table lineage is finished. {req_count} lineages are ingested.")
|
56
|
-
return
|
57
|
-
|
58
|
-
|
59
|
-
def snowflake_column_to_column_lineage(
|
60
|
-
conn: snowflake.SnowflakeConnectionConfig,
|
61
|
-
qdc_client: qdc.QDCExternalAPIClient,
|
62
|
-
tenant_id: str,
|
63
|
-
) -> None:
|
64
|
-
sf_executor = snowflake.SnowflakeQueryExecutor(conn)
|
65
|
-
results = sf_executor.get_query_results(
|
66
|
-
query="""
|
67
|
-
SELECT
|
68
|
-
*
|
69
|
-
FROM
|
70
|
-
{db}.{schema}.QUOLLIO_LINEAGE_COLUMN_LEVEL
|
71
|
-
""".format(
|
72
|
-
db=conn.account_database,
|
73
|
-
schema=conn.account_schema,
|
74
|
-
)
|
75
|
-
)
|
76
|
-
update_column_lineage_inputs = gen_column_lineage_payload(
|
77
|
-
tenant_id=tenant_id,
|
78
|
-
endpoint=conn.account_id,
|
79
|
-
columns=results,
|
80
|
-
)
|
81
|
-
|
82
|
-
req_count = 0
|
83
|
-
for update_column_lineage_input in update_column_lineage_inputs:
|
84
|
-
logger.info(
|
85
|
-
"Generating column lineage. downstream: {db} -> {schema} -> {table} -> {column}".format(
|
86
|
-
db=update_column_lineage_input.downstream_database_name,
|
87
|
-
schema=update_column_lineage_input.downstream_schema_name,
|
88
|
-
table=update_column_lineage_input.downstream_table_name,
|
89
|
-
column=update_column_lineage_input.downstream_column_name,
|
90
|
-
)
|
91
|
-
)
|
92
|
-
status_code = qdc_client.update_lineage_by_id(
|
93
|
-
global_id=update_column_lineage_input.downstream_global_id,
|
94
|
-
payload=update_column_lineage_input.upstreams.as_dict(),
|
95
|
-
)
|
96
|
-
if status_code == 200:
|
97
|
-
req_count += 1
|
98
|
-
logger.info(f"Generating column lineage is finished. {req_count} lineages are ingested.")
|
99
|
-
return
|
100
|
-
|
101
|
-
|
102
|
-
def snowflake_table_level_sqllineage(
|
103
|
-
conn: snowflake.SnowflakeConnectionConfig,
|
104
|
-
qdc_client: qdc.QDCExternalAPIClient,
|
105
|
-
tenant_id: str,
|
106
|
-
) -> None:
|
107
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
108
|
-
sf_executor = snowflake.SnowflakeQueryExecutor(conn)
|
109
|
-
results = sf_executor.get_query_results(
|
110
|
-
query="""
|
111
|
-
SELECT
|
112
|
-
database_name
|
113
|
-
, schema_name
|
114
|
-
, query_text
|
115
|
-
FROM
|
116
|
-
{db}.{schema}.QUOLLIO_SQLLINEAGE_SOURCES
|
117
|
-
""".format(
|
118
|
-
db=conn.account_database,
|
119
|
-
schema=conn.account_schema,
|
120
|
-
)
|
121
|
-
)
|
122
|
-
update_table_lineage_inputs_list = list()
|
123
|
-
sql_lineage = SQLLineage()
|
124
|
-
for result in results:
|
125
|
-
src_tables, dest_table = sql_lineage.get_table_level_lineage_source(
|
126
|
-
sql=result["QUERY_TEXT"],
|
127
|
-
dialect="snowflake",
|
128
|
-
dest_db=result["DATABASE_NAME"],
|
129
|
-
dest_schema=result["SCHEMA_NAME"],
|
130
|
-
)
|
131
|
-
update_table_lineage_inputs = sql_lineage.gen_lineage_input(
|
132
|
-
tenant_id=tenant_id, endpoint=conn.account_id, src_tables=src_tables, dest_table=dest_table
|
133
|
-
)
|
134
|
-
update_table_lineage_inputs_list.append(update_table_lineage_inputs)
|
135
|
-
|
136
|
-
req_count = 0
|
137
|
-
for update_table_lineage_input in update_table_lineage_inputs_list:
|
138
|
-
logger.info(
|
139
|
-
"Generating table lineage. downstream: {db} -> {schema} -> {table}".format(
|
140
|
-
db=update_table_lineage_input.downstream_database_name,
|
141
|
-
schema=update_table_lineage_input.downstream_schema_name,
|
142
|
-
table=update_table_lineage_input.downstream_table_name,
|
143
|
-
)
|
144
|
-
)
|
145
|
-
status_code = qdc_client.update_lineage_by_id(
|
146
|
-
global_id=update_table_lineage_input.downstream_global_id,
|
147
|
-
payload=update_table_lineage_input.upstreams.as_dict(),
|
148
|
-
)
|
149
|
-
if status_code == 200:
|
150
|
-
req_count += 1
|
151
|
-
logger.info(f"Generating table lineage is finished. {req_count} lineages are ingested.")
|
152
|
-
return
|
153
|
-
|
154
|
-
|
155
|
-
def _get_target_tables_query(db: str, schema: str) -> str:
|
156
|
-
query = """
|
157
|
-
SELECT
|
158
|
-
DISTINCT
|
159
|
-
TABLE_CATALOG
|
160
|
-
, TABLE_SCHEMA
|
161
|
-
, TABLE_NAME
|
162
|
-
FROM
|
163
|
-
{db}.{schema}.QUOLLIO_STATS_PROFILING_COLUMNS
|
164
|
-
""".format(
|
165
|
-
db=db, schema=schema
|
166
|
-
)
|
167
|
-
return query
|
168
|
-
|
169
|
-
|
170
|
-
def _get_stats_tables_query(db: str, schema: str) -> str:
|
171
|
-
query = """
|
172
|
-
SELECT
|
173
|
-
DISTINCT
|
174
|
-
TABLE_CATALOG
|
175
|
-
, TABLE_SCHEMA
|
176
|
-
, TABLE_NAME
|
177
|
-
FROM
|
178
|
-
{db}.INFORMATION_SCHEMA.TABLES
|
179
|
-
WHERE
|
180
|
-
startswith(TABLE_NAME, 'QUOLLIO_STATS_COLUMNS_')
|
181
|
-
AND TABLE_SCHEMA = UPPER('{schema}')
|
182
|
-
""".format(
|
183
|
-
db=db, schema=schema
|
184
|
-
)
|
185
|
-
return query
|
186
|
-
|
187
|
-
|
188
|
-
def snowflake_table_stats(
|
189
|
-
conn: snowflake.SnowflakeConnectionConfig,
|
190
|
-
qdc_client: qdc.QDCExternalAPIClient,
|
191
|
-
tenant_id: str,
|
192
|
-
) -> None:
|
193
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
194
|
-
sf_executor = snowflake.SnowflakeQueryExecutor(conn)
|
195
|
-
|
196
|
-
target_query = _get_target_tables_query(
|
197
|
-
db=conn.account_database,
|
198
|
-
schema=conn.account_schema,
|
199
|
-
)
|
200
|
-
target_assets = sf_executor.get_query_results(query=target_query)
|
201
|
-
|
202
|
-
stats_query = _get_stats_tables_query(
|
203
|
-
db=conn.account_database,
|
204
|
-
schema=conn.account_schema,
|
205
|
-
)
|
206
|
-
stats_columns = sf_executor.get_query_results(query=stats_query)
|
207
|
-
|
208
|
-
req_count = 0
|
209
|
-
for target_asset in target_assets:
|
210
|
-
for stats_column in stats_columns:
|
211
|
-
stats_query = """
|
212
|
-
SELECT
|
213
|
-
db_name
|
214
|
-
, schema_name
|
215
|
-
, table_name
|
216
|
-
, column_name
|
217
|
-
, max_value
|
218
|
-
, min_value
|
219
|
-
, null_count
|
220
|
-
, cardinality
|
221
|
-
, avg_value
|
222
|
-
, median_value
|
223
|
-
, mode_value
|
224
|
-
, stddev_value
|
225
|
-
FROM
|
226
|
-
{db}.{schema}.{table}
|
227
|
-
WHERE
|
228
|
-
db_name = '{target_db}'
|
229
|
-
and schema_name = '{target_schema}'
|
230
|
-
and table_name = '{target_table}'
|
231
|
-
""".format(
|
232
|
-
db=stats_column["TABLE_CATALOG"],
|
233
|
-
schema=stats_column["TABLE_SCHEMA"],
|
234
|
-
table=stats_column["TABLE_NAME"],
|
235
|
-
target_db=target_asset["TABLE_CATALOG"],
|
236
|
-
target_schema=target_asset["TABLE_SCHEMA"],
|
237
|
-
target_table=target_asset["TABLE_NAME"],
|
238
|
-
)
|
239
|
-
stats_result = sf_executor.get_query_results(query=stats_query)
|
240
|
-
payloads = gen_table_stats_payload(tenant_id=tenant_id, endpoint=conn.account_id, stats=stats_result)
|
241
|
-
for payload in payloads:
|
242
|
-
logger.info(
|
243
|
-
"Generating table stats. asset: {db} -> {schema} -> {table} -> {column}".format(
|
244
|
-
db=payload.db,
|
245
|
-
schema=payload.schema,
|
246
|
-
table=payload.table,
|
247
|
-
column=payload.column,
|
248
|
-
)
|
249
|
-
)
|
250
|
-
status_code = qdc_client.update_stats_by_id(
|
251
|
-
global_id=payload.global_id,
|
252
|
-
payload=payload.body.get_column_stats(),
|
253
|
-
)
|
254
|
-
if status_code == 200:
|
255
|
-
req_count += 1
|
256
|
-
logger.info(f"Generating table stats is finished. {req_count} stats are ingested.")
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/analyses/.gitkeep
RENAMED
File without changes
|
{quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/dbt_project.yml
RENAMED
File without changes
|
{quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/macros/.gitkeep
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/models/sources.yml
RENAMED
File without changes
|
{quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/package-lock.yml
RENAMED
File without changes
|
{quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/packages.yml
RENAMED
File without changes
|
File without changes
|
{quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/seeds/.gitkeep
RENAMED
File without changes
|
{quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/snapshots/.gitkeep
RENAMED
File without changes
|
File without changes
|
{quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/analyses/.gitkeep
RENAMED
File without changes
|
{quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/dbt_project.yml
RENAMED
File without changes
|
{quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/macros/.gitkeep
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/models/sources.yml
RENAMED
File without changes
|
{quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/package-lock.yml
RENAMED
File without changes
|
File without changes
|
File without changes
|
{quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/seeds/.gitkeep
RENAMED
File without changes
|
{quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/snapshots/.gitkeep
RENAMED
File without changes
|
File without changes
|
File without changes
|
{quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/analyses/.gitkeep
RENAMED
File without changes
|
{quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/dbt_project.yml
RENAMED
File without changes
|
{quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/macros/.gitkeep
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/models/sources.yml
RENAMED
File without changes
|
{quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/package-lock.yml
RENAMED
File without changes
|
File without changes
|
File without changes
|
{quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/seeds/.gitkeep
RENAMED
File without changes
|
{quollio_core-0.4.9 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/snapshots/.gitkeep
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|