quollio-core 0.4.6__tar.gz → 0.4.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {quollio_core-0.4.6 → quollio_core-0.4.8}/PKG-INFO +7 -2
- {quollio_core-0.4.6 → quollio_core-0.4.8}/README.md +1 -1
- {quollio_core-0.4.6 → quollio_core-0.4.8}/pyproject.toml +5 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/__init__.py +1 -1
- quollio_core-0.4.8/quollio_core/bigquery.py +114 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/bricks.py +66 -14
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.yml +1 -1
- quollio_core-0.4.8/quollio_core/dbt_projects/redshift/macros/materialization/divided_view.sql +136 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/redshift/models/quollio_stats_columns.sql +1 -2
- quollio_core-0.4.8/quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql +85 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.sql +1 -2
- quollio_core-0.4.8/quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.sql +96 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/helper/core.py +4 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/helper/env_default.py +24 -1
- quollio_core-0.4.8/quollio_core/profilers/bigquery.py +81 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/profilers/databricks.py +16 -9
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/profilers/lineage.py +14 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/redshift.py +8 -8
- quollio_core-0.4.8/quollio_core/repository/bigquery.py +61 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/snowflake.py +21 -6
- quollio_core-0.4.6/quollio_core/dbt_projects/redshift/macros/materialization/divided_view.sql +0 -97
- quollio_core-0.4.6/quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql +0 -62
- quollio_core-0.4.6/quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.sql +0 -59
- {quollio_core-0.4.6 → quollio_core-0.4.8}/LICENSE +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/databricks/.gitignore +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/databricks/README.md +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/databricks/analyses/.gitkeep +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/databricks/dbt_project.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/databricks/macros/.gitkeep +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.sql +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.sql +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/databricks/models/sources.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/databricks/package-lock.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/databricks/packages.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/databricks/profiles/profiles_template.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/databricks/seeds/.gitkeep +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/databricks/snapshots/.gitkeep +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/redshift/README.md +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/redshift/analyses/.gitkeep +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/redshift/dbt_project.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/redshift/macros/.gitkeep +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/redshift/models/quollio_lineage_table_level.sql +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/redshift/models/quollio_lineage_table_level.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/redshift/models/quollio_lineage_view_level.sql +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/redshift/models/quollio_lineage_view_level.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/redshift/models/quollio_sqllineage_sources.sql +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/redshift/models/quollio_sqllineage_sources.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/redshift/models/quollio_stats_columns.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/redshift/models/quollio_stats_profiling_columns.sql +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/redshift/models/quollio_stats_profiling_columns.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/redshift/models/sources.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/redshift/package-lock.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/redshift/packages.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/redshift/profiles/profiles_template.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/redshift/seeds/.gitkeep +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/redshift/snapshots/.gitkeep +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/seeds/.gitkeep +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/snowflake/README.md +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/snowflake/analyses/.gitkeep +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/snowflake/dbt_project.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/snowflake/macros/.gitkeep +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_column_level.sql +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_column_level.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_table_level.sql +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_table_level.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.sql +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/snowflake/models/sources.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/snowflake/package-lock.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/snowflake/packages.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/snowflake/profiles/profiles_template.yml +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/snowflake/seeds/.gitkeep +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/dbt_projects/snowflake/snapshots/.gitkeep +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/helper/__init__.py +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/profilers/__init__.py +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/profilers/redshift.py +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/profilers/snowflake.py +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/profilers/sqllineage.py +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/profilers/stats.py +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/repository/__init__.py +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/repository/databricks.py +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/repository/dbt.py +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/repository/qdc.py +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/repository/redshift.py +0 -0
- {quollio_core-0.4.6 → quollio_core-0.4.8}/quollio_core/repository/snowflake.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: quollio-core
|
3
|
-
Version: 0.4.6
|
3
|
+
Version: 0.4.8
|
4
4
|
Summary: Quollio Core
|
5
5
|
Author-email: quollio-dev <qt.dev@quollio.com>
|
6
6
|
Maintainer-email: RyoAriyama <ryo.arym@gmail.com>, tharuta <35373297+TakumiHaruta@users.noreply.github.com>
|
@@ -31,6 +31,11 @@ Requires-Dist: snowflake-connector-python==3.5.0
|
|
31
31
|
Requires-Dist: databricks-sdk==0.17.0
|
32
32
|
Requires-Dist: databricks-sql-connector==2.9.5
|
33
33
|
Requires-Dist: sqlglot==20.8.0
|
34
|
+
Requires-Dist: google-cloud==0.34.0
|
35
|
+
Requires-Dist: google-cloud-bigquery==3.22.0
|
36
|
+
Requires-Dist: google-cloud-datacatalog==3.19.0
|
37
|
+
Requires-Dist: google-cloud-datacatalog-lineage==0.3.6
|
38
|
+
Requires-Dist: google-api-python-client==2.131.0
|
34
39
|
Requires-Dist: black>=22.3.0 ; extra == "test"
|
35
40
|
Requires-Dist: coverage>=7.3.2 ; extra == "test"
|
36
41
|
Requires-Dist: isort>=5.10.1 ; extra == "test"
|
@@ -74,7 +79,7 @@ To see available commands and options, please run the following command. (ex: Sn
|
|
74
79
|
コマンドやオプションの詳細については、下記のコマンドを実行してください。(例: Snowflake)
|
75
80
|
|
76
81
|
```
|
77
|
-
$
|
82
|
+
$ python -m quollio_core.snowflake -h
|
78
83
|
```
|
79
84
|
|
80
85
|
Then run commands with the options provided.
|
@@ -31,7 +31,7 @@ To see available commands and options, please run the following command. (ex: Sn
|
|
31
31
|
コマンドやオプションの詳細については、下記のコマンドを実行してください。(例: Snowflake)
|
32
32
|
|
33
33
|
```
|
34
|
-
$
|
34
|
+
$ python -m quollio_core.snowflake -h
|
35
35
|
```
|
36
36
|
|
37
37
|
Then run commands with the options provided.
|
@@ -43,6 +43,11 @@ dependencies = [
|
|
43
43
|
,"databricks-sdk==0.17.0"
|
44
44
|
,"databricks-sql-connector==2.9.5"
|
45
45
|
,"sqlglot==20.8.0"
|
46
|
+
,"google-cloud==0.34.0"
|
47
|
+
,"google-cloud-bigquery==3.22.0"
|
48
|
+
,"google-cloud-datacatalog==3.19.0"
|
49
|
+
,"google-cloud-datacatalog-lineage==0.3.6"
|
50
|
+
,"google-api-python-client==2.131.0"
|
46
51
|
]
|
47
52
|
dynamic = ["version", "description"]
|
48
53
|
|
@@ -0,0 +1,114 @@
|
|
1
|
+
import argparse
|
2
|
+
import json
|
3
|
+
import logging
|
4
|
+
|
5
|
+
from quollio_core.helper.env_default import env_default
|
6
|
+
from quollio_core.profilers.bigquery import bigquery_table_lineage
|
7
|
+
from quollio_core.repository import qdc
|
8
|
+
from quollio_core.repository.bigquery import get_credentials, get_org_id
|
9
|
+
|
10
|
+
logger = logging.getLogger(__name__)
|
11
|
+
|
12
|
+
|
13
|
+
def load_lineage(
|
14
|
+
qdc_client: qdc.QDCExternalAPIClient, project_id: str, regions: list, tenant_id: str, credentials: dict, org_id: str
|
15
|
+
):
|
16
|
+
bigquery_table_lineage(
|
17
|
+
qdc_client=qdc_client,
|
18
|
+
tenant_id=tenant_id,
|
19
|
+
project_id=project_id,
|
20
|
+
regions=regions,
|
21
|
+
credentials=credentials,
|
22
|
+
org_id=org_id,
|
23
|
+
)
|
24
|
+
|
25
|
+
|
26
|
+
if __name__ == "__main__":
|
27
|
+
parser = argparse.ArgumentParser(
|
28
|
+
prog="Quollio Intelligence Agent for Google BigQuery",
|
29
|
+
description="Collect lineage and stats from Google BigQuery and load to Quollio Data Catalog",
|
30
|
+
epilog="Copyright (c) 2024 Quollio Technologies, Inc.",
|
31
|
+
)
|
32
|
+
parser.add_argument(
|
33
|
+
"commands",
|
34
|
+
choices=["load_lineage"],
|
35
|
+
type=str,
|
36
|
+
nargs="+",
|
37
|
+
help="""
|
38
|
+
The command to execute.
|
39
|
+
'load_lineage': Load lineage data from Google Data Catalog to Quollio,
|
40
|
+
""",
|
41
|
+
)
|
42
|
+
parser.add_argument(
|
43
|
+
"--credentials",
|
44
|
+
type=str,
|
45
|
+
action=env_default("GOOGLE_APPLICATION_CREDENTIALS"),
|
46
|
+
help="Crendentials for Google Cloud Platform",
|
47
|
+
)
|
48
|
+
parser.add_argument(
|
49
|
+
"--tenant_id",
|
50
|
+
type=str,
|
51
|
+
action=env_default("TENANT_ID"),
|
52
|
+
required=False,
|
53
|
+
help="The tenant id (company id) where the lineage and stats are loaded",
|
54
|
+
)
|
55
|
+
parser.add_argument(
|
56
|
+
"--api_url",
|
57
|
+
type=str,
|
58
|
+
action=env_default("QDC_API_URL"),
|
59
|
+
required=False,
|
60
|
+
help="The base URL of Quollio External API",
|
61
|
+
)
|
62
|
+
parser.add_argument(
|
63
|
+
"--client_id",
|
64
|
+
type=str,
|
65
|
+
action=env_default("QDC_CLIENT_ID"),
|
66
|
+
required=False,
|
67
|
+
help="The client id that is created on Quollio console to let clients access Quollio External API",
|
68
|
+
)
|
69
|
+
parser.add_argument(
|
70
|
+
"--client_secret",
|
71
|
+
type=str,
|
72
|
+
action=env_default("QDC_CLIENT_SECRET"),
|
73
|
+
required=False,
|
74
|
+
help="The client secret that is created on Quollio console to let clients access Quollio External API",
|
75
|
+
)
|
76
|
+
parser.add_argument(
|
77
|
+
"--project_id",
|
78
|
+
type=str,
|
79
|
+
action=env_default("GCP_PROJECT_ID"),
|
80
|
+
required=False,
|
81
|
+
help="GCP Project ID",
|
82
|
+
)
|
83
|
+
parser.add_argument(
|
84
|
+
"--regions",
|
85
|
+
type=str,
|
86
|
+
action=env_default("GCP_REGIONS"),
|
87
|
+
required=False,
|
88
|
+
help="GCP regions where the data is located. Multiple regions can be provided separated by space.",
|
89
|
+
nargs="+",
|
90
|
+
)
|
91
|
+
|
92
|
+
args = parser.parse_args()
|
93
|
+
|
94
|
+
if len(args.commands) == 0:
|
95
|
+
raise ValueError("No command is provided")
|
96
|
+
|
97
|
+
if "load_lineage" in args.commands:
|
98
|
+
|
99
|
+
qdc_client = qdc.QDCExternalAPIClient(
|
100
|
+
base_url=args.api_url, client_id=args.client_id, client_secret=args.client_secret
|
101
|
+
)
|
102
|
+
|
103
|
+
credentials_json = json.loads(args.credentials)
|
104
|
+
credentials = get_credentials(credentials_json=credentials_json)
|
105
|
+
org_id = get_org_id(credentials_json=credentials_json)
|
106
|
+
|
107
|
+
load_lineage(
|
108
|
+
qdc_client=qdc_client,
|
109
|
+
project_id=args.project_id,
|
110
|
+
regions=args.regions,
|
111
|
+
tenant_id=args.tenant_id,
|
112
|
+
credentials=credentials,
|
113
|
+
org_id=org_id,
|
114
|
+
)
|
@@ -2,7 +2,7 @@ import argparse
|
|
2
2
|
import logging
|
3
3
|
import os
|
4
4
|
|
5
|
-
from quollio_core.helper.core import setup_dbt_profile
|
5
|
+
from quollio_core.helper.core import setup_dbt_profile, trim_prefix
|
6
6
|
from quollio_core.helper.env_default import env_default
|
7
7
|
from quollio_core.profilers.databricks import (
|
8
8
|
databricks_column_level_lineage,
|
@@ -17,7 +17,7 @@ logger = logging.getLogger(__name__)
|
|
17
17
|
|
18
18
|
def build_view(
|
19
19
|
conn: db.DatabricksConnectionConfig,
|
20
|
-
target_tables: str,
|
20
|
+
target_tables: str = "",
|
21
21
|
log_level: str = "info",
|
22
22
|
) -> None:
|
23
23
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
@@ -41,7 +41,13 @@ def build_view(
|
|
41
41
|
options=["--no-use-colors", "--log-level", log_level],
|
42
42
|
)
|
43
43
|
|
44
|
-
run_options = ["--no-use-colors", "--log-level", log_level
|
44
|
+
run_options = ["--no-use-colors", "--log-level", log_level]
|
45
|
+
|
46
|
+
if target_tables is not None:
|
47
|
+
target_tables_str = " ".join(target_tables)
|
48
|
+
run_options.append("--select")
|
49
|
+
run_options.append(target_tables_str)
|
50
|
+
|
45
51
|
dbt_client.invoke(
|
46
52
|
cmd="run",
|
47
53
|
project_dir=project_path,
|
@@ -53,20 +59,35 @@ def build_view(
|
|
53
59
|
|
54
60
|
def load_lineage(
|
55
61
|
conn: db.DatabricksConnectionConfig,
|
62
|
+
endpoint: str,
|
56
63
|
qdc_client: qdc.QDCExternalAPIClient,
|
57
64
|
tenant_id: str,
|
65
|
+
enable_column_lineage: bool = False,
|
58
66
|
) -> None:
|
59
67
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
60
68
|
|
61
69
|
logger.info("Generate Databricks table to table lineage.")
|
62
70
|
databricks_table_level_lineage(
|
63
|
-
conn=conn,
|
71
|
+
conn=conn,
|
72
|
+
endpoint=endpoint,
|
73
|
+
qdc_client=qdc_client,
|
74
|
+
tenant_id=tenant_id,
|
75
|
+
dbt_table_name="quollio_lineage_table_level",
|
64
76
|
)
|
65
77
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
78
|
+
if enable_column_lineage:
|
79
|
+
logger.info(
|
80
|
+
f"enable_column_lineage is set to {enable_column_lineage}.Generate Databricks column to column lineage."
|
81
|
+
)
|
82
|
+
databricks_column_level_lineage(
|
83
|
+
conn=conn,
|
84
|
+
endpoint=endpoint,
|
85
|
+
qdc_client=qdc_client,
|
86
|
+
tenant_id=tenant_id,
|
87
|
+
dbt_table_name="quollio_lineage_column_level",
|
88
|
+
)
|
89
|
+
else:
|
90
|
+
logger.info("Skip column lineage ingestion. Set enable_column_lineage to True if you ingest column lineage.")
|
70
91
|
|
71
92
|
logger.info("Lineage data is successfully loaded.")
|
72
93
|
return
|
@@ -74,6 +95,7 @@ def load_lineage(
|
|
74
95
|
|
75
96
|
def load_column_stats(
|
76
97
|
conn: db.DatabricksConnectionConfig,
|
98
|
+
endpoint: str,
|
77
99
|
qdc_client: qdc.QDCExternalAPIClient,
|
78
100
|
tenant_id: str,
|
79
101
|
) -> None:
|
@@ -82,6 +104,7 @@ def load_column_stats(
|
|
82
104
|
logger.info("Generate Databricks column stats.")
|
83
105
|
databricks_column_stats(
|
84
106
|
conn=conn,
|
107
|
+
endpoint=endpoint,
|
85
108
|
qdc_client=qdc_client,
|
86
109
|
tenant_id=tenant_id,
|
87
110
|
)
|
@@ -106,7 +129,6 @@ if __name__ == "__main__":
|
|
106
129
|
'build_view': Build views using dbt,
|
107
130
|
'load_lineage': Load lineage data from created views to Quollio,
|
108
131
|
'load_stats': Load stats from created views to Quollio,
|
109
|
-
'load_sqllineage': Load lineage data from sql parse result(alpha),
|
110
132
|
""",
|
111
133
|
)
|
112
134
|
parser.add_argument(
|
@@ -193,8 +215,8 @@ if __name__ == "__main__":
|
|
193
215
|
parser.add_argument(
|
194
216
|
"--target_tables",
|
195
217
|
type=str,
|
196
|
-
nargs="
|
197
|
-
choices=["quollio_lineage_table_level", "
|
218
|
+
nargs="+",
|
219
|
+
choices=["quollio_lineage_table_level", "quollio_lineage_column_level"],
|
198
220
|
action=env_default("DATABRICKS_TARGET_TABLES"),
|
199
221
|
required=False,
|
200
222
|
help="Target tables you want to create with dbt module. \
|
@@ -202,11 +224,29 @@ if __name__ == "__main__":
|
|
202
224
|
Please specify table name with blank delimiter like tableA tableB \
|
203
225
|
if you want to create two or more tables",
|
204
226
|
)
|
227
|
+
parser.add_argument(
|
228
|
+
"--monitoring_table_suffix",
|
229
|
+
type=str,
|
230
|
+
action=env_default("DATABRICKS_MONITORING_TABLE_SUFFIX"),
|
231
|
+
required=False,
|
232
|
+
help="Sets the monitoring tables suffix for databricks. \
|
233
|
+
This is used to identify the monitoring tables created by the databricks monitoring tool. \
|
234
|
+
Default value is _profile_metrics",
|
235
|
+
)
|
236
|
+
parser.add_argument(
|
237
|
+
"--enable_column_lineage",
|
238
|
+
type=bool,
|
239
|
+
action=env_default("ENABLE_COLUMN_LINEAGE", store_true=True),
|
240
|
+
default=False,
|
241
|
+
required=False,
|
242
|
+
help="Whether to ingest column lineage into QDIC or not. Default value is False",
|
243
|
+
)
|
205
244
|
|
206
245
|
args = parser.parse_args()
|
207
246
|
|
208
247
|
conn = db.DatabricksConnectionConfig(
|
209
|
-
host
|
248
|
+
# MEMO: Metadata agent allows the string 'https://' as a host name but is not allowed by intelligence agent.
|
249
|
+
host=trim_prefix(args.host, "https://"),
|
210
250
|
http_path=args.http_path,
|
211
251
|
client_id=args.databricks_client_id,
|
212
252
|
client_secret=args.databricks_client_secret,
|
@@ -228,10 +268,22 @@ if __name__ == "__main__":
|
|
228
268
|
qdc_client = qdc.QDCExternalAPIClient(
|
229
269
|
base_url=args.api_url, client_id=args.client_id, client_secret=args.client_secret
|
230
270
|
)
|
231
|
-
load_lineage(
|
271
|
+
load_lineage(
|
272
|
+
conn=conn,
|
273
|
+
endpoint=args.host,
|
274
|
+
qdc_client=qdc_client,
|
275
|
+
tenant_id=args.tenant_id,
|
276
|
+
enable_column_lineage=args.enable_column_lineage,
|
277
|
+
)
|
232
278
|
|
233
279
|
if "load_stats" in args.commands:
|
234
280
|
qdc_client = qdc.QDCExternalAPIClient(
|
235
281
|
base_url=args.api_url, client_id=args.client_id, client_secret=args.client_secret
|
236
282
|
)
|
237
|
-
databricks_column_stats(
|
283
|
+
databricks_column_stats(
|
284
|
+
conn=conn,
|
285
|
+
endpoint=args.host,
|
286
|
+
qdc_client=qdc_client,
|
287
|
+
tenant_id=args.tenant_id,
|
288
|
+
monitoring_table_suffix=args.monitoring_table_suffix,
|
289
|
+
)
|
@@ -0,0 +1,136 @@
|
|
1
|
+
{%- materialization divided_view, default %}
|
2
|
+
{%- set identifier = model['alias'] %}
|
3
|
+
{%- set target_relations = [] %}
|
4
|
+
{%- set grant_config = config.get('grants') %}
|
5
|
+
|
6
|
+
{{ run_hooks(pre_hooks, inside_transaction=False) }}
|
7
|
+
-- `BEGIN` happens here:
|
8
|
+
{{ run_hooks(pre_hooks, inside_transaction=True) }}
|
9
|
+
|
10
|
+
-- fetch target_tables
|
11
|
+
{%- set query_stats_target_tables -%}
|
12
|
+
SELECT
|
13
|
+
distinct
|
14
|
+
database_name
|
15
|
+
, schema_name
|
16
|
+
, table_name
|
17
|
+
FROM
|
18
|
+
{{ ref('quollio_stats_profiling_columns') }}
|
19
|
+
WHERE
|
20
|
+
table_name not like 'quollio_%%'
|
21
|
+
{%- endset -%}
|
22
|
+
{%- set results = run_query(query_stats_target_tables) -%}
|
23
|
+
{%- if execute -%}
|
24
|
+
{%- set stats_target_tables = results.rows -%}
|
25
|
+
{%- else -%}
|
26
|
+
{%- set stats_target_tables = [] -%}
|
27
|
+
{%- endif -%}
|
28
|
+
|
29
|
+
-- skip creating views if the target profiling columns don't exist.
|
30
|
+
{%- if stats_target_tables | length == 0 -%}
|
31
|
+
{% call statement("main") %}
|
32
|
+
{{ log("No records found. Just execute select stmt for skipping call statement.", info=True) }}
|
33
|
+
select null
|
34
|
+
{% endcall %}
|
35
|
+
{%- set full_refresh_mode = (should_full_refresh()) -%}
|
36
|
+
{%- set should_revoke = should_revoke(target_relation, full_refresh_mode) %}
|
37
|
+
{%- endif -%}
|
38
|
+
|
39
|
+
-- build sql
|
40
|
+
{%- for stats_target_table in stats_target_tables -%}
|
41
|
+
-- get columns for statistics.
|
42
|
+
-- LISTAGG function can't be used for sys table, then it's necessary to get column for each table.
|
43
|
+
-- See https://docs.aws.amazon.com/redshift/latest/dg/c_join_PG.html.
|
44
|
+
{%- set stats_target_columns %}
|
45
|
+
SELECT
|
46
|
+
database_name
|
47
|
+
, schema_name
|
48
|
+
, table_name
|
49
|
+
, column_name
|
50
|
+
, is_bool
|
51
|
+
, is_calculable
|
52
|
+
FROM
|
53
|
+
{{ ref('quollio_stats_profiling_columns') }}
|
54
|
+
WHERE
|
55
|
+
database_name = '{{stats_target_table[0]}}'
|
56
|
+
AND schema_name = '{{stats_target_table[1]}}'
|
57
|
+
AND table_name = '{{stats_target_table[2]}}'
|
58
|
+
{%- endset -%}
|
59
|
+
|
60
|
+
{%- set results = run_query(stats_target_columns) -%}
|
61
|
+
{%- set stats_target_columns = results.rows -%}
|
62
|
+
|
63
|
+
{%- set sql_for_column_stats %}
|
64
|
+
{%- for stats_target_column in stats_target_columns -%}
|
65
|
+
{%- if not loop.first -%}UNION{% endif %}
|
66
|
+
SELECT
|
67
|
+
main.db_name
|
68
|
+
, main.schema_name
|
69
|
+
, main.table_name
|
70
|
+
, main.column_name
|
71
|
+
, main.max_value
|
72
|
+
, main.min_value
|
73
|
+
, main.null_count
|
74
|
+
, main.cardinality
|
75
|
+
, main.avg_value
|
76
|
+
, main.median_value
|
77
|
+
, mode.mode_value
|
78
|
+
, main.stddev_value
|
79
|
+
FROM
|
80
|
+
(
|
81
|
+
SELECT
|
82
|
+
DISTINCT
|
83
|
+
'{{stats_target_column[0]}}'::varchar as db_name
|
84
|
+
, '{{stats_target_column[1]}}'::varchar as schema_name
|
85
|
+
, '{{stats_target_column[2]}}'::varchar as table_name
|
86
|
+
, '{{stats_target_column[3]}}'::varchar as column_name
|
87
|
+
, {% if var("aggregate_all") == True and stats_target_column[5] == True %}cast(max("{{stats_target_column[3]}}") as varchar){% else %}null::varchar{% endif %} AS max_value
|
88
|
+
, {% if var("aggregate_all") == True and stats_target_column[5] == True %}cast(min("{{stats_target_column[3]}}") as varchar){% else %}null::varchar{% endif %} AS min_value
|
89
|
+
-- requires full table scan
|
90
|
+
, {% if var("aggregate_all") == True %}cast(SUM(NVL2("{{stats_target_column[3]}}", 0, 1)) as integer){% else %}null::integer{% endif %} AS null_count
|
91
|
+
, APPROXIMATE COUNT(DISTINCT "{{stats_target_column[3]}}") AS cardinality
|
92
|
+
-- requires full table scan
|
93
|
+
, {% if var("aggregate_all") == True and stats_target_column[5] == True %}cast(avg("{{stats_target_column[3]}}")as varchar){% else %}null::varchar{% endif %} AS avg_value
|
94
|
+
, {% if var("aggregate_all") == True and stats_target_column[5] == True %}cast(median("{{stats_target_column[3]}}") as varchar){% else %}null::varchar{% endif %} AS median_value
|
95
|
+
-- requires full table scan
|
96
|
+
, {% if stats_target_column[5] == True %}cast(STDDEV_SAMP("{{stats_target_column[3]}}") as integer){% else %}null::integer{% endif %} AS stddev_value
|
97
|
+
FROM {{ stats_target_column[0] }}.{{ stats_target_column[1] }}.{{ stats_target_column[2] }}
|
98
|
+
) main, (
|
99
|
+
{%- if var("aggregate_all") == True and stats_target_column[4] == false %}
|
100
|
+
SELECT
|
101
|
+
cast("{{stats_target_column[3]}}" as varchar) mode_value
|
102
|
+
FROM (
|
103
|
+
SELECT
|
104
|
+
DISTINCT
|
105
|
+
"{{stats_target_column[3]}}"
|
106
|
+
, ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC) AS row_num
|
107
|
+
FROM {{ stats_target_column[0] }}.{{ stats_target_column[1] }}.{{ stats_target_column[2] }}
|
108
|
+
GROUP BY
|
109
|
+
"{{stats_target_column[3]}}"
|
110
|
+
)
|
111
|
+
WHERE
|
112
|
+
row_num = 1
|
113
|
+
{% else %}
|
114
|
+
SELECT null as mode_value {%- endif -%}
|
115
|
+
) mode
|
116
|
+
{% endfor -%}
|
117
|
+
{%- endset %}
|
118
|
+
-- create a view with a index as suffix
|
119
|
+
{%- set target_identifier = "%s_%s_%s_%s"|format(model['name'], stats_target_table[0], stats_target_table[1], stats_target_table[2]) %}
|
120
|
+
{%- set target_relation = api.Relation.create(identifier=target_identifier, schema=schema, database=database, type='view') %}
|
121
|
+
-- {{ drop_relation_if_exists(target_relation) }}
|
122
|
+
{% call statement("main") %}
|
123
|
+
{{ get_replace_view_sql(target_relation, sql_for_column_stats) }}
|
124
|
+
{% endcall %}
|
125
|
+
{%- set full_refresh_mode = (should_full_refresh()) -%}
|
126
|
+
{%- set should_revoke = should_revoke(target_relation, full_refresh_mode) %}
|
127
|
+
{%- do apply_grants(target_relation, grant_config, should_revoke) %}
|
128
|
+
{%- set target_relations = target_relations.append(target_relation) %}
|
129
|
+
{%- endfor -%}
|
130
|
+
|
131
|
+
{{ run_hooks(post_hooks, inside_transaction=True) }}
|
132
|
+
{{ adapter.commit() }}
|
133
|
+
{{ run_hooks(post_hooks, inside_transaction=False) }}
|
134
|
+
|
135
|
+
{{ return({'relations': target_relations}) }}
|
136
|
+
{%- endmaterialization -%}
|
quollio_core-0.4.8/quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
{%- materialization divided_view, default %}
|
2
|
+
{%- set identifier = model['alias'] %}
|
3
|
+
{%- set target_relations = [] %}
|
4
|
+
{%- set grant_config = config.get('grants') %}
|
5
|
+
|
6
|
+
{{ run_hooks(pre_hooks, inside_transaction=False) }}
|
7
|
+
-- `BEGIN` happens here:
|
8
|
+
{{ run_hooks(pre_hooks, inside_transaction=True) }}
|
9
|
+
|
10
|
+
-- fetch target_tables
|
11
|
+
{%- set query_stats_target_tables -%}
|
12
|
+
SELECT
|
13
|
+
TABLE_CATALOG
|
14
|
+
, TABLE_SCHEMA
|
15
|
+
, TABLE_NAME
|
16
|
+
, OBJECT_AGG(COLUMN_NAME, IS_CALCULABLE) AS COLUMNS_OBJ
|
17
|
+
FROM
|
18
|
+
{{ ref('quollio_stats_profiling_columns') }}
|
19
|
+
WHERE NOT startswith(table_name, 'QUOLLIO_')
|
20
|
+
GROUP BY
|
21
|
+
TABLE_CATALOG
|
22
|
+
, TABLE_SCHEMA
|
23
|
+
, TABLE_NAME
|
24
|
+
{%- endset -%}
|
25
|
+
{%- set results = run_query(query_stats_target_tables) -%}
|
26
|
+
{%- if execute -%}
|
27
|
+
{%- set stats_target_tables = results.rows -%}
|
28
|
+
{%- else -%}
|
29
|
+
{%- set stats_target_tables = [] -%}
|
30
|
+
{%- endif -%}
|
31
|
+
|
32
|
+
-- skip creating views if the target profiling columns don't exist.
|
33
|
+
{%- if stats_target_tables | length == 0 -%}
|
34
|
+
{% call statement("main") %}
|
35
|
+
{{ log("No records found. Just execute select stmt for skipping call statement.", info=True) }}
|
36
|
+
select null
|
37
|
+
{% endcall %}
|
38
|
+
{%- set full_refresh_mode = (should_full_refresh()) -%}
|
39
|
+
{%- set should_revoke = should_revoke(target_relation, full_refresh_mode) %}
|
40
|
+
{%- endif -%}
|
41
|
+
|
42
|
+
-- create view for each table
|
43
|
+
{%- for stats_target_table in stats_target_tables -%}
|
44
|
+
-- build sql for column value aggregation.
|
45
|
+
{%- set sql_for_column_stats %}
|
46
|
+
{% set columns_json = fromjson(stats_target_table[3]) %}
|
47
|
+
{%- for col_name, is_calclable in columns_json.items() -%}
|
48
|
+
{%- if not loop.first %}UNION{% endif %}
|
49
|
+
SELECT
|
50
|
+
DISTINCT
|
51
|
+
'{{stats_target_table[0]}}' as db_name
|
52
|
+
, '{{stats_target_table[1]}}' as schema_name
|
53
|
+
, '{{stats_target_table[2]}}' as table_name
|
54
|
+
, '{{col_name}}' as column_name
|
55
|
+
, {% if is_calclable == True %}CAST(MAX("{{col_name}}") AS STRING){% else %}NULL{% endif %} AS max_value
|
56
|
+
, {% if is_calclable == True %}CAST(MIN("{{col_name}}") AS STRING){% else %}NULL{% endif %} AS min_value
|
57
|
+
, COUNT_IF("{{col_name}}" IS NULL) AS null_count
|
58
|
+
, APPROX_COUNT_DISTINCT("{{col_name}}") AS cardinality
|
59
|
+
, {% if is_calclable == True %}AVG("{{col_name}}"){% else %}NULL{% endif %} AS avg_value
|
60
|
+
, {% if is_calclable == True %}MEDIAN("{{col_name}}"){% else %}NULL{% endif %} AS median_value
|
61
|
+
, {% if is_calclable == True %}APPROX_TOP_K("{{col_name}}")[0][0]{% else %}NULL{% endif %} AS mode_value
|
62
|
+
, {% if is_calclable == True %}STDDEV("{{col_name}}"){% else %}NULL{% endif %} AS stddev_value
|
63
|
+
FROM "{{stats_target_table[0]}}"."{{stats_target_table[1]}}"."{{stats_target_table[2]}}" {{ var("sample_method") }}
|
64
|
+
{% endfor -%}
|
65
|
+
{%- endset %}
|
66
|
+
|
67
|
+
-- create a view with a index as suffix
|
68
|
+
{%- set stats_view_identifier = "%s_%s_%s_%s"|format(model['name'], stats_target_table[0], stats_target_table[1], stats_target_table[2]) %}
|
69
|
+
{%- set target_relation = api.Relation.create(identifier=stats_view_identifier, schema=schema, database=database, type='view') %}
|
70
|
+
{% call statement("main") %}
|
71
|
+
{{ get_create_view_as_sql(target_relation, sql_for_column_stats) }}
|
72
|
+
{% endcall %}
|
73
|
+
{%- set full_refresh_mode = (should_full_refresh()) -%}
|
74
|
+
{%- set should_revoke = should_revoke(target_relation, full_refresh_mode) %}
|
75
|
+
{%- do apply_grants(target_relation, grant_config, should_revoke) %}
|
76
|
+
{%- set target_relations = target_relations.append(target_relation) %}
|
77
|
+
{%- endfor -%}
|
78
|
+
|
79
|
+
{{ run_hooks(post_hooks, inside_transaction=True) }}
|
80
|
+
-- `COMMIT` happens here:
|
81
|
+
{{ adapter.commit() }}
|
82
|
+
{{ run_hooks(post_hooks, inside_transaction=False) }}
|
83
|
+
|
84
|
+
{{ return({'relations': target_relations}) }}
|
85
|
+
{%- endmaterialization -%}
|
quollio_core-0.4.8/quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.sql
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
WITH columns AS (
|
2
|
+
SELECT
|
3
|
+
table_catalog
|
4
|
+
, table_schema
|
5
|
+
, table_name
|
6
|
+
, column_name
|
7
|
+
, data_type
|
8
|
+
FROM
|
9
|
+
{{ source('account_usage', 'COLUMNS') }}
|
10
|
+
WHERE
|
11
|
+
deleted is null
|
12
|
+
AND table_name NOT LIKE 'QUOLLIO_%%'
|
13
|
+
GROUP BY
|
14
|
+
table_catalog
|
15
|
+
, table_schema
|
16
|
+
, table_name
|
17
|
+
, column_name
|
18
|
+
, data_type
|
19
|
+
ORDER BY
|
20
|
+
table_catalog
|
21
|
+
, table_schema
|
22
|
+
, table_name
|
23
|
+
), accessible_tables AS (
|
24
|
+
SELECT
|
25
|
+
table_catalog
|
26
|
+
, table_schema
|
27
|
+
, name
|
28
|
+
FROM
|
29
|
+
{{ source('account_usage', 'GRANTS_TO_ROLES') }}
|
30
|
+
WHERE
|
31
|
+
granted_on in ('TABLE', 'MATERIALIZED VIEW')
|
32
|
+
AND grantee_name = '{{ var("query_role") }}'
|
33
|
+
AND privilege in ('SELECT', 'OWNERSHIP', 'REFERENCES')
|
34
|
+
AND deleted_on IS NULL
|
35
|
+
GROUP BY
|
36
|
+
table_catalog
|
37
|
+
, table_schema
|
38
|
+
, name
|
39
|
+
), m_view_sys_columns AS (
|
40
|
+
SELECT
|
41
|
+
cols.table_catalog
|
42
|
+
, cols.table_schema
|
43
|
+
, cols.table_name
|
44
|
+
, cols.column_name
|
45
|
+
, cols.data_type
|
46
|
+
FROM
|
47
|
+
{{ source('account_usage', 'COLUMNS') }} cols
|
48
|
+
LEFT OUTER JOIN
|
49
|
+
{{ source('account_usage', 'TABLES') }} tbls
|
50
|
+
ON
|
51
|
+
cols.table_catalog = tbls.table_catalog
|
52
|
+
AND cols.table_schema = tbls.table_schema
|
53
|
+
AND cols.table_name = tbls.table_name
|
54
|
+
WHERE
|
55
|
+
tbls.table_type = 'MATERIALIZED VIEW'
|
56
|
+
AND cols.column_name = 'SYS_MV_SOURCE_PARTITION'
|
57
|
+
), implicit_columns_removed AS (
|
58
|
+
SELECT
|
59
|
+
c.table_catalog
|
60
|
+
, c.table_schema
|
61
|
+
, c.table_name
|
62
|
+
, c.column_name
|
63
|
+
, c.data_type
|
64
|
+
FROM
|
65
|
+
columns c
|
66
|
+
INNER JOIN
|
67
|
+
accessible_tables a
|
68
|
+
ON
|
69
|
+
c.table_catalog = a.table_catalog
|
70
|
+
AND c.table_schema = a.table_schema
|
71
|
+
AND c.table_name = a.name
|
72
|
+
MINUS
|
73
|
+
SELECT
|
74
|
+
table_catalog
|
75
|
+
, table_schema
|
76
|
+
, table_name
|
77
|
+
, column_name
|
78
|
+
, data_type
|
79
|
+
FROM
|
80
|
+
m_view_sys_columns
|
81
|
+
), final AS (
|
82
|
+
SELECT
|
83
|
+
table_catalog
|
84
|
+
, table_schema
|
85
|
+
, table_name
|
86
|
+
, column_name
|
87
|
+
, data_type
|
88
|
+
, case when data_type in('NUMBER','DECIMAL', 'DEC', 'NUMERIC',
|
89
|
+
'INT', 'INTEGER', 'BIGINT', 'SMALLINT',
|
90
|
+
'TINYINT', 'BYTEINT')
|
91
|
+
THEN true
|
92
|
+
else false END AS is_calculable
|
93
|
+
FROM
|
94
|
+
implicit_columns_removed
|
95
|
+
)
|
96
|
+
select * from final
|