quollio-core 0.4.7__tar.gz → 0.4.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {quollio_core-0.4.7 → quollio_core-0.4.9}/PKG-INFO +2 -2
- {quollio_core-0.4.7 → quollio_core-0.4.9}/README.md +1 -1
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/__init__.py +1 -1
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/bricks.py +41 -9
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.yml +1 -1
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/dbt_project.yml +1 -1
- quollio_core-0.4.9/quollio_core/dbt_projects/redshift/macros/materialization/divided_view.sql +136 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/models/quollio_stats_columns.sql +1 -2
- quollio_core-0.4.9/quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql +85 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.sql +1 -2
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/helper/core.py +4 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/helper/env_default.py +24 -1
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/profilers/databricks.py +11 -4
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/redshift.py +8 -8
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/snowflake.py +21 -6
- quollio_core-0.4.7/quollio_core/dbt_projects/redshift/macros/materialization/divided_view.sql +0 -97
- quollio_core-0.4.7/quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql +0 -63
- {quollio_core-0.4.7 → quollio_core-0.4.9}/LICENSE +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/pyproject.toml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/bigquery.py +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/databricks/.gitignore +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/databricks/README.md +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/databricks/analyses/.gitkeep +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/databricks/dbt_project.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/databricks/macros/.gitkeep +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.sql +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.sql +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/databricks/models/sources.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/databricks/package-lock.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/databricks/packages.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/databricks/profiles/profiles_template.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/databricks/seeds/.gitkeep +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/databricks/snapshots/.gitkeep +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/README.md +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/analyses/.gitkeep +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/macros/.gitkeep +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/models/quollio_lineage_table_level.sql +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/models/quollio_lineage_table_level.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/models/quollio_lineage_view_level.sql +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/models/quollio_lineage_view_level.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/models/quollio_sqllineage_sources.sql +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/models/quollio_sqllineage_sources.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/models/quollio_stats_columns.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/models/quollio_stats_profiling_columns.sql +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/models/quollio_stats_profiling_columns.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/models/sources.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/package-lock.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/packages.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/profiles/profiles_template.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/seeds/.gitkeep +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/snapshots/.gitkeep +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/seeds/.gitkeep +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/README.md +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/analyses/.gitkeep +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/dbt_project.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/macros/.gitkeep +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_column_level.sql +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_column_level.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_table_level.sql +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_table_level.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.sql +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.sql +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/models/sources.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/package-lock.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/packages.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/profiles/profiles_template.yml +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/seeds/.gitkeep +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/snapshots/.gitkeep +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/helper/__init__.py +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/profilers/__init__.py +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/profilers/bigquery.py +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/profilers/lineage.py +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/profilers/redshift.py +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/profilers/snowflake.py +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/profilers/sqllineage.py +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/profilers/stats.py +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/repository/__init__.py +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/repository/bigquery.py +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/repository/databricks.py +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/repository/dbt.py +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/repository/qdc.py +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/repository/redshift.py +0 -0
- {quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/repository/snowflake.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: quollio-core
|
3
|
-
Version: 0.4.
|
3
|
+
Version: 0.4.9
|
4
4
|
Summary: Quollio Core
|
5
5
|
Author-email: quollio-dev <qt.dev@quollio.com>
|
6
6
|
Maintainer-email: RyoAriyama <ryo.arym@gmail.com>, tharuta <35373297+TakumiHaruta@users.noreply.github.com>
|
@@ -79,7 +79,7 @@ To see available commands and options, please run the following command. (ex: Sn
|
|
79
79
|
コマンドやオプションの詳細については、下記のコマンドを実行してください。(例: Snowflake)
|
80
80
|
|
81
81
|
```
|
82
|
-
$
|
82
|
+
$ python -m quollio_core.snowflake -h
|
83
83
|
```
|
84
84
|
|
85
85
|
Then run commands with the options provided.
|
@@ -31,7 +31,7 @@ To see available commands and options, please run the following command. (ex: Sn
|
|
31
31
|
コマンドやオプションの詳細については、下記のコマンドを実行してください。(例: Snowflake)
|
32
32
|
|
33
33
|
```
|
34
|
-
$
|
34
|
+
$ python -m quollio_core.snowflake -h
|
35
35
|
```
|
36
36
|
|
37
37
|
Then run commands with the options provided.
|
@@ -2,7 +2,7 @@ import argparse
|
|
2
2
|
import logging
|
3
3
|
import os
|
4
4
|
|
5
|
-
from quollio_core.helper.core import setup_dbt_profile
|
5
|
+
from quollio_core.helper.core import setup_dbt_profile, trim_prefix
|
6
6
|
from quollio_core.helper.env_default import env_default
|
7
7
|
from quollio_core.profilers.databricks import (
|
8
8
|
databricks_column_level_lineage,
|
@@ -59,20 +59,35 @@ def build_view(
|
|
59
59
|
|
60
60
|
def load_lineage(
|
61
61
|
conn: db.DatabricksConnectionConfig,
|
62
|
+
endpoint: str,
|
62
63
|
qdc_client: qdc.QDCExternalAPIClient,
|
63
64
|
tenant_id: str,
|
65
|
+
enable_column_lineage: bool = False,
|
64
66
|
) -> None:
|
65
67
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
66
68
|
|
67
69
|
logger.info("Generate Databricks table to table lineage.")
|
68
70
|
databricks_table_level_lineage(
|
69
|
-
conn=conn,
|
71
|
+
conn=conn,
|
72
|
+
endpoint=endpoint,
|
73
|
+
qdc_client=qdc_client,
|
74
|
+
tenant_id=tenant_id,
|
75
|
+
dbt_table_name="quollio_lineage_table_level",
|
70
76
|
)
|
71
77
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
78
|
+
if enable_column_lineage:
|
79
|
+
logger.info(
|
80
|
+
f"enable_column_lineage is set to {enable_column_lineage}.Generate Databricks column to column lineage."
|
81
|
+
)
|
82
|
+
databricks_column_level_lineage(
|
83
|
+
conn=conn,
|
84
|
+
endpoint=endpoint,
|
85
|
+
qdc_client=qdc_client,
|
86
|
+
tenant_id=tenant_id,
|
87
|
+
dbt_table_name="quollio_lineage_column_level",
|
88
|
+
)
|
89
|
+
else:
|
90
|
+
logger.info("Skip column lineage ingestion. Set enable_column_lineage to True if you ingest column lineage.")
|
76
91
|
|
77
92
|
logger.info("Lineage data is successfully loaded.")
|
78
93
|
return
|
@@ -80,6 +95,7 @@ def load_lineage(
|
|
80
95
|
|
81
96
|
def load_column_stats(
|
82
97
|
conn: db.DatabricksConnectionConfig,
|
98
|
+
endpoint: str,
|
83
99
|
qdc_client: qdc.QDCExternalAPIClient,
|
84
100
|
tenant_id: str,
|
85
101
|
) -> None:
|
@@ -88,6 +104,7 @@ def load_column_stats(
|
|
88
104
|
logger.info("Generate Databricks column stats.")
|
89
105
|
databricks_column_stats(
|
90
106
|
conn=conn,
|
107
|
+
endpoint=endpoint,
|
91
108
|
qdc_client=qdc_client,
|
92
109
|
tenant_id=tenant_id,
|
93
110
|
)
|
@@ -207,7 +224,6 @@ if __name__ == "__main__":
|
|
207
224
|
Please specify table name with blank delimiter like tableA tableB \
|
208
225
|
if you want to create two or more tables",
|
209
226
|
)
|
210
|
-
|
211
227
|
parser.add_argument(
|
212
228
|
"--monitoring_table_suffix",
|
213
229
|
type=str,
|
@@ -217,11 +233,20 @@ if __name__ == "__main__":
|
|
217
233
|
This is used to identify the monitoring tables created by the databricks monitoring tool. \
|
218
234
|
Default value is _profile_metrics",
|
219
235
|
)
|
236
|
+
parser.add_argument(
|
237
|
+
"--enable_column_lineage",
|
238
|
+
type=bool,
|
239
|
+
action=env_default("ENABLE_COLUMN_LINEAGE", store_true=True),
|
240
|
+
default=False,
|
241
|
+
required=False,
|
242
|
+
help="Whether to ingest column lineage into QDIC or not. Default value is False",
|
243
|
+
)
|
220
244
|
|
221
245
|
args = parser.parse_args()
|
222
246
|
|
223
247
|
conn = db.DatabricksConnectionConfig(
|
224
|
-
host
|
248
|
+
# MEMO: Metadata agent allows the string 'https://' as a host name but is not allowed by intelligence agent.
|
249
|
+
host=trim_prefix(args.host, "https://"),
|
225
250
|
http_path=args.http_path,
|
226
251
|
client_id=args.databricks_client_id,
|
227
252
|
client_secret=args.databricks_client_secret,
|
@@ -243,7 +268,13 @@ if __name__ == "__main__":
|
|
243
268
|
qdc_client = qdc.QDCExternalAPIClient(
|
244
269
|
base_url=args.api_url, client_id=args.client_id, client_secret=args.client_secret
|
245
270
|
)
|
246
|
-
load_lineage(
|
271
|
+
load_lineage(
|
272
|
+
conn=conn,
|
273
|
+
endpoint=args.host,
|
274
|
+
qdc_client=qdc_client,
|
275
|
+
tenant_id=args.tenant_id,
|
276
|
+
enable_column_lineage=args.enable_column_lineage,
|
277
|
+
)
|
247
278
|
|
248
279
|
if "load_stats" in args.commands:
|
249
280
|
qdc_client = qdc.QDCExternalAPIClient(
|
@@ -251,6 +282,7 @@ if __name__ == "__main__":
|
|
251
282
|
)
|
252
283
|
databricks_column_stats(
|
253
284
|
conn=conn,
|
285
|
+
endpoint=args.host,
|
254
286
|
qdc_client=qdc_client,
|
255
287
|
tenant_id=args.tenant_id,
|
256
288
|
monitoring_table_suffix=args.monitoring_table_suffix,
|
@@ -0,0 +1,136 @@
|
|
1
|
+
{%- materialization divided_view, default %}
|
2
|
+
{%- set identifier = model['alias'] %}
|
3
|
+
{%- set target_relations = [] %}
|
4
|
+
{%- set grant_config = config.get('grants') %}
|
5
|
+
|
6
|
+
{{ run_hooks(pre_hooks, inside_transaction=False) }}
|
7
|
+
-- `BEGIN` happens here:
|
8
|
+
{{ run_hooks(pre_hooks, inside_transaction=True) }}
|
9
|
+
|
10
|
+
-- fetch target_tables
|
11
|
+
{%- set query_stats_target_tables -%}
|
12
|
+
SELECT
|
13
|
+
distinct
|
14
|
+
database_name
|
15
|
+
, schema_name
|
16
|
+
, table_name
|
17
|
+
FROM
|
18
|
+
{{ ref('quollio_stats_profiling_columns') }}
|
19
|
+
WHERE
|
20
|
+
table_name not like 'quollio_%%'
|
21
|
+
{%- endset -%}
|
22
|
+
{%- set results = run_query(query_stats_target_tables) -%}
|
23
|
+
{%- if execute -%}
|
24
|
+
{%- set stats_target_tables = results.rows -%}
|
25
|
+
{%- else -%}
|
26
|
+
{%- set stats_target_tables = [] -%}
|
27
|
+
{%- endif -%}
|
28
|
+
|
29
|
+
-- skip creating views if the target profiling columns don't exist.
|
30
|
+
{%- if stats_target_tables | length == 0 -%}
|
31
|
+
{% call statement("main") %}
|
32
|
+
{{ log("No records found. Just execute select stmt for skipping call statement.", info=True) }}
|
33
|
+
select null
|
34
|
+
{% endcall %}
|
35
|
+
{%- set full_refresh_mode = (should_full_refresh()) -%}
|
36
|
+
{%- set should_revoke = should_revoke(target_relation, full_refresh_mode) %}
|
37
|
+
{%- endif -%}
|
38
|
+
|
39
|
+
-- build sql
|
40
|
+
{%- for stats_target_table in stats_target_tables -%}
|
41
|
+
-- get columns for statistics.
|
42
|
+
-- LISTAGG function can't be used for sys table, then it's necessary to get column for each table.
|
43
|
+
-- See https://docs.aws.amazon.com/redshift/latest/dg/c_join_PG.html.
|
44
|
+
{%- set stats_target_columns %}
|
45
|
+
SELECT
|
46
|
+
database_name
|
47
|
+
, schema_name
|
48
|
+
, table_name
|
49
|
+
, column_name
|
50
|
+
, is_bool
|
51
|
+
, is_calculable
|
52
|
+
FROM
|
53
|
+
{{ ref('quollio_stats_profiling_columns') }}
|
54
|
+
WHERE
|
55
|
+
database_name = '{{stats_target_table[0]}}'
|
56
|
+
AND schema_name = '{{stats_target_table[1]}}'
|
57
|
+
AND table_name = '{{stats_target_table[2]}}'
|
58
|
+
{%- endset -%}
|
59
|
+
|
60
|
+
{%- set results = run_query(stats_target_columns) -%}
|
61
|
+
{%- set stats_target_columns = results.rows -%}
|
62
|
+
|
63
|
+
{%- set sql_for_column_stats %}
|
64
|
+
{%- for stats_target_column in stats_target_columns -%}
|
65
|
+
{%- if not loop.first -%}UNION{% endif %}
|
66
|
+
SELECT
|
67
|
+
main.db_name
|
68
|
+
, main.schema_name
|
69
|
+
, main.table_name
|
70
|
+
, main.column_name
|
71
|
+
, main.max_value
|
72
|
+
, main.min_value
|
73
|
+
, main.null_count
|
74
|
+
, main.cardinality
|
75
|
+
, main.avg_value
|
76
|
+
, main.median_value
|
77
|
+
, mode.mode_value
|
78
|
+
, main.stddev_value
|
79
|
+
FROM
|
80
|
+
(
|
81
|
+
SELECT
|
82
|
+
DISTINCT
|
83
|
+
'{{stats_target_column[0]}}'::varchar as db_name
|
84
|
+
, '{{stats_target_column[1]}}'::varchar as schema_name
|
85
|
+
, '{{stats_target_column[2]}}'::varchar as table_name
|
86
|
+
, '{{stats_target_column[3]}}'::varchar as column_name
|
87
|
+
, {% if var("aggregate_all") == True and stats_target_column[5] == True %}cast(max("{{stats_target_column[3]}}") as varchar){% else %}null::varchar{% endif %} AS max_value
|
88
|
+
, {% if var("aggregate_all") == True and stats_target_column[5] == True %}cast(min("{{stats_target_column[3]}}") as varchar){% else %}null::varchar{% endif %} AS min_value
|
89
|
+
-- requires full table scan
|
90
|
+
, {% if var("aggregate_all") == True %}cast(SUM(NVL2("{{stats_target_column[3]}}", 0, 1)) as integer){% else %}null::integer{% endif %} AS null_count
|
91
|
+
, APPROXIMATE COUNT(DISTINCT "{{stats_target_column[3]}}") AS cardinality
|
92
|
+
-- requires full table scan
|
93
|
+
, {% if var("aggregate_all") == True and stats_target_column[5] == True %}cast(avg("{{stats_target_column[3]}}")as varchar){% else %}null::varchar{% endif %} AS avg_value
|
94
|
+
, {% if var("aggregate_all") == True and stats_target_column[5] == True %}cast(median("{{stats_target_column[3]}}") as varchar){% else %}null::varchar{% endif %} AS median_value
|
95
|
+
-- requires full table scan
|
96
|
+
, {% if stats_target_column[5] == True %}cast(STDDEV_SAMP("{{stats_target_column[3]}}") as integer){% else %}null::integer{% endif %} AS stddev_value
|
97
|
+
FROM {{ stats_target_column[0] }}.{{ stats_target_column[1] }}.{{ stats_target_column[2] }}
|
98
|
+
) main, (
|
99
|
+
{%- if var("aggregate_all") == True and stats_target_column[4] == false %}
|
100
|
+
SELECT
|
101
|
+
cast("{{stats_target_column[3]}}" as varchar) mode_value
|
102
|
+
FROM (
|
103
|
+
SELECT
|
104
|
+
DISTINCT
|
105
|
+
"{{stats_target_column[3]}}"
|
106
|
+
, ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC) AS row_num
|
107
|
+
FROM {{ stats_target_column[0] }}.{{ stats_target_column[1] }}.{{ stats_target_column[2] }}
|
108
|
+
GROUP BY
|
109
|
+
"{{stats_target_column[3]}}"
|
110
|
+
)
|
111
|
+
WHERE
|
112
|
+
row_num = 1
|
113
|
+
{% else %}
|
114
|
+
SELECT null as mode_value {%- endif -%}
|
115
|
+
) mode
|
116
|
+
{% endfor -%}
|
117
|
+
{%- endset %}
|
118
|
+
-- create a view with a index as suffix
|
119
|
+
{%- set target_identifier = "%s_%s_%s_%s"|format(model['name'], stats_target_table[0], stats_target_table[1], stats_target_table[2]) %}
|
120
|
+
{%- set target_relation = api.Relation.create(identifier=target_identifier, schema=schema, database=database, type='view') %}
|
121
|
+
-- {{ drop_relation_if_exists(target_relation) }}
|
122
|
+
{% call statement("main") %}
|
123
|
+
{{ get_replace_view_sql(target_relation, sql_for_column_stats) }}
|
124
|
+
{% endcall %}
|
125
|
+
{%- set full_refresh_mode = (should_full_refresh()) -%}
|
126
|
+
{%- set should_revoke = should_revoke(target_relation, full_refresh_mode) %}
|
127
|
+
{%- do apply_grants(target_relation, grant_config, should_revoke) %}
|
128
|
+
{%- set target_relations = target_relations.append(target_relation) %}
|
129
|
+
{%- endfor -%}
|
130
|
+
|
131
|
+
{{ run_hooks(post_hooks, inside_transaction=True) }}
|
132
|
+
{{ adapter.commit() }}
|
133
|
+
{{ run_hooks(post_hooks, inside_transaction=False) }}
|
134
|
+
|
135
|
+
{{ return({'relations': target_relations}) }}
|
136
|
+
{%- endmaterialization -%}
|
quollio_core-0.4.9/quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
{%- materialization divided_view, default %}
|
2
|
+
{%- set identifier = model['alias'] %}
|
3
|
+
{%- set target_relations = [] %}
|
4
|
+
{%- set grant_config = config.get('grants') %}
|
5
|
+
|
6
|
+
{{ run_hooks(pre_hooks, inside_transaction=False) }}
|
7
|
+
-- `BEGIN` happens here:
|
8
|
+
{{ run_hooks(pre_hooks, inside_transaction=True) }}
|
9
|
+
|
10
|
+
-- fetch target_tables
|
11
|
+
{%- set query_stats_target_tables -%}
|
12
|
+
SELECT
|
13
|
+
TABLE_CATALOG
|
14
|
+
, TABLE_SCHEMA
|
15
|
+
, TABLE_NAME
|
16
|
+
, OBJECT_AGG(COLUMN_NAME, IS_CALCULABLE) AS COLUMNS_OBJ
|
17
|
+
FROM
|
18
|
+
{{ ref('quollio_stats_profiling_columns') }}
|
19
|
+
WHERE NOT startswith(table_name, 'QUOLLIO_')
|
20
|
+
GROUP BY
|
21
|
+
TABLE_CATALOG
|
22
|
+
, TABLE_SCHEMA
|
23
|
+
, TABLE_NAME
|
24
|
+
{%- endset -%}
|
25
|
+
{%- set results = run_query(query_stats_target_tables) -%}
|
26
|
+
{%- if execute -%}
|
27
|
+
{%- set stats_target_tables = results.rows -%}
|
28
|
+
{%- else -%}
|
29
|
+
{%- set stats_target_tables = [] -%}
|
30
|
+
{%- endif -%}
|
31
|
+
|
32
|
+
-- skip creating views if the target profiling columns don't exist.
|
33
|
+
{%- if stats_target_tables | length == 0 -%}
|
34
|
+
{% call statement("main") %}
|
35
|
+
{{ log("No records found. Just execute select stmt for skipping call statement.", info=True) }}
|
36
|
+
select null
|
37
|
+
{% endcall %}
|
38
|
+
{%- set full_refresh_mode = (should_full_refresh()) -%}
|
39
|
+
{%- set should_revoke = should_revoke(target_relation, full_refresh_mode) %}
|
40
|
+
{%- endif -%}
|
41
|
+
|
42
|
+
-- create view for each table
|
43
|
+
{%- for stats_target_table in stats_target_tables -%}
|
44
|
+
-- build sql for column value aggregation.
|
45
|
+
{%- set sql_for_column_stats %}
|
46
|
+
{% set columns_json = fromjson(stats_target_table[3]) %}
|
47
|
+
{%- for col_name, is_calclable in columns_json.items() -%}
|
48
|
+
{%- if not loop.first %}UNION{% endif %}
|
49
|
+
SELECT
|
50
|
+
DISTINCT
|
51
|
+
'{{stats_target_table[0]}}' as db_name
|
52
|
+
, '{{stats_target_table[1]}}' as schema_name
|
53
|
+
, '{{stats_target_table[2]}}' as table_name
|
54
|
+
, '{{col_name}}' as column_name
|
55
|
+
, {% if is_calclable == True %}CAST(MAX("{{col_name}}") AS STRING){% else %}NULL{% endif %} AS max_value
|
56
|
+
, {% if is_calclable == True %}CAST(MIN("{{col_name}}") AS STRING){% else %}NULL{% endif %} AS min_value
|
57
|
+
, COUNT_IF("{{col_name}}" IS NULL) AS null_count
|
58
|
+
, APPROX_COUNT_DISTINCT("{{col_name}}") AS cardinality
|
59
|
+
, {% if is_calclable == True %}AVG("{{col_name}}"){% else %}NULL{% endif %} AS avg_value
|
60
|
+
, {% if is_calclable == True %}MEDIAN("{{col_name}}"){% else %}NULL{% endif %} AS median_value
|
61
|
+
, {% if is_calclable == True %}APPROX_TOP_K("{{col_name}}")[0][0]{% else %}NULL{% endif %} AS mode_value
|
62
|
+
, {% if is_calclable == True %}STDDEV("{{col_name}}"){% else %}NULL{% endif %} AS stddev_value
|
63
|
+
FROM "{{stats_target_table[0]}}"."{{stats_target_table[1]}}"."{{stats_target_table[2]}}" {{ var("sample_method") }}
|
64
|
+
{% endfor -%}
|
65
|
+
{%- endset %}
|
66
|
+
|
67
|
+
-- create a view with a index as suffix
|
68
|
+
{%- set stats_view_identifier = "%s_%s_%s_%s"|format(model['name'], stats_target_table[0], stats_target_table[1], stats_target_table[2]) %}
|
69
|
+
{%- set target_relation = api.Relation.create(identifier=stats_view_identifier, schema=schema, database=database, type='view') %}
|
70
|
+
{% call statement("main") %}
|
71
|
+
{{ get_create_view_as_sql(target_relation, sql_for_column_stats) }}
|
72
|
+
{% endcall %}
|
73
|
+
{%- set full_refresh_mode = (should_full_refresh()) -%}
|
74
|
+
{%- set should_revoke = should_revoke(target_relation, full_refresh_mode) %}
|
75
|
+
{%- do apply_grants(target_relation, grant_config, should_revoke) %}
|
76
|
+
{%- set target_relations = target_relations.append(target_relation) %}
|
77
|
+
{%- endfor -%}
|
78
|
+
|
79
|
+
{{ run_hooks(post_hooks, inside_transaction=True) }}
|
80
|
+
-- `COMMIT` happens here:
|
81
|
+
{{ adapter.commit() }}
|
82
|
+
{{ run_hooks(post_hooks, inside_transaction=False) }}
|
83
|
+
|
84
|
+
{{ return({'relations': target_relations}) }}
|
85
|
+
{%- endmaterialization -%}
|
@@ -31,3 +31,7 @@ def setup_dbt_profile(connections_json: Dict[str, str], template_path: str, temp
|
|
31
31
|
with open(profile_path, "w") as profiles:
|
32
32
|
yaml.dump(yaml.safe_load(profiles_body), profiles, default_flow_style=False, allow_unicode=True)
|
33
33
|
return
|
34
|
+
|
35
|
+
|
36
|
+
def trim_prefix(s: str, prefix: str) -> str:
|
37
|
+
return s.lstrip(prefix)
|
@@ -6,6 +6,8 @@ Currently requires explicit naming of env vars to check for
|
|
6
6
|
|
7
7
|
import argparse
|
8
8
|
import os
|
9
|
+
from distutils.util import strtobool
|
10
|
+
from typing import Union
|
9
11
|
|
10
12
|
|
11
13
|
# Courtesy of http://stackoverflow.com/a/10551190 with env-var retrieval fixed
|
@@ -28,9 +30,30 @@ class EnvDefault(argparse.Action):
|
|
28
30
|
setattr(namespace, self.dest, values)
|
29
31
|
|
30
32
|
|
33
|
+
class EnvStoreTrue(argparse._StoreTrueAction):
|
34
|
+
"""An argparse action class that auto-sets missing default values from env vars for store_true."""
|
35
|
+
|
36
|
+
def __init__(self, envvar, required=True, default=None, **kwargs):
|
37
|
+
# Only pass the arguments that argparse._StoreTrueAction expects
|
38
|
+
action_kwargs = {key: value for key, value in kwargs.items() if key in ("option_strings", "dest")}
|
39
|
+
if envvar in os.environ:
|
40
|
+
default = _convert_value_to_bool(os.environ[envvar])
|
41
|
+
if required and default:
|
42
|
+
required = False
|
43
|
+
super(EnvStoreTrue, self).__init__(default=default, required=required, **action_kwargs)
|
44
|
+
|
45
|
+
|
31
46
|
# functional sugar for the above
|
32
|
-
def env_default(envvar):
|
47
|
+
def env_default(envvar, store_true=False):
|
33
48
|
def wrapper(**kwargs):
|
49
|
+
if store_true:
|
50
|
+
return EnvStoreTrue(envvar, **kwargs)
|
34
51
|
return EnvDefault(envvar, **kwargs)
|
35
52
|
|
36
53
|
return wrapper
|
54
|
+
|
55
|
+
|
56
|
+
def _convert_value_to_bool(v: Union[str, bool]) -> bool:
|
57
|
+
if isinstance(v, str):
|
58
|
+
return bool(strtobool(v))
|
59
|
+
return v
|
@@ -14,6 +14,7 @@ logger = logging.getLogger(__name__)
|
|
14
14
|
|
15
15
|
def databricks_table_level_lineage(
|
16
16
|
conn: databricks.DatabricksConnectionConfig,
|
17
|
+
endpoint: str,
|
17
18
|
qdc_client: qdc.QDCExternalAPIClient,
|
18
19
|
tenant_id: str,
|
19
20
|
dbt_table_name: str = "quollio_lineage_table_level",
|
@@ -31,7 +32,7 @@ def databricks_table_level_lineage(
|
|
31
32
|
tables = parse_databricks_table_lineage(results)
|
32
33
|
update_table_lineage_inputs = gen_table_lineage_payload(
|
33
34
|
tenant_id=tenant_id,
|
34
|
-
endpoint=
|
35
|
+
endpoint=endpoint,
|
35
36
|
tables=tables,
|
36
37
|
)
|
37
38
|
|
@@ -55,6 +56,7 @@ def databricks_table_level_lineage(
|
|
55
56
|
|
56
57
|
def databricks_column_level_lineage(
|
57
58
|
conn: databricks.DatabricksConnectionConfig,
|
59
|
+
endpoint: str,
|
58
60
|
qdc_client: qdc.QDCExternalAPIClient,
|
59
61
|
tenant_id: str,
|
60
62
|
dbt_table_name: str = "quollio_lineage_column_level",
|
@@ -72,7 +74,7 @@ def databricks_column_level_lineage(
|
|
72
74
|
|
73
75
|
update_column_lineage_inputs = gen_column_lineage_payload(
|
74
76
|
tenant_id=tenant_id,
|
75
|
-
endpoint=
|
77
|
+
endpoint=endpoint,
|
76
78
|
columns=results,
|
77
79
|
)
|
78
80
|
|
@@ -110,7 +112,9 @@ def _get_monitoring_tables(
|
|
110
112
|
CONCAT(table_catalog, '.', table_schema, '.', table_name) AS table_fqdn
|
111
113
|
FROM
|
112
114
|
system.information_schema.tables
|
113
|
-
WHERE
|
115
|
+
WHERE
|
116
|
+
table_name LIKE "%{monitoring_table_suffix}"
|
117
|
+
AND table_name NOT LIKE ('quollio_%')
|
114
118
|
"""
|
115
119
|
with databricks.DatabricksQueryExecutor(config=conn) as databricks_executor:
|
116
120
|
tables = databricks_executor.get_query_results(query)
|
@@ -153,6 +157,8 @@ def _get_column_stats(
|
|
153
157
|
MAX(t.window) AS LATEST
|
154
158
|
FROM
|
155
159
|
{monitoring_table} t
|
160
|
+
WHERE
|
161
|
+
t.column_name not in (':table')
|
156
162
|
GROUP BY
|
157
163
|
t.COLUMN_NAME,
|
158
164
|
t.DATA_TYPE,
|
@@ -176,13 +182,14 @@ def _get_column_stats(
|
|
176
182
|
|
177
183
|
def databricks_column_stats(
|
178
184
|
conn: databricks.DatabricksConnectionConfig,
|
185
|
+
endpoint: str,
|
179
186
|
qdc_client: qdc.QDCExternalAPIClient,
|
180
187
|
tenant_id: str,
|
181
188
|
monitoring_table_suffix: str = "_profile_metrics",
|
182
189
|
) -> None:
|
183
190
|
table_stats = _get_column_stats(conn, monitoring_table_suffix)
|
184
191
|
for table in table_stats:
|
185
|
-
stats = gen_table_stats_payload(tenant_id,
|
192
|
+
stats = gen_table_stats_payload(tenant_id=tenant_id, endpoint=endpoint, stats=table)
|
186
193
|
for stat in stats:
|
187
194
|
status_code = qdc_client.update_stats_by_id(
|
188
195
|
global_id=stat.global_id,
|
@@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)
|
|
16
16
|
|
17
17
|
def build_view(
|
18
18
|
conn: redshift.RedshiftConnectionConfig,
|
19
|
-
|
19
|
+
aggregate_all: bool = False,
|
20
20
|
target_tables: str = "",
|
21
21
|
log_level: str = "info",
|
22
22
|
) -> None:
|
@@ -29,9 +29,9 @@ def build_view(
|
|
29
29
|
project_path = f"{current_dir}/dbt_projects/redshift"
|
30
30
|
template_path = f"{current_dir}/dbt_projects/redshift/profiles"
|
31
31
|
template_name = "profiles_template.yml"
|
32
|
-
options = '{{"query_user": {query_user}, "
|
32
|
+
options = '{{"query_user": {query_user}, "aggregate_all": {aggregate_all}, "target_database": {database}}}'.format(
|
33
33
|
query_user=conn.query_user,
|
34
|
-
|
34
|
+
aggregate_all=aggregate_all,
|
35
35
|
database=conn.database,
|
36
36
|
)
|
37
37
|
|
@@ -210,12 +210,12 @@ if __name__ == "__main__":
|
|
210
210
|
help="Target schema name where the views are built by dbt",
|
211
211
|
)
|
212
212
|
parser.add_argument(
|
213
|
-
"--
|
213
|
+
"--aggregate_all",
|
214
214
|
type=bool,
|
215
|
-
action=env_default("
|
216
|
-
default=
|
215
|
+
action=env_default("REDSHIFT_AGGREGATE_ALL", store_true=True),
|
216
|
+
default=False,
|
217
217
|
required=False,
|
218
|
-
help="
|
218
|
+
help="Aggregate all stats values. False by default.",
|
219
219
|
)
|
220
220
|
parser.add_argument(
|
221
221
|
"--target_tables",
|
@@ -283,7 +283,7 @@ if __name__ == "__main__":
|
|
283
283
|
if "build_view" in args.commands:
|
284
284
|
build_view(
|
285
285
|
conn=conn,
|
286
|
-
|
286
|
+
aggregate_all=args.aggregate_all,
|
287
287
|
target_tables=args.target_tables,
|
288
288
|
log_level=args.log_level,
|
289
289
|
)
|
@@ -72,6 +72,7 @@ def load_lineage(
|
|
72
72
|
conn: snowflake.SnowflakeConnectionConfig,
|
73
73
|
qdc_client: qdc.QDCExternalAPIClient,
|
74
74
|
tenant_id: str,
|
75
|
+
enable_column_lineage: bool = False,
|
75
76
|
) -> None:
|
76
77
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
77
78
|
|
@@ -82,12 +83,17 @@ def load_lineage(
|
|
82
83
|
tenant_id=tenant_id,
|
83
84
|
)
|
84
85
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
86
|
+
if enable_column_lineage:
|
87
|
+
logger.info(
|
88
|
+
f"enable_column_lineage is set to {enable_column_lineage}.Generate Snowflake column to column lineage."
|
89
|
+
)
|
90
|
+
snowflake_column_to_column_lineage(
|
91
|
+
conn=conn,
|
92
|
+
qdc_client=qdc_client,
|
93
|
+
tenant_id=tenant_id,
|
94
|
+
)
|
95
|
+
else:
|
96
|
+
logger.info("Skip column lineage ingestion. Set enable_column_lineage to True if you ingest column lineage.")
|
91
97
|
|
92
98
|
logger.info("Lineage data is successfully loaded.")
|
93
99
|
|
@@ -264,6 +270,14 @@ if __name__ == "__main__":
|
|
264
270
|
required=False,
|
265
271
|
help="The client secrete that is created on Quollio console to let clients access Quollio External API",
|
266
272
|
)
|
273
|
+
parser.add_argument(
|
274
|
+
"--enable_column_lineage",
|
275
|
+
type=bool,
|
276
|
+
action=env_default("ENABLE_COLUMN_LINEAGE", store_true=True),
|
277
|
+
default=False,
|
278
|
+
required=False,
|
279
|
+
help="Whether to ingest column lineage into QDIC or not. Default value is False",
|
280
|
+
)
|
267
281
|
args = parser.parse_args()
|
268
282
|
conn = snowflake.SnowflakeConnectionConfig(
|
269
283
|
account_id=args.account_id,
|
@@ -296,6 +310,7 @@ if __name__ == "__main__":
|
|
296
310
|
conn=conn,
|
297
311
|
qdc_client=qdc_client,
|
298
312
|
tenant_id=args.tenant_id,
|
313
|
+
enable_column_lineage=args.enable_column_lineage,
|
299
314
|
)
|
300
315
|
if "load_stats" in args.commands:
|
301
316
|
qdc_client = qdc.QDCExternalAPIClient(
|
quollio_core-0.4.7/quollio_core/dbt_projects/redshift/macros/materialization/divided_view.sql
DELETED
@@ -1,97 +0,0 @@
|
|
1
|
-
{%- materialization divided_view, default %}
|
2
|
-
{%- set identifier = model['alias'] %}
|
3
|
-
{%- set target_relations = [] %}
|
4
|
-
{%- set chunk = config.get('chunk') %}
|
5
|
-
{%- set grant_config = config.get('grants') %}
|
6
|
-
|
7
|
-
{{ run_hooks(pre_hooks, inside_transaction=False) }}
|
8
|
-
-- `BEGIN` happens here:
|
9
|
-
{{ run_hooks(pre_hooks, inside_transaction=True) }}
|
10
|
-
|
11
|
-
-- fetch records
|
12
|
-
{%- set query_quollio_stats_profiling_columns -%}
|
13
|
-
SELECT * FROM {{ ref('quollio_stats_profiling_columns') }} WHERE table_name not like 'quollio_%'
|
14
|
-
{%- endset -%}
|
15
|
-
{%- set results = run_query(query_quollio_stats_profiling_columns) -%}
|
16
|
-
{%- if execute -%}
|
17
|
-
{%- set records = results.rows -%}
|
18
|
-
{%- else -%}
|
19
|
-
{%- set records = [] -%}
|
20
|
-
{%- endif -%}
|
21
|
-
|
22
|
-
-- build sql
|
23
|
-
{%- for i in range(0, records|length, chunk) -%}
|
24
|
-
{%- set build_sql %}
|
25
|
-
{%- for record in records[i: i+chunk] -%}
|
26
|
-
{%- if not loop.first -%}UNION{% endif %}
|
27
|
-
SELECT
|
28
|
-
main.db_name
|
29
|
-
, main.schema_name
|
30
|
-
, main.table_name
|
31
|
-
, main.column_name
|
32
|
-
, main.max_value
|
33
|
-
, main.min_value
|
34
|
-
, main.null_count
|
35
|
-
, main.cardinality
|
36
|
-
, main.avg_value
|
37
|
-
, main.median_value
|
38
|
-
, mode.mode_value
|
39
|
-
, main.stddev_value
|
40
|
-
FROM
|
41
|
-
(
|
42
|
-
SELECT
|
43
|
-
DISTINCT
|
44
|
-
'{{record[0]}}'::varchar as db_name
|
45
|
-
, '{{record[1]}}'::varchar as schema_name
|
46
|
-
, '{{record[2]}}'::varchar as table_name
|
47
|
-
, '{{record[3]}}'::varchar as column_name
|
48
|
-
, {% if var("skip_heavy") == false and record[5] == true %}cast(max("{{record[3]}}") as varchar){% else %}null::varchar{% endif %} AS max_value
|
49
|
-
, {% if var("skip_heavy") == false and record[5] == true %}cast(min("{{record[3]}}") as varchar){% else %}null::varchar{% endif %} AS min_value
|
50
|
-
-- requires full table scan
|
51
|
-
, {% if var("skip_heavy") == false %}cast(SUM(NVL2("{{record[3]}}", 0, 1)) as integer){% else %}null::integer{% endif %} AS null_count
|
52
|
-
, APPROXIMATE COUNT(DISTINCT "{{record[3]}}") AS cardinality
|
53
|
-
-- requires full table scan
|
54
|
-
, {% if var("skip_heavy") == false and record[5] == true %}cast(avg("{{record[3]}}")as varchar){% else %}null::varchar{% endif %} AS avg_value
|
55
|
-
, {% if var("skip_heavy") == false and record[5] == true %}cast(median("{{record[3]}}") as varchar){% else %}null::varchar{% endif %} AS median_value
|
56
|
-
-- requires full table scan
|
57
|
-
, {% if record[5] == true %}cast(STDDEV_SAMP("{{record[3]}}") as integer){% else %}null::integer{% endif %} AS stddev_value
|
58
|
-
FROM {{ record[0] }}.{{ record[1] }}.{{ record[2] }}
|
59
|
-
) main, (
|
60
|
-
{%- if var("skip_heavy") == false and record[4] == false %}
|
61
|
-
SELECT
|
62
|
-
cast("{{record[3]}}" as varchar) mode_value
|
63
|
-
FROM (
|
64
|
-
SELECT
|
65
|
-
DISTINCT
|
66
|
-
"{{record[3]}}"
|
67
|
-
, ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC) AS row_num
|
68
|
-
FROM {{ record[0] }}.{{ record[1] }}.{{ record[2] }}
|
69
|
-
GROUP BY
|
70
|
-
"{{record[3]}}"
|
71
|
-
)
|
72
|
-
WHERE
|
73
|
-
row_num = 1
|
74
|
-
{% else %}
|
75
|
-
SELECT null as mode_value {%- endif -%}
|
76
|
-
) mode
|
77
|
-
{% endfor -%}
|
78
|
-
{%- endset %}
|
79
|
-
-- create a view with a index as suffix
|
80
|
-
{%- set target_identifier = "%s_%d"|format(model['name'], loop.index) %}
|
81
|
-
{%- set target_relation = api.Relation.create(identifier=target_identifier, schema=schema, database=database, type='view') %}
|
82
|
-
-- {{ drop_relation_if_exists(target_relation) }}
|
83
|
-
{% call statement("main") %}
|
84
|
-
{{ get_replace_view_sql(target_relation, build_sql) }}
|
85
|
-
{% endcall %}
|
86
|
-
{%- set full_refresh_mode = (should_full_refresh()) -%}
|
87
|
-
{%- set should_revoke = should_revoke(target_relation, full_refresh_mode) %}
|
88
|
-
{%- do apply_grants(target_relation, grant_config, should_revoke) %}
|
89
|
-
{%- set target_relations = target_relations.append(target_relation) %}
|
90
|
-
{%- endfor -%}
|
91
|
-
|
92
|
-
{{ run_hooks(post_hooks, inside_transaction=True) }}
|
93
|
-
{{ adapter.commit() }}
|
94
|
-
{{ run_hooks(post_hooks, inside_transaction=False) }}
|
95
|
-
|
96
|
-
{{ return({'relations': target_relations}) }}
|
97
|
-
{%- endmaterialization -%}
|
quollio_core-0.4.7/quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql
DELETED
@@ -1,63 +0,0 @@
|
|
1
|
-
{%- materialization divided_view, default %}
|
2
|
-
{%- set identifier = model['alias'] %}
|
3
|
-
{%- set target_relations = [] %}
|
4
|
-
{%- set chunk = config.get('chunk') %}
|
5
|
-
{%- set grant_config = config.get('grants') %}
|
6
|
-
|
7
|
-
{{ run_hooks(pre_hooks, inside_transaction=False) }}
|
8
|
-
-- `BEGIN` happens here:
|
9
|
-
{{ run_hooks(pre_hooks, inside_transaction=True) }}
|
10
|
-
|
11
|
-
-- fetch records
|
12
|
-
{%- set query_quollio_stats_profiling_columns -%}
|
13
|
-
SELECT * FROM {{ ref('quollio_stats_profiling_columns') }} WHERE NOT startswith(table_name, 'QUOLLIO_')
|
14
|
-
{%- endset -%}
|
15
|
-
{%- set results = run_query(query_quollio_stats_profiling_columns) -%}
|
16
|
-
{%- if execute -%}
|
17
|
-
{%- set records = results.rows -%}
|
18
|
-
{%- else -%}
|
19
|
-
{%- set records = [] -%}
|
20
|
-
{%- endif -%}
|
21
|
-
|
22
|
-
-- build sql
|
23
|
-
{%- for i in range(0, records|length, chunk) -%}
|
24
|
-
{%- set build_sql %}
|
25
|
-
{%- for record in records[i: i+chunk] -%}
|
26
|
-
{%- if not loop.first %}UNION{% endif %}
|
27
|
-
|
28
|
-
SELECT
|
29
|
-
DISTINCT
|
30
|
-
'{{record[0]}}' as db_name
|
31
|
-
, '{{record[1]}}' as schema_name
|
32
|
-
, '{{record[2]}}' as table_name
|
33
|
-
, '{{record[3]}}' as column_name
|
34
|
-
, {% if record[5] == true %}CAST(max("{{record[3]}}") AS STRING){% else %}null{% endif %} AS max_value
|
35
|
-
, {% if record[5] == true %}CAST(min("{{record[3]}}") AS STRING){% else %}null{% endif %} AS min_value
|
36
|
-
, COUNT_IF("{{record[3]}}" IS NULL) AS null_count
|
37
|
-
, APPROX_COUNT_DISTINCT("{{record[3]}}") AS cardinality
|
38
|
-
, {% if record[5] == true %}avg("{{record[3]}}"){% else %}null{% endif %} AS avg_value
|
39
|
-
, {% if record[5] == true %}median("{{record[3]}}"){% else %}null{% endif %} AS median_value
|
40
|
-
, {% if record[5] == true %}approx_top_k("{{record[3]}}")[0][0]{% else %}null{% endif %} AS mode_value
|
41
|
-
, {% if record[5] == true %}stddev("{{record[3]}}"){% else %}null{% endif %} AS stddev_value
|
42
|
-
FROM "{{record[0]}}"."{{record[1]}}"."{{record[2]}}" {{ var("sample_method") }}
|
43
|
-
{% endfor -%}
|
44
|
-
{%- endset %}
|
45
|
-
-- create a view with a index as suffix
|
46
|
-
{%- set target_identifier = "%s_%d"|format(model['name'], loop.index) %}
|
47
|
-
{%- set target_relation = api.Relation.create(identifier=target_identifier, schema=schema, database=database, type='view') %}
|
48
|
-
{% call statement("main") %}
|
49
|
-
{{ get_create_view_as_sql(target_relation, build_sql) }}
|
50
|
-
{% endcall %}
|
51
|
-
{%- set full_refresh_mode = (should_full_refresh()) -%}
|
52
|
-
{%- set should_revoke = should_revoke(target_relation, full_refresh_mode) %}
|
53
|
-
{%- do apply_grants(target_relation, grant_config, should_revoke) %}
|
54
|
-
{%- set target_relations = target_relations.append(target_relation) %}
|
55
|
-
{%- endfor -%}
|
56
|
-
|
57
|
-
{{ run_hooks(post_hooks, inside_transaction=True) }}
|
58
|
-
-- `COMMIT` happens here:
|
59
|
-
{{ adapter.commit() }}
|
60
|
-
{{ run_hooks(post_hooks, inside_transaction=False) }}
|
61
|
-
|
62
|
-
{{ return({'relations': target_relations}) }}
|
63
|
-
{%- endmaterialization -%}
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/databricks/analyses/.gitkeep
RENAMED
File without changes
|
{quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/databricks/dbt_project.yml
RENAMED
File without changes
|
{quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/databricks/macros/.gitkeep
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/databricks/models/sources.yml
RENAMED
File without changes
|
{quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/databricks/package-lock.yml
RENAMED
File without changes
|
File without changes
|
File without changes
|
{quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/databricks/seeds/.gitkeep
RENAMED
File without changes
|
{quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/databricks/snapshots/.gitkeep
RENAMED
File without changes
|
File without changes
|
{quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/analyses/.gitkeep
RENAMED
File without changes
|
{quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/macros/.gitkeep
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/models/sources.yml
RENAMED
File without changes
|
{quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/package-lock.yml
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/redshift/snapshots/.gitkeep
RENAMED
File without changes
|
File without changes
|
File without changes
|
{quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/analyses/.gitkeep
RENAMED
File without changes
|
{quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/dbt_project.yml
RENAMED
File without changes
|
{quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/macros/.gitkeep
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/models/sources.yml
RENAMED
File without changes
|
{quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/package-lock.yml
RENAMED
File without changes
|
File without changes
|
File without changes
|
{quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/seeds/.gitkeep
RENAMED
File without changes
|
{quollio_core-0.4.7 → quollio_core-0.4.9}/quollio_core/dbt_projects/snowflake/snapshots/.gitkeep
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|