quollio-core 0.4.4__py3-none-any.whl → 0.4.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. quollio_core/__init__.py +1 -1
  2. quollio_core/bigquery.py +123 -0
  3. quollio_core/bricks.py +288 -0
  4. quollio_core/dbt_projects/databricks/.gitignore +4 -0
  5. quollio_core/dbt_projects/databricks/README.md +5 -0
  6. quollio_core/dbt_projects/databricks/analyses/.gitkeep +0 -0
  7. quollio_core/dbt_projects/databricks/dbt_project.yml +21 -0
  8. quollio_core/dbt_projects/databricks/macros/.gitkeep +0 -0
  9. quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.sql +73 -0
  10. quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.yml +14 -0
  11. quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.sql +63 -0
  12. quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.yml +11 -0
  13. quollio_core/dbt_projects/databricks/models/sources.yml +84 -0
  14. quollio_core/dbt_projects/databricks/package-lock.yml +14 -0
  15. quollio_core/dbt_projects/databricks/packages.yml +13 -0
  16. quollio_core/dbt_projects/databricks/profiles/profiles_template.yml +14 -0
  17. quollio_core/dbt_projects/databricks/seeds/.gitkeep +0 -0
  18. quollio_core/dbt_projects/databricks/snapshots/.gitkeep +0 -0
  19. quollio_core/dbt_projects/redshift/dbt_project.yml +1 -1
  20. quollio_core/dbt_projects/redshift/macros/materialization/divided_view.sql +101 -34
  21. quollio_core/dbt_projects/redshift/models/quollio_stats_columns.sql +1 -2
  22. quollio_core/dbt_projects/redshift/package-lock.yml +1 -1
  23. quollio_core/dbt_projects/seeds/.gitkeep +0 -0
  24. quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql +50 -27
  25. quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.sql +1 -2
  26. quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.sql +57 -20
  27. quollio_core/helper/core.py +4 -0
  28. quollio_core/helper/env_default.py +28 -2
  29. quollio_core/helper/log.py +17 -0
  30. quollio_core/profilers/bigquery.py +81 -0
  31. quollio_core/profilers/databricks.py +198 -0
  32. quollio_core/profilers/lineage.py +26 -0
  33. quollio_core/profilers/redshift.py +41 -74
  34. quollio_core/profilers/snowflake.py +138 -169
  35. quollio_core/profilers/sqllineage.py +0 -1
  36. quollio_core/profilers/stats.py +0 -1
  37. quollio_core/redshift.py +15 -18
  38. quollio_core/repository/bigquery.py +61 -0
  39. quollio_core/repository/databricks.py +62 -0
  40. quollio_core/repository/dbt.py +0 -1
  41. quollio_core/repository/qdc.py +0 -3
  42. quollio_core/repository/redshift.py +0 -1
  43. quollio_core/repository/snowflake.py +6 -1
  44. quollio_core/snowflake.py +29 -16
  45. {quollio_core-0.4.4.dist-info → quollio_core-0.4.10.dist-info}/METADATA +11 -2
  46. {quollio_core-0.4.4.dist-info → quollio_core-0.4.10.dist-info}/RECORD +48 -25
  47. {quollio_core-0.4.4.dist-info → quollio_core-0.4.10.dist-info}/LICENSE +0 -0
  48. {quollio_core-0.4.4.dist-info → quollio_core-0.4.10.dist-info}/WHEEL +0 -0
@@ -0,0 +1,84 @@
1
+ version: 2
2
+
3
+ sources:
4
+ - name: access
5
+ database: system
6
+ schema: access
7
+ tables:
8
+ - name: table_lineage
9
+ description: Describes table level lineage
10
+ columns:
11
+ - name: source_table_full_name
12
+ description: ''
13
+ type: string
14
+ - name: target_table_full_name
15
+ description: ''
16
+ type: string
17
+ - name: target_type
18
+ description: ''
19
+ type: string
20
+ - name: event_time
21
+ description: ''
22
+ type: timestamp
23
+
24
+ - name: column_lineage
25
+ description: Describes column level lineage
26
+ columns:
27
+ - name: source_table_full_name
28
+ description: ''
29
+ type: string
30
+ - name: target_table_full_name
31
+ description: ''
32
+ type: string
33
+ - name: event_time
34
+ description: ''
35
+ type: timestamp
36
+ - name: source_column_name
37
+ description: ''
38
+ type: string
39
+ - name: target_column_name
40
+ description: ''
41
+ type: string
42
+
43
+ - name: inf_sch
44
+ database: system
45
+ schema: information_schema
46
+ tables:
47
+ - name: tables
48
+ description: Lists existing tables (i.e., not deleted).
49
+ columns:
50
+ - name: table_catalog
51
+ description: ''
52
+ type: string
53
+ - name: table_schema
54
+ description: ''
55
+ type: string
56
+ - name: table_name
57
+ description: ''
58
+ type: string
59
+
60
+ - name: views
61
+ description: Lists existing views (i.e., not deleted). Views are treated as tables.
62
+ columns:
63
+ - name: table_catalog
64
+ description: ''
65
+ type: string
66
+ - name: table_schema
67
+ description: ''
68
+ type: string
69
+ - name: table_name
70
+ description: ''
71
+ type: string
72
+
73
+ - name: columns
74
+ description: ''
75
+ columns:
76
+ - name: table_catalog
77
+ description: ''
78
+ type: string
79
+ - name: table_schema
80
+ description: ''
81
+ type: string
82
+ - name: table_name
83
+ description: ''
84
+ type: string
@@ -0,0 +1,14 @@
1
+ packages:
2
+ - package: dbt-labs/dbt_utils
3
+ version: 1.1.1
4
+ - package: dbt-labs/spark_utils
5
+ version: 0.3.0
6
+ - package: dbt-labs/codegen
7
+ version: 0.12.1
8
+ - package: dbt-labs/dbt_external_tables
9
+ version: 0.8.7
10
+ - package: dbt-labs/dbt_project_evaluator
11
+ version: 0.8.1
12
+ - package: brooklyn-data/dbt_artifacts
13
+ version: 2.6.2
14
+ sha1_hash: cbb324267dbf6c6fb7de11b162e4fbafd1e32a9c
@@ -0,0 +1,13 @@
1
+ packages:
2
+ - package: dbt-labs/dbt_utils
3
+ version: [">=0.0.0", "<2.0.0"]
4
+ - package: dbt-labs/spark_utils
5
+ version: [">=0.0.0", "<1.0.0"]
6
+ - package: dbt-labs/codegen
7
+ version: [">=0.0.0", "<1.0.0"]
8
+ - package: dbt-labs/dbt_external_tables
9
+ version: [">=0.0.0", "<1.0.0"]
10
+ - package: dbt-labs/dbt_project_evaluator
11
+ version: [">=0.0.0", "<1.0.0"]
12
+ - package: brooklyn-data/dbt_artifacts
13
+ version: [">=2.0.0", "<3.0.0"]
@@ -0,0 +1,14 @@
1
+ quollio_intelligence_databricks:
2
+ target: project
3
+ outputs:
4
+ project:
5
+ type: databricks
6
+ host: {{ host }}
7
+ http_path: {{ http_path }}
8
+ catalog: {{ catalog }}
9
+ schema: {{ schema }}
10
+ auth_type: oauth
11
+ client_id: {{ client_id }}
12
+ client_secret: {{ client_secret }}
13
+ databricks_port: {{ databricks_port }}
14
+
File without changes
@@ -18,4 +18,4 @@ clean-targets:
18
18
  models:
19
19
  +dbt-osmosis: "{model}.yml"
20
20
  +grants:
21
- select: ["{{ var('query_user') }}"]
21
+ select: ["\"{{ var('query_user') }}\""]
@@ -1,59 +1,126 @@
1
1
  {%- materialization divided_view, default %}
2
2
  {%- set identifier = model['alias'] %}
3
3
  {%- set target_relations = [] %}
4
- {%- set chunk = config.get('chunk') %}
5
4
  {%- set grant_config = config.get('grants') %}
6
5
 
7
6
  {{ run_hooks(pre_hooks, inside_transaction=False) }}
8
7
  -- `BEGIN` happens here:
9
8
  {{ run_hooks(pre_hooks, inside_transaction=True) }}
10
9
 
11
- -- fetch records
12
- {%- set query_quollio_stats_profiling_columns -%}
13
- SELECT * FROM {{ ref('quollio_stats_profiling_columns') }} WHERE table_name not like 'quollio_%'
10
+ -- fetch target_tables
11
+ {%- set query_stats_target_tables -%}
12
+ SELECT
13
+ distinct
14
+ database_name
15
+ , schema_name
16
+ , table_name
17
+ FROM
18
+ {{ ref('quollio_stats_profiling_columns') }}
19
+ WHERE
20
+ table_name not like 'quollio_%%'
14
21
  {%- endset -%}
15
- {%- set results = run_query(query_quollio_stats_profiling_columns) -%}
22
+ {%- set results = run_query(query_stats_target_tables) -%}
16
23
  {%- if execute -%}
17
- {%- set records = results.rows -%}
24
+ {%- set stats_target_tables = results.rows -%}
18
25
  {%- else -%}
19
- {%- set records = [] -%}
26
+ {%- set stats_target_tables = [] -%}
27
+ {%- endif -%}
28
+
29
+ -- skip creating views if the target profiling columns don't exist.
30
+ {%- if stats_target_tables | length == 0 -%}
31
+ {% call statement("main") %}
32
+ {{ log("No records found. Just execute select stmt for skipping call statement.", info=True) }}
33
+ select null
34
+ {% endcall %}
35
+ {%- set full_refresh_mode = (should_full_refresh()) -%}
36
+ {%- set should_revoke = should_revoke(target_relation, full_refresh_mode) %}
20
37
  {%- endif -%}
21
38
 
22
39
  -- build sql
23
- {%- for i in range(0, records|length, chunk) -%}
24
- {%- set build_sql %}
25
- {%- for record in records[i: i+chunk] -%}
26
- {%- if not loop.first %}UNION{% endif %}
40
+ {%- for stats_target_table in stats_target_tables -%}
41
+ -- get columns for statistics.
42
+ -- LISTAGG can't be used on system tables, so the columns must be fetched separately for each table.
43
+ -- See https://docs.aws.amazon.com/redshift/latest/dg/c_join_PG.html.
44
+ {%- set stats_target_columns %}
45
+ SELECT
46
+ database_name
47
+ , schema_name
48
+ , table_name
49
+ , column_name
50
+ , is_bool
51
+ , is_calculable
52
+ FROM
53
+ {{ ref('quollio_stats_profiling_columns') }}
54
+ WHERE
55
+ database_name = '{{stats_target_table[0]}}'
56
+ AND schema_name = '{{stats_target_table[1]}}'
57
+ AND table_name = '{{stats_target_table[2]}}'
58
+ {%- endset -%}
59
+
60
+ {%- set results = run_query(stats_target_columns) -%}
61
+ {%- set stats_target_columns = results.rows -%}
62
+
63
+ {%- set sql_for_column_stats %}
64
+ {%- for stats_target_column in stats_target_columns -%}
65
+ {%- if not loop.first -%}UNION{% endif %}
27
66
  SELECT
28
- DISTINCT
29
- '{{record[0]}}'::varchar as db_name
30
- , '{{record[1]}}'::varchar as schema_name
31
- , '{{record[2]}}'::varchar as table_name
32
- , '{{record[3]}}'::varchar as column_name
33
- , {% if var("skip_heavy") == false and record[5] == true %}cast(max("{{record[3]}}") as varchar){% else %}null::varchar{% endif %} AS max_value
34
- , {% if var("skip_heavy") == false and record[5] == true %}cast(min("{{record[3]}}") as varchar){% else %}null::varchar{% endif %} AS min_value
35
- -- requires full table scan
36
- , {% if var("skip_heavy") == false %}cast(SUM(NVL2("{{record[3]}}", 0, 1)) as integer){% else %}null::integer{% endif %} AS null_count
37
- , APPROXIMATE COUNT(DISTINCT "{{record[3]}}") AS cardinality
38
- -- requires full table scan
39
- , {% if var("skip_heavy") == false and record[5] == true %}cast(avg("{{record[3]}}")as varchar){% else %}null::varchar{% endif %} AS avg_value
40
- , {% if var("skip_heavy") == false and record[5] == true %}(SELECT cast(median("{{record[3]}}") as varchar) FROM {{record[2]}}){% else %}null::varchar{% endif %} AS median_value
41
- -- requires full table scan
42
- , {% if var("skip_heavy") == false and record[4] == false %}
43
- (SELECT cast("{{record[3]}}" as varchar) FROM (
44
- SELECT "{{record[3]}}", ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC) AS row_num FROM {{record[2]}} GROUP BY "{{record[3]}}"
45
- ) WHERE row_num = 1)
46
- {% else %}null::varchar{% endif %} AS mode_value
47
- , {% if record[5] == true %}cast(STDDEV_SAMP("{{record[3]}}") as integer){% else %}null::integer{% endif %} AS stddev_value
48
- FROM {{ record[0] }}.{{ record[1] }}.{{ record[2] }}
67
+ main.db_name
68
+ , main.schema_name
69
+ , main.table_name
70
+ , main.column_name
71
+ , main.max_value
72
+ , main.min_value
73
+ , main.null_count
74
+ , main.cardinality
75
+ , main.avg_value
76
+ , main.median_value
77
+ , mode.mode_value
78
+ , main.stddev_value
79
+ FROM
80
+ (
81
+ SELECT
82
+ DISTINCT
83
+ '{{stats_target_column[0]}}'::varchar as db_name
84
+ , '{{stats_target_column[1]}}'::varchar as schema_name
85
+ , '{{stats_target_column[2]}}'::varchar as table_name
86
+ , '{{stats_target_column[3]}}'::varchar as column_name
87
+ , {% if var("aggregate_all") == True and stats_target_column[5] == True %}cast(max("{{stats_target_column[3]}}") as varchar){% else %}null::varchar{% endif %} AS max_value
88
+ , {% if var("aggregate_all") == True and stats_target_column[5] == True %}cast(min("{{stats_target_column[3]}}") as varchar){% else %}null::varchar{% endif %} AS min_value
89
+ -- requires full table scan
90
+ , {% if var("aggregate_all") == True %}cast(SUM(NVL2("{{stats_target_column[3]}}", 0, 1)) as integer){% else %}null::integer{% endif %} AS null_count
91
+ , APPROXIMATE COUNT(DISTINCT "{{stats_target_column[3]}}") AS cardinality
92
+ -- requires full table scan
93
+ , {% if var("aggregate_all") == True and stats_target_column[5] == True %}cast(avg("{{stats_target_column[3]}}")as varchar){% else %}null::varchar{% endif %} AS avg_value
94
+ , {% if var("aggregate_all") == True and stats_target_column[5] == True %}cast(median("{{stats_target_column[3]}}") as varchar){% else %}null::varchar{% endif %} AS median_value
95
+ -- requires full table scan
96
+ , {% if stats_target_column[5] == True %}cast(STDDEV_SAMP("{{stats_target_column[3]}}") as integer){% else %}null::integer{% endif %} AS stddev_value
97
+ FROM {{ stats_target_column[0] }}.{{ stats_target_column[1] }}.{{ stats_target_column[2] }}
98
+ ) main, (
99
+ {%- if var("aggregate_all") == True and stats_target_column[4] == false %}
100
+ SELECT
101
+ cast("{{stats_target_column[3]}}" as varchar) mode_value
102
+ FROM (
103
+ SELECT
104
+ DISTINCT
105
+ "{{stats_target_column[3]}}"
106
+ , ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC) AS row_num
107
+ FROM {{ stats_target_column[0] }}.{{ stats_target_column[1] }}.{{ stats_target_column[2] }}
108
+ GROUP BY
109
+ "{{stats_target_column[3]}}"
110
+ )
111
+ WHERE
112
+ row_num = 1
113
+ {% else %}
114
+ SELECT null as mode_value {%- endif -%}
115
+ ) mode
49
116
  {% endfor -%}
50
117
  {%- endset %}
51
118
  -- create a view with a index as suffix
52
- {%- set target_identifier = "%s_%d"|format(model['name'], loop.index) %}
119
+ {%- set target_identifier = "%s_%s_%s_%s"|format(model['name'], stats_target_table[0], stats_target_table[1], stats_target_table[2]) %}
53
120
  {%- set target_relation = api.Relation.create(identifier=target_identifier, schema=schema, database=database, type='view') %}
54
121
  -- {{ drop_relation_if_exists(target_relation) }}
55
122
  {% call statement("main") %}
56
- {{ get_replace_view_sql(target_relation, build_sql) }}
123
+ {{ get_replace_view_sql(target_relation, sql_for_column_stats) }}
57
124
  {% endcall %}
58
125
  {%- set full_refresh_mode = (should_full_refresh()) -%}
59
126
  {%- set should_revoke = should_revoke(target_relation, full_refresh_mode) %}
@@ -1,7 +1,6 @@
1
1
  {{
2
2
  config(
3
- materialized='divided_view',
4
- chunk=20
3
+ materialized='divided_view'
5
4
  )
6
5
  }}
7
6
  -- depends_on: {{ ref('quollio_stats_profiling_columns') }}
@@ -1,4 +1,4 @@
1
1
  packages:
2
2
  - package: dbt-labs/dbt_utils
3
3
  version: 1.1.1
4
- sha1_hash: 23451c207c1d4dd71b5925f12a5cd66f2ebb2b3b
4
+ sha1_hash: a158c48c59c2bb7d729d2a4e215aabe5bb4f3353
File without changes
@@ -1,51 +1,74 @@
1
1
  {%- materialization divided_view, default %}
2
2
  {%- set identifier = model['alias'] %}
3
3
  {%- set target_relations = [] %}
4
- {%- set chunk = config.get('chunk') %}
5
4
  {%- set grant_config = config.get('grants') %}
6
5
 
7
6
  {{ run_hooks(pre_hooks, inside_transaction=False) }}
8
7
  -- `BEGIN` happens here:
9
8
  {{ run_hooks(pre_hooks, inside_transaction=True) }}
10
9
 
11
- -- fetch records
12
- {%- set query_quollio_stats_profiling_columns -%}
13
- SELECT * FROM {{ ref('quollio_stats_profiling_columns') }} WHERE NOT startswith(table_name, 'QUOLLIO_')
10
+ -- fetch target_tables
11
+ {%- set query_stats_target_tables -%}
12
+ SELECT
13
+ TABLE_CATALOG
14
+ , TABLE_SCHEMA
15
+ , TABLE_NAME
16
+ , OBJECT_AGG(COLUMN_NAME, IS_CALCULABLE) AS COLUMNS_OBJ
17
+ FROM
18
+ {{ ref('quollio_stats_profiling_columns') }}
19
+ WHERE NOT startswith(table_name, 'QUOLLIO_')
20
+ GROUP BY
21
+ TABLE_CATALOG
22
+ , TABLE_SCHEMA
23
+ , TABLE_NAME
14
24
  {%- endset -%}
15
- {%- set results = run_query(query_quollio_stats_profiling_columns) -%}
25
+ {%- set results = run_query(query_stats_target_tables) -%}
16
26
  {%- if execute -%}
17
- {%- set records = results.rows -%}
27
+ {%- set stats_target_tables = results.rows -%}
18
28
  {%- else -%}
19
- {%- set records = [] -%}
29
+ {%- set stats_target_tables = [] -%}
30
+ {%- endif -%}
31
+
32
+ -- skip creating views if the target profiling columns don't exist.
33
+ {%- if stats_target_tables | length == 0 -%}
34
+ {% call statement("main") %}
35
+ {{ log("No records found. Just execute select stmt for skipping call statement.", info=True) }}
36
+ select null
37
+ {% endcall %}
38
+ {%- set full_refresh_mode = (should_full_refresh()) -%}
39
+ {%- set should_revoke = should_revoke(target_relation, full_refresh_mode) %}
20
40
  {%- endif -%}
21
41
 
22
- -- build sql
23
- {%- for i in range(0, records|length, chunk) -%}
24
- {%- set build_sql %}
25
- {%- for record in records[i: i+chunk] -%}
42
+ -- create view for each table
43
+ {%- for stats_target_table in stats_target_tables -%}
44
+ -- build sql for column value aggregation.
45
+ {%- set sql_for_column_stats %}
46
+ {% set columns_json = fromjson(stats_target_table[3]) %}
47
+ {%- for col_name, is_calclable in columns_json.items() -%}
26
48
  {%- if not loop.first %}UNION{% endif %}
27
49
  SELECT
28
50
  DISTINCT
29
- '{{record[0]}}' as db_name
30
- , '{{record[1]}}' as schema_name
31
- , '{{record[2]}}' as table_name
32
- , '{{record[3]}}' as column_name
33
- , {% if record[5] == true %}CAST(max("{{record[3]}}") AS STRING){% else %}null{% endif %} AS max_value
34
- , {% if record[5] == true %}CAST(min("{{record[3]}}") AS STRING){% else %}null{% endif %} AS min_value
35
- , COUNT_IF("{{record[3]}}" IS NULL) AS null_count
36
- , APPROX_COUNT_DISTINCT("{{record[3]}}") AS cardinality
37
- , {% if record[5] == true %}avg("{{record[3]}}"){% else %}null{% endif %} AS avg_value
38
- , {% if record[5] == true %}median("{{record[3]}}"){% else %}null{% endif %} AS median_value
39
- , {% if record[5] == true %}approx_top_k("{{record[3]}}")[0][0]{% else %}null{% endif %} AS mode_value
40
- , {% if record[5] == true %}stddev("{{record[3]}}"){% else %}null{% endif %} AS stddev_value
41
- FROM {{ record[0] }}.{{ record[1] }}.{{ record[2] }} {{ var("sample_method") }}
51
+ '{{stats_target_table[0]}}' as db_name
52
+ , '{{stats_target_table[1]}}' as schema_name
53
+ , '{{stats_target_table[2]}}' as table_name
54
+ , '{{col_name}}' as column_name
55
+ , {% if is_calclable == True %}CAST(MAX("{{col_name}}") AS STRING){% else %}NULL{% endif %} AS max_value
56
+ , {% if is_calclable == True %}CAST(MIN("{{col_name}}") AS STRING){% else %}NULL{% endif %} AS min_value
57
+ , COUNT_IF("{{col_name}}" IS NULL) AS null_count
58
+ , APPROX_COUNT_DISTINCT("{{col_name}}") AS cardinality
59
+ , {% if is_calclable == True %}AVG("{{col_name}}"){% else %}NULL{% endif %} AS avg_value
60
+ , {% if is_calclable == True %}MEDIAN("{{col_name}}"){% else %}NULL{% endif %} AS median_value
61
+ , {% if is_calclable == True %}APPROX_TOP_K("{{col_name}}")[0][0]{% else %}NULL{% endif %} AS mode_value
62
+ , {% if is_calclable == True %}STDDEV("{{col_name}}"){% else %}NULL{% endif %} AS stddev_value
63
+ FROM "{{stats_target_table[0]}}"."{{stats_target_table[1]}}"."{{stats_target_table[2]}}" {{ var("sample_method") }}
42
64
  {% endfor -%}
43
65
  {%- endset %}
66
+
44
67
  -- create a view with a index as suffix
45
- {%- set target_identifier = "%s_%d"|format(model['name'], loop.index) %}
46
- {%- set target_relation = api.Relation.create(identifier=target_identifier, schema=schema, database=database, type='view') %}
68
+ {%- set stats_view_identifier = "%s_%s_%s_%s"|format(model['name'], stats_target_table[0], stats_target_table[1], stats_target_table[2]) %}
69
+ {%- set target_relation = api.Relation.create(identifier=stats_view_identifier, schema=schema, database=database, type='view') %}
47
70
  {% call statement("main") %}
48
- {{ get_create_view_as_sql(target_relation, build_sql) }}
71
+ {{ get_create_view_as_sql(target_relation, sql_for_column_stats) }}
49
72
  {% endcall %}
50
73
  {%- set full_refresh_mode = (should_full_refresh()) -%}
51
74
  {%- set should_revoke = should_revoke(target_relation, full_refresh_mode) %}
@@ -1,7 +1,6 @@
1
1
  {{
2
2
  config(
3
- materialized='divided_view',
4
- chunk=20
3
+ materialized='divided_view'
5
4
  )
6
5
  }}
7
6
  -- depends_on: {{ ref('quollio_stats_profiling_columns') }}
@@ -36,24 +36,61 @@ WITH columns AS (
36
36
  table_catalog
37
37
  , table_schema
38
38
  , name
39
+ ), m_view_sys_columns AS (
40
+ SELECT
41
+ cols.table_catalog
42
+ , cols.table_schema
43
+ , cols.table_name
44
+ , cols.column_name
45
+ , cols.data_type
46
+ FROM
47
+ {{ source('account_usage', 'COLUMNS') }} cols
48
+ LEFT OUTER JOIN
49
+ {{ source('account_usage', 'TABLES') }} tbls
50
+ ON
51
+ cols.table_catalog = tbls.table_catalog
52
+ AND cols.table_schema = tbls.table_schema
53
+ AND cols.table_name = tbls.table_name
54
+ WHERE
55
+ tbls.table_type = 'MATERIALIZED VIEW'
56
+ AND cols.column_name = 'SYS_MV_SOURCE_PARTITION'
57
+ ), implicit_columns_removed AS (
58
+ SELECT
59
+ c.table_catalog
60
+ , c.table_schema
61
+ , c.table_name
62
+ , c.column_name
63
+ , c.data_type
64
+ FROM
65
+ columns c
66
+ INNER JOIN
67
+ accessible_tables a
68
+ ON
69
+ c.table_catalog = a.table_catalog
70
+ AND c.table_schema = a.table_schema
71
+ AND c.table_name = a.name
72
+ MINUS
73
+ SELECT
74
+ table_catalog
75
+ , table_schema
76
+ , table_name
77
+ , column_name
78
+ , data_type
79
+ FROM
80
+ m_view_sys_columns
81
+ ), final AS (
82
+ SELECT
83
+ table_catalog
84
+ , table_schema
85
+ , table_name
86
+ , column_name
87
+ , data_type
88
+ , case when data_type in('NUMBER','DECIMAL', 'DEC', 'NUMERIC',
89
+ 'INT', 'INTEGER', 'BIGINT', 'SMALLINT',
90
+ 'TINYINT', 'BYTEINT')
91
+ THEN true
92
+ else false END AS is_calculable
93
+ FROM
94
+ implicit_columns_removed
39
95
  )
40
-
41
- SELECT
42
- c.table_catalog
43
- , c.table_schema
44
- , c.table_name
45
- , c.column_name
46
- , c.data_type
47
- , case when c.data_type in('NUMBER','DECIMAL', 'DEC', 'NUMERIC',
48
- 'INT', 'INTEGER', 'BIGINT', 'SMALLINT',
49
- 'TINYINT', 'BYTEINT')
50
- THEN true
51
- else false END AS is_calculable
52
- FROM
53
- columns c
54
- INNER JOIN
55
- accessible_tables a
56
- ON
57
- c.table_catalog = a.table_catalog
58
- AND c.table_schema = a.table_schema
59
- AND c.table_name = a.name
96
+ select * from final
@@ -31,3 +31,7 @@ def setup_dbt_profile(connections_json: Dict[str, str], template_path: str, temp
31
31
  with open(profile_path, "w") as profiles:
32
32
  yaml.dump(yaml.safe_load(profiles_body), profiles, default_flow_style=False, allow_unicode=True)
33
33
  return
34
+
35
+
36
def trim_prefix(s: str, prefix: str) -> str:
    """Return ``s`` with a single leading ``prefix`` removed.

    Args:
        s: The string to trim.
        prefix: The exact substring to remove from the start of ``s``.

    Returns:
        ``s`` without the leading ``prefix`` when ``s`` starts with it;
        otherwise ``s`` unchanged. An empty ``prefix`` leaves ``s`` unchanged.
    """
    # str.lstrip() treats its argument as a *set of characters* and strips
    # every leading character found in it, so it can remove far more than the
    # prefix (e.g. "quollio_ollio".lstrip("quollio_") == ""). Match the prefix
    # as a whole substring instead.
    if prefix and s.startswith(prefix):
        return s[len(prefix):]
    return s
@@ -6,6 +6,8 @@ Currently requires explicit naming of env vars to check for
6
6
 
7
7
  import argparse
8
8
  import os
9
+ from distutils.util import strtobool
10
+ from typing import Union
9
11
 
10
12
 
11
13
  # Courtesy of http://stackoverflow.com/a/10551190 with env-var retrieval fixed
@@ -16,7 +18,10 @@ class EnvDefault(argparse.Action):
16
18
  def __init__(self, envvar, required=True, default=None, **kwargs):
17
19
  # override values if envvar exists
18
20
  if envvar in os.environ:
19
- default = os.environ[envvar]
21
+ if kwargs.get("nargs", None) is None:
22
+ default = os.environ[envvar]
23
+ else:
24
+ default = os.environ[envvar].split(" ")
20
25
  if required and default:
21
26
  required = False
22
27
  super(EnvDefault, self).__init__(default=default, required=required, **kwargs)
@@ -25,9 +30,30 @@ class EnvDefault(argparse.Action):
25
30
  setattr(namespace, self.dest, values)
26
31
 
27
32
 
33
class EnvStoreTrue(argparse._StoreTrueAction):
    """A ``store_true`` argparse action whose default can be taken from an env var.

    When ``envvar`` is present in ``os.environ`` its value is converted to a
    bool with ``_convert_value_to_bool`` and used as the action's default.
    """

    def __init__(self, envvar, required=True, default=None, **kwargs):
        """Build the action.

        Args:
            envvar: Name of the environment variable to read the default from.
            required: Whether the option is required; cleared when a truthy
                default is available.
            default: Default used when ``envvar`` is not set.
            **kwargs: Keyword arguments supplied by ``add_argument``. Only
                ``option_strings``, ``dest`` and ``help`` are forwarded.
        """
        # argparse._StoreTrueAction does not accept keywords such as ``nargs``
        # or ``type``, so forward only the ones it understands. ``help`` is
        # forwarded so the option's description is not lost from --help output.
        accepted = ("option_strings", "dest", "help")
        action_kwargs = {key: value for key, value in kwargs.items() if key in accepted}
        if envvar in os.environ:
            default = _convert_value_to_bool(os.environ[envvar])
        # A truthy default already satisfies the option, so it need not be required.
        if required and default:
            required = False
        super(EnvStoreTrue, self).__init__(default=default, required=required, **action_kwargs)
44
+
45
+
28
46
  # functional sugar for the above
29
- def env_default(envvar):
47
def env_default(envvar, store_true=False):
    """Return a factory usable as an argparse ``action`` bound to ``envvar``.

    The returned callable builds an ``EnvStoreTrue`` when ``store_true`` is
    truthy and an ``EnvDefault`` otherwise, passing ``envvar`` followed by the
    keyword arguments it receives.
    """

    def wrapper(**kwargs):
        # Choose the action class lazily, at call time.
        action_cls = EnvStoreTrue if store_true else EnvDefault
        return action_cls(envvar, **kwargs)

    return wrapper
54
+
55
+
56
def _convert_value_to_bool(v: Union[str, bool]) -> bool:
    """Convert a truth-value string to ``bool``; return non-strings unchanged.

    Accepts the same spellings as ``distutils.util.strtobool`` (case-insensitive):
    ``y``, ``yes``, ``t``, ``true``, ``on``, ``1`` are true and ``n``, ``no``,
    ``f``, ``false``, ``off``, ``0`` are false.

    Args:
        v: A string to parse, or a value that is returned as-is.

    Returns:
        The parsed boolean for strings; ``v`` itself otherwise.

    Raises:
        ValueError: If ``v`` is a string that is not a recognised truth value.
    """
    if not isinstance(v, str):
        return v
    # Parsed inline rather than via distutils.util.strtobool: distutils was
    # removed from the standard library in Python 3.12 (PEP 632).
    normalized = v.lower()
    if normalized in ("y", "yes", "t", "true", "on", "1"):
        return True
    if normalized in ("n", "no", "f", "false", "off", "0"):
        return False
    raise ValueError("invalid truth value %r" % (normalized,))
@@ -0,0 +1,17 @@
1
+ import logging
2
+
3
+
4
def set_log_level(level: str = "info") -> None:
    """Call ``logging.basicConfig`` with the level matching ``level``.

    Recognised names are ``"info"``, ``"debug"``, ``"warn"``, ``"error"`` and
    ``"critical"``; any other value selects ``logging.NOTSET``.

    Args:
        level: Lower-case name of the desired log level.
    """
    levels_by_name = {
        "info": logging.INFO,
        "debug": logging.DEBUG,
        "warn": logging.WARNING,
        "error": logging.ERROR,
        "critical": logging.CRITICAL,
    }
    logging.basicConfig(
        level=levels_by_name.get(level, logging.NOTSET),
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    )