quollio-core 0.4.3__py3-none-any.whl → 0.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. quollio_core/__init__.py +1 -1
  2. quollio_core/bricks.py +237 -0
  3. quollio_core/dbt_projects/databricks/.gitignore +4 -0
  4. quollio_core/dbt_projects/databricks/README.md +5 -0
  5. quollio_core/dbt_projects/databricks/analyses/.gitkeep +0 -0
  6. quollio_core/dbt_projects/databricks/dbt_project.yml +21 -0
  7. quollio_core/dbt_projects/databricks/macros/.gitkeep +0 -0
  8. quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.sql +73 -0
  9. quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.yml +14 -0
  10. quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.sql +63 -0
  11. quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.yml +11 -0
  12. quollio_core/dbt_projects/databricks/models/sources.yml +84 -0
  13. quollio_core/dbt_projects/databricks/package-lock.yml +14 -0
  14. quollio_core/dbt_projects/databricks/packages.yml +13 -0
  15. quollio_core/dbt_projects/databricks/profiles/profiles_template.yml +14 -0
  16. quollio_core/dbt_projects/databricks/seeds/.gitkeep +0 -0
  17. quollio_core/dbt_projects/databricks/snapshots/.gitkeep +0 -0
  18. quollio_core/dbt_projects/redshift/macros/materialization/divided_view.sql +54 -22
  19. quollio_core/dbt_projects/redshift/models/quollio_stats_columns.sql +1 -1
  20. quollio_core/dbt_projects/redshift/package-lock.yml +1 -1
  21. quollio_core/dbt_projects/seeds/.gitkeep +0 -0
  22. quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql +4 -0
  23. quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.sql +1 -1
  24. quollio_core/helper/env_default.py +4 -1
  25. quollio_core/profilers/databricks.py +196 -0
  26. quollio_core/profilers/lineage.py +12 -0
  27. quollio_core/profilers/stats.py +0 -1
  28. quollio_core/redshift.py +4 -5
  29. quollio_core/repository/databricks.py +62 -0
  30. quollio_core/snowflake.py +4 -5
  31. {quollio_core-0.4.3.dist-info → quollio_core-0.4.5.dist-info}/METADATA +5 -1
  32. {quollio_core-0.4.3.dist-info → quollio_core-0.4.5.dist-info}/RECORD +34 -15
  33. {quollio_core-0.4.3.dist-info → quollio_core-0.4.5.dist-info}/LICENSE +0 -0
  34. {quollio_core-0.4.3.dist-info → quollio_core-0.4.5.dist-info}/WHEEL +0 -0
quollio_core/__init__.py CHANGED
@@ -1,4 +1,4 @@
  """Quollio Core"""

- __version__ = "0.4.3"
+ __version__ = "0.4.5"
  __author__ = "Quollio Technologies, Inc"
quollio_core/bricks.py ADDED
@@ -0,0 +1,237 @@
+ import argparse
+ import logging
+ import os
+
+ from quollio_core.helper.core import setup_dbt_profile
+ from quollio_core.helper.env_default import env_default
+ from quollio_core.profilers.databricks import (
+     databricks_column_level_lineage,
+     databricks_column_stats,
+     databricks_table_level_lineage,
+ )
+ from quollio_core.repository import databricks as db
+ from quollio_core.repository import dbt, qdc
+
+ logger = logging.getLogger(__name__)
+
+
+ def build_view(
+     conn: db.DatabricksConnectionConfig,
+     target_tables: str,
+     log_level: str = "info",
+ ) -> None:
+     logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
+
+     logger.info("Build profiler views using dbt")
+     # set parameters
+     dbt_client = dbt.DBTClient()
+     current_dir = os.path.dirname(os.path.abspath(__file__))
+     project_path = f"{current_dir}/dbt_projects/databricks"
+     template_path = f"{current_dir}/dbt_projects/databricks/profiles"
+     template_name = "profiles_template.yml"
+
+     # build views using dbt
+     setup_dbt_profile(connections_json=conn.as_dict(), template_path=template_path, template_name=template_name)
+     # FIXME: when executing some of the commands, the working directory changes due to a dbt-core bug.
+     # https://github.com/dbt-labs/dbt-core/issues/8997
+     dbt_client.invoke(
+         cmd="deps",
+         project_dir=project_path,
+         profile_dir=template_path,
+         options=["--no-use-colors", "--log-level", log_level],
+     )
+
+     run_options = ["--no-use-colors", "--log-level", log_level, "--select", target_tables]
+     dbt_client.invoke(
+         cmd="run",
+         project_dir=project_path,
+         profile_dir=template_path,
+         options=run_options,
+     )
+     return
+
+
+ def load_lineage(
+     conn: db.DatabricksConnectionConfig,
+     qdc_client: qdc.QDCExternalAPIClient,
+     tenant_id: str,
+ ) -> None:
+     logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
+
+     logger.info("Generate Databricks table to table lineage.")
+     databricks_table_level_lineage(
+         conn=conn, qdc_client=qdc_client, tenant_id=tenant_id, dbt_table_name="quollio_lineage_table_level"
+     )
+
+     logger.info("Generate Databricks column to column lineage.")
+     databricks_column_level_lineage(
+         conn=conn, qdc_client=qdc_client, tenant_id=tenant_id, dbt_table_name="quollio_lineage_column_level"
+     )
+
+     logger.info("Lineage data is successfully loaded.")
+     return
+
+
+ def load_column_stats(
+     conn: db.DatabricksConnectionConfig,
+     qdc_client: qdc.QDCExternalAPIClient,
+     tenant_id: str,
+ ) -> None:
+     logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
+
+     logger.info("Generate Databricks column stats.")
+     databricks_column_stats(
+         conn=conn,
+         qdc_client=qdc_client,
+         tenant_id=tenant_id,
+     )
+
+     logger.info("Column stats are successfully loaded.")
+     return
+
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser(
+         prog="Quollio Intelligence Agent for Databricks",
+         description="Build views and load lineage and stats to Quollio from Databricks using dbt.",
+         epilog="Copyright (c) 2024 Quollio Technologies, Inc.",
+     )
+     parser.add_argument(
+         "commands",
+         choices=["build_view", "load_lineage", "load_stats"],
+         type=str,
+         nargs="+",
+         help="""
+         The command to execute.
+         'build_view': Build views using dbt.
+         'load_lineage': Load lineage data from the created views to Quollio.
+         'load_stats': Load stats from the created views to Quollio.
+         'load_sqllineage': Load lineage data from SQL parse results (alpha).
+         """,
+     )
+     parser.add_argument(
+         "--host", type=str, action=env_default("DATABRICKS_HOST"), required=False, help="Host for the Databricks workspace"
+     )
+     parser.add_argument(
+         "--http_path",
+         type=str,
+         action=env_default("DATABRICKS_HTTP_PATH"),
+         required=False,
+         help="HTTP path for a Databricks compute resource (i.e., a warehouse)",
+     )
+     parser.add_argument(
+         "--port",
+         type=int,
+         action=env_default("DATABRICKS_PORT"),
+         required=False,
+         help="Port for the Databricks compute resource",
+     )
+     parser.add_argument(
+         "--databricks_client_secret",
+         type=str,
+         action=env_default("DATABRICKS_CLIENT_SECRET"),
+         required=False,
+         help="Secret for the service principal",
+     )
+     parser.add_argument(
+         "--databricks_client_id",
+         type=str,
+         action=env_default("DATABRICKS_CLIENT_ID"),
+         required=False,
+         help="Client ID for the service principal",
+     )
+     parser.add_argument(
+         "--catalog",
+         type=str,
+         required=False,
+         action=env_default("DATABRICKS_TARGET_CATALOG"),
+         help="Target database name where the views are built by dbt",
+     )
+     parser.add_argument(
+         "--schema",
+         type=str,
+         action=env_default("DATABRICKS_TARGET_SCHEMA"),
+         required=False,
+         help="Target schema name where the views are built by dbt",
+     )
+     parser.add_argument(
+         "--log_level",
+         type=str,
+         choices=["debug", "info", "warn", "error", "none"],
+         action=env_default("LOG_LEVEL"),
+         required=False,
+         help="The log level for dbt commands. Default value is info",
+     )
+     parser.add_argument(
+         "--api_url",
+         type=str,
+         action=env_default("QDC_API_URL"),
+         required=False,
+         help="The base URL of the Quollio External API",
+     )
+     parser.add_argument(
+         "--client_id",
+         type=str,
+         action=env_default("QDC_CLIENT_ID"),
+         required=False,
+         help="The client ID created on the Quollio console to let clients access the Quollio External API",
+     )
+     parser.add_argument(
+         "--client_secret",
+         type=str,
+         action=env_default("QDC_CLIENT_SECRET"),
+         required=False,
+         help="The client secret created on the Quollio console to let clients access the Quollio External API",
+     )
+     parser.add_argument(
+         "--tenant_id",
+         type=str,
+         action=env_default("TENANT_ID"),
+         required=False,
+         help="The tenant ID (company ID) where the lineage and stats are loaded",
+     )
+     parser.add_argument(
+         "--target_tables",
+         type=str,
+         nargs="*",
+         choices=["quollio_lineage_table_level", "quollio_lineage_column_level"],
+         action=env_default("DATABRICKS_TARGET_TABLES"),
+         required=False,
+         help="Target tables you want to create with the dbt module. \
+              Specify this parameter only when you want to build a subset \
+              of the views, not all of them. Separate multiple table names \
+              with spaces, e.g. tableA tableB.",
+     )
+
+     args = parser.parse_args()
+
+     conn = db.DatabricksConnectionConfig(
+         host=args.host,
+         http_path=args.http_path,
+         client_id=args.databricks_client_id,
+         client_secret=args.databricks_client_secret,
+         catalog=args.catalog,
+         schema=args.schema,
+     )
+
+     if len(args.commands) == 0:
+         raise ValueError("No command is provided")
+
+     if "build_view" in args.commands:
+         build_view(
+             conn=conn,
+             target_tables=args.target_tables,
+             log_level=args.log_level,
+         )
+
+     if "load_lineage" in args.commands:
+         qdc_client = qdc.QDCExternalAPIClient(
+             base_url=args.api_url, client_id=args.client_id, client_secret=args.client_secret
+         )
+         load_lineage(conn=conn, qdc_client=qdc_client, tenant_id=args.tenant_id)
+
+     if "load_stats" in args.commands:
+         qdc_client = qdc.QDCExternalAPIClient(
+             base_url=args.api_url, client_id=args.client_id, client_secret=args.client_secret
+         )
+         load_column_stats(conn=conn, qdc_client=qdc_client, tenant_id=args.tenant_id)
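For orientation, the same flow the CLI wires up can also be driven from Python; a minimal sketch, assuming quollio-core 0.4.5 is installed (all hosts, paths, and credentials below are placeholders):

```python
# Minimal sketch of driving the new Databricks agent from Python
# (all hosts, paths, and credentials are placeholders).
from quollio_core.bricks import build_view, load_lineage
from quollio_core.repository import databricks as db
from quollio_core.repository import qdc

conn = db.DatabricksConnectionConfig(
    host="dbc-xxxxxxxx-xxxx.cloud.databricks.com",     # placeholder workspace host
    http_path="/sql/1.0/warehouses/0123456789abcdef",  # placeholder SQL warehouse path
    client_id="<service-principal-client-id>",
    client_secret="<service-principal-secret>",
    catalog="my_catalog",
    schema="quollio",
)

# Build both lineage views, then push table and column lineage to Quollio.
build_view(conn=conn, target_tables="quollio_lineage_table_level quollio_lineage_column_level")

qdc_client = qdc.QDCExternalAPIClient(
    base_url="https://external-api.example.quollio.com",  # placeholder External API URL
    client_id="<qdc-client-id>",
    client_secret="<qdc-client-secret>",
)
load_lineage(conn=conn, qdc_client=qdc_client, tenant_id="<tenant-id>")
```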
quollio_core/dbt_projects/databricks/.gitignore ADDED
@@ -0,0 +1,4 @@
+
+ target/
+ dbt_packages/
+ logs/
quollio_core/dbt_projects/databricks/README.md ADDED
@@ -0,0 +1,5 @@
+ ### Quollio Intelligence Agent Support For Databricks
+ Notable Files:
+ 1. [quollio_lineage_table_level.sql](models/quollio_lineage_table_level.sql) - Generates table lineage data from Databricks system tables.
+ 2. [quollio_lineage_column_level.sql](models/quollio_lineage_column_level.sql) - Generates column lineage data from Databricks system tables.
+ 3. [sources.yml](models/sources.yml) - References sources in the Databricks system catalog.
quollio_core/dbt_projects/databricks/analyses/.gitkeep ADDED
File without changes
quollio_core/dbt_projects/databricks/dbt_project.yml ADDED
@@ -0,0 +1,21 @@
+ name: 'quollio_intelligence_databricks'
+ version: '1.0.0'
+ config-version: 2
+
+ profile: 'quollio_intelligence_databricks'
+
+ model-paths: ["models"]
+ analysis-paths: ["analyses"]
+ test-paths: ["tests"]
+ seed-paths: ["seeds"]
+ macro-paths: ["macros"]
+ snapshot-paths: ["snapshots"]
+
+ clean-targets:
+   - "target"
+   - "dbt_packages"
+
+ models:
+   +dbt-osmosis: "{model}.yml"
+ # Databricks automatically enables grants on SQL endpoints
+ # https://docs.getdbt.com/reference/resource-configs/grants
quollio_core/dbt_projects/databricks/macros/.gitkeep ADDED
File without changes
quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.sql ADDED
@@ -0,0 +1,73 @@
+ -- Gets full column lineage from Databricks
+ WITH columns_lineage_history AS (
+     SELECT
+         source_table_full_name AS upstream_table,
+         target_table_full_name AS downstream_table,
+         source_column_name AS upstream_column,
+         target_column_name AS downstream_column,
+         event_time,
+         RANK() OVER (
+             PARTITION BY target_table_full_name
+             ORDER BY
+                 event_time DESC
+         ) AS rank
+     FROM
+         {{ source('access','column_lineage') }}
+     WHERE
+         source_table_full_name IS NOT NULL
+         AND target_table_full_name IS NOT NULL
+         AND source_table_full_name NOT LIKE "%quollio%"
+         AND target_table_full_name NOT LIKE "%quollio%"
+ ),
+ -- Gets the list of existing columns in catalogs.
+ -- The Databricks columns table has no full table name, so build one with CONCAT().
+ existing_columns AS (
+     SELECT
+         CONCAT(table_catalog, '.', table_schema, '.', table_name) AS table_full_name,
+         column_name
+     FROM
+         {{ source('inf_sch','columns') }}
+ ),
+
+ -- Checks that the downstream column still exists and groups operations.
+ downstream_column_exists AS (
+     SELECT
+         upstream_table AS UPSTREAM_TABLE_NAME,
+         upstream_column AS UPSTREAM_COLUMN_NAME,
+         downstream_table AS DOWNSTREAM_TABLE_NAME,
+         downstream_column AS DOWNSTREAM_COLUMN_NAME,
+         event_time
+     FROM
+         columns_lineage_history clh
+         INNER JOIN existing_columns ec ON clh.downstream_table = ec.table_full_name
+         AND clh.downstream_column = ec.column_name
+     WHERE
+         rank = 1
+     GROUP BY upstream_table, upstream_column, downstream_table, downstream_column, event_time
+ ),
+
+ -- Aggregates the column lineage
+ aggregated_column_lineage AS (
+     SELECT
+         downstream_table_name,
+         downstream_column_name,
+         collect_set(
+             named_struct(
+                 'upstream_table_name', upstream_table_name,
+                 'upstream_column_name', upstream_column_name
+             )
+         ) AS upstream_columns
+     FROM
+         downstream_column_exists
+     GROUP BY
+         downstream_table_name,
+         downstream_column_name
+ )
+
+ SELECT
+     downstream_table_name AS DOWNSTREAM_TABLE_NAME,
+     downstream_column_name AS DOWNSTREAM_COLUMN_NAME,
+     to_json(upstream_columns) AS UPSTREAM_COLUMNS
+ FROM
+     aggregated_column_lineage
+
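For reference, each row this model emits pairs one downstream column with a JSON array of its upstream columns. An illustrative row, with hypothetical table and column names:

```python
import json

# Illustrative output row from quollio_lineage_column_level (names hypothetical).
row = {
    "DOWNSTREAM_TABLE_NAME": "main.sales.orders_enriched",
    "DOWNSTREAM_COLUMN_NAME": "order_total",
    "UPSTREAM_COLUMNS": json.dumps(
        [
            {"upstream_table_name": "main.sales.orders", "upstream_column_name": "amount"},
            {"upstream_table_name": "main.sales.tax_rates", "upstream_column_name": "rate"},
        ]
    ),
}
# The profiler later decodes the JSON string back into a list of structs.
print(json.loads(row["UPSTREAM_COLUMNS"])[0]["upstream_table_name"])  # main.sales.orders
```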
quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.yml ADDED
@@ -0,0 +1,14 @@
+ version: 2
+
+ models:
+   - name: quollio_lineage_column_level
+     columns:
+       - name: UPSTREAM_COLUMNS
+         description: 'String column with all upstream columns in JSON format'
+         type: string
+       - name: DOWNSTREAM_TABLE_NAME
+         description: 'Full downstream table name in <catalog>.<schema>.<table> format'
+         type: string
+       - name: DOWNSTREAM_COLUMN_NAME
+         description: 'Downstream column name'
+         type: string
quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.sql ADDED
@@ -0,0 +1,63 @@
+ -- Gets full table lineage from Databricks
+ WITH table_lineage_history AS (
+     SELECT
+         source_table_full_name AS upstream_table,
+         target_table_full_name AS downstream_table,
+         target_type,
+         event_time,
+         RANK() OVER (
+             PARTITION BY target_table_full_name
+             ORDER BY
+                 event_time DESC
+         ) AS rank
+     FROM
+         {{ source('access','table_lineage') }}
+     WHERE
+         source_table_full_name IS NOT NULL
+         AND target_table_full_name IS NOT NULL
+         AND source_table_full_name NOT LIKE "%quollio%"
+         AND target_table_full_name NOT LIKE "%quollio%"
+ ),
+ -- Gets the list of existing tables in catalogs
+ existing_tables AS (
+     SELECT
+         CONCAT(table_catalog, '.', table_schema, '.', table_name) AS table_full_name
+     FROM
+         {{ source('inf_sch','tables') }}
+ ),
+
+ -- Checks that the downstream table still exists and groups operations.
+ downstream_table_exists AS (
+     SELECT
+         upstream_table,
+         downstream_table,
+         target_type,
+         event_time
+     FROM
+         table_lineage_history tlh
+         INNER JOIN existing_tables et ON tlh.downstream_table = et.table_full_name
+     WHERE
+         rank = 1
+     GROUP BY upstream_table, downstream_table, target_type, event_time
+ ),
+
+ aggregated_table_lineage AS (
+     SELECT
+         downstream_table,
+         collect_set(
+             named_struct(
+                 'upstream_object_name', upstream_table
+             )
+         ) AS upstream_tables
+     FROM
+         downstream_table_exists
+     GROUP BY
+         downstream_table
+ )
+ SELECT
+     downstream_table AS DOWNSTREAM_TABLE_NAME,
+     to_json(upstream_tables) AS UPSTREAM_TABLES
+
+ FROM
+     aggregated_table_lineage
+
quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.yml ADDED
@@ -0,0 +1,11 @@
+ version: 2
+
+ models:
+   - name: quollio_lineage_table_level
+     columns:
+       - name: UPSTREAM_TABLES
+         description: 'String column with all upstream tables in JSON format'
+         type: string
+       - name: DOWNSTREAM_TABLE_NAME
+         description: 'Full downstream table name in <catalog>.<schema>.<table> format'
+         type: string
quollio_core/dbt_projects/databricks/models/sources.yml ADDED
@@ -0,0 +1,84 @@
+ version: 2
+
+ sources:
+   - name: access
+     database: system
+     schema: access
+     tables:
+       - name: table_lineage
+         description: Describes table level lineage
+         columns:
+           - name: source_table_full_name
+             description: ''
+             type: string
+           - name: target_table_full_name
+             description: ''
+             type: string
+           - name: target_type
+             description: ''
+             type: string
+           - name: event_time
+             description: ''
+             type: timestamp
+
+       - name: column_lineage
+         description: Describes column level lineage
+         columns:
+           - name: source_table_full_name
+             description: ''
+             type: string
+           - name: target_table_full_name
+             description: ''
+             type: string
+           - name: event_time
+             description: ''
+             type: timestamp
+           - name: source_column_name
+             description: ''
+             type: string
+           - name: target_column_name
+             description: ''
+             type: string
+
+   - name: inf_sch
+     database: system
+     schema: information_schema
+     tables:
+       - name: tables
+         description: Lists existing tables (i.e., not deleted).
+         columns:
+           - name: table_catalog
+             description: ''
+             type: string
+           - name: table_schema
+             description: ''
+             type: string
+           - name: table_name
+             description: ''
+             type: string
+
+       - name: views
+         description: Lists existing views (i.e., not deleted). Views are treated as tables.
+         columns:
+           - name: table_catalog
+             description: ''
+             type: string
+           - name: table_schema
+             description: ''
+             type: string
+           - name: table_name
+             description: ''
+             type: string
+
+       - name: columns
+         description: ''
+         columns:
+           - name: table_catalog
+             description: ''
+             type: string
+           - name: table_schema
+             description: ''
+             type: string
+           - name: table_name
+             description: ''
+             type: string
quollio_core/dbt_projects/databricks/package-lock.yml ADDED
@@ -0,0 +1,14 @@
+ packages:
+   - package: dbt-labs/dbt_utils
+     version: 1.1.1
+   - package: dbt-labs/spark_utils
+     version: 0.3.0
+   - package: dbt-labs/codegen
+     version: 0.12.1
+   - package: dbt-labs/dbt_external_tables
+     version: 0.8.7
+   - package: dbt-labs/dbt_project_evaluator
+     version: 0.8.1
+   - package: brooklyn-data/dbt_artifacts
+     version: 2.6.2
+ sha1_hash: cbb324267dbf6c6fb7de11b162e4fbafd1e32a9c
quollio_core/dbt_projects/databricks/packages.yml ADDED
@@ -0,0 +1,13 @@
+ packages:
+   - package: dbt-labs/dbt_utils
+     version: [">=0.0.0", "<2.0.0"]
+   - package: dbt-labs/spark_utils
+     version: [">=0.0.0", "<1.0.0"]
+   - package: dbt-labs/codegen
+     version: [">=0.0.0", "<1.0.0"]
+   - package: dbt-labs/dbt_external_tables
+     version: [">=0.0.0", "<1.0.0"]
+   - package: dbt-labs/dbt_project_evaluator
+     version: [">=0.0.0", "<1.0.0"]
+   - package: brooklyn-data/dbt_artifacts
+     version: [">=2.0.0", "<3.0.0"]
quollio_core/dbt_projects/databricks/profiles/profiles_template.yml ADDED
@@ -0,0 +1,14 @@
+ quollio_intelligence_databricks:
+   target: project
+   outputs:
+     project:
+       type: databricks
+       host: {{ host }}
+       http_path: {{ http_path }}
+       catalog: {{ catalog }}
+       schema: {{ schema }}
+       auth_type: oauth
+       client_id: {{ client_id }}
+       client_secret: {{ client_secret }}
+       databricks_port: {{ databricks_port }}
+
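The `{{ ... }}` placeholders are filled in from the connection config by `setup_dbt_profile` before dbt runs. The rendering step itself is not part of this diff, but it is presumably equivalent to a jinja2 render along these lines:

```python
# A sketch of rendering this profile template with jinja2 (the real
# setup_dbt_profile implementation is not shown in this diff).
from jinja2 import Environment, FileSystemLoader

def render_profile(connections_json: dict, template_path: str, template_name: str) -> str:
    env = Environment(loader=FileSystemLoader(template_path))
    template = env.get_template(template_name)
    # Keys such as host, http_path, catalog, schema, client_id and
    # client_secret come from DatabricksConnectionConfig.as_dict().
    return template.render(**connections_json)
```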
quollio_core/dbt_projects/databricks/seeds/.gitkeep ADDED
File without changes
quollio_core/dbt_projects/databricks/snapshots/.gitkeep ADDED
File without changes
quollio_core/dbt_projects/redshift/macros/materialization/divided_view.sql CHANGED
@@ -2,6 +2,7 @@
  {%- set identifier = model['alias'] %}
  {%- set target_relations = [] %}
  {%- set chunk = config.get('chunk') %}
+ {%- set grant_config = config.get('grants') %}

  {{ run_hooks(pre_hooks, inside_transaction=False) }}
  -- `BEGIN` happens here:
@@ -22,29 +23,57 @@ SELECT * FROM {{ ref('quollio_stats_profiling_columns') }} WHERE table_name no
  {%- for i in range(0, records|length, chunk) -%}
  {%- set build_sql %}
  {%- for record in records[i: i+chunk] -%}
- {%- if not loop.first %}UNION{% endif %}
+ {%- if not loop.first -%}UNION{% endif %}
  SELECT
-     DISTINCT
-     '{{record[0]}}'::varchar as db_name
-     , '{{record[1]}}'::varchar as schema_name
-     , '{{record[2]}}'::varchar as table_name
-     , '{{record[3]}}'::varchar as column_name
-     , {% if var("skip_heavy") == false and record[5] == true %}cast(max("{{record[3]}}") as varchar){% else %}null::varchar{% endif %} AS max_value
-     , {% if var("skip_heavy") == false and record[5] == true %}cast(min("{{record[3]}}") as varchar){% else %}null::varchar{% endif %} AS min_value
-     -- requires full table scan
-     , {% if var("skip_heavy") == false %}cast(SUM(NVL2("{{record[3]}}", 0, 1)) as integer){% else %}null::integer{% endif %} AS null_count
-     , APPROXIMATE COUNT(DISTINCT "{{record[3]}}") AS cardinality
-     -- requires full table scan
-     , {% if var("skip_heavy") == false and record[5] == true %}cast(avg("{{record[3]}}")as varchar){% else %}null::varchar{% endif %} AS avg_value
-     , {% if var("skip_heavy") == false and record[5] == true %}(SELECT cast(median("{{record[3]}}") as varchar) FROM {{record[2]}}){% else %}null::varchar{% endif %} AS median_value
-     -- requires full table scan
-     , {% if var("skip_heavy") == false and record[4] == false %}
-         (SELECT cast("{{record[3]}}" as varchar) FROM (
-             SELECT "{{record[3]}}", ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC) AS row_num FROM {{record[2]}} GROUP BY "{{record[3]}}"
-         ) WHERE row_num = 1)
-       {% else %}null::varchar{% endif %} AS mode_value
-     , {% if record[5] == true %}cast(STDDEV_SAMP("{{record[3]}}") as integer){% else %}null::integer{% endif %} AS stddev_value
- FROM {{ record[0] }}.{{ record[1] }}.{{ record[2] }}
+     main.db_name
+     , main.schema_name
+     , main.table_name
+     , main.column_name
+     , main.max_value
+     , main.min_value
+     , main.null_count
+     , main.cardinality
+     , main.avg_value
+     , main.median_value
+     , mode.mode_value
+     , main.stddev_value
+ FROM
+     (
+         SELECT
+             DISTINCT
+             '{{record[0]}}'::varchar as db_name
+             , '{{record[1]}}'::varchar as schema_name
+             , '{{record[2]}}'::varchar as table_name
+             , '{{record[3]}}'::varchar as column_name
+             , {% if var("skip_heavy") == false and record[5] == true %}cast(max("{{record[3]}}") as varchar){% else %}null::varchar{% endif %} AS max_value
+             , {% if var("skip_heavy") == false and record[5] == true %}cast(min("{{record[3]}}") as varchar){% else %}null::varchar{% endif %} AS min_value
+             -- requires full table scan
+             , {% if var("skip_heavy") == false %}cast(SUM(NVL2("{{record[3]}}", 0, 1)) as integer){% else %}null::integer{% endif %} AS null_count
+             , APPROXIMATE COUNT(DISTINCT "{{record[3]}}") AS cardinality
+             -- requires full table scan
+             , {% if var("skip_heavy") == false and record[5] == true %}cast(avg("{{record[3]}}")as varchar){% else %}null::varchar{% endif %} AS avg_value
+             , {% if var("skip_heavy") == false and record[5] == true %}cast(median("{{record[3]}}") as varchar){% else %}null::varchar{% endif %} AS median_value
+             -- requires full table scan
+             , {% if record[5] == true %}cast(STDDEV_SAMP("{{record[3]}}") as integer){% else %}null::integer{% endif %} AS stddev_value
+         FROM {{ record[0] }}.{{ record[1] }}.{{ record[2] }}
+     ) main, (
+         {%- if var("skip_heavy") == false and record[4] == false %}
+         SELECT
+             cast("{{record[3]}}" as varchar) mode_value
+         FROM (
+             SELECT
+                 DISTINCT
+                 "{{record[3]}}"
+                 , ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC) AS row_num
+             FROM {{ record[0] }}.{{ record[1] }}.{{ record[2] }}
+             GROUP BY
+                 "{{record[3]}}"
+         )
+         WHERE
+             row_num = 1
+         {% else %}
+         SELECT null as mode_value {%- endif -%}
+     ) mode
  {% endfor -%}
  {%- endset %}
  -- create a view with a index as suffix
@@ -54,6 +83,9 @@ SELECT * FROM {{ ref('quollio_stats_profiling_columns') }} WHERE table_name no
  {% call statement("main") %}
      {{ get_replace_view_sql(target_relation, build_sql) }}
  {% endcall %}
+ {%- set full_refresh_mode = (should_full_refresh()) -%}
+ {%- set should_revoke = should_revoke(target_relation, full_refresh_mode) %}
+ {%- do apply_grants(target_relation, grant_config, should_revoke) %}
  {%- set target_relations = target_relations.append(target_relation) %}
  {%- endfor -%}

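For context, the materialization's outer loop creates one index-suffixed view per chunk of profiling-target records; a Python sketch of that chunking (the records below are illustrative):

```python
# Sketch of the divided_view chunking: one index-suffixed view is created
# per chunk of profiling targets (records are illustrative tuples).
records = [("db", "public", f"table_{n}", "col", False, True) for n in range(45)]
chunk = 20  # the new default set in quollio_stats_columns.sql below

for i in range(0, len(records), chunk):
    batch = records[i : i + chunk]
    view_name = f"quollio_stats_columns_{i // chunk}"
    print(view_name, len(batch))  # three views: 20, 20, and 5 records
```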
quollio_core/dbt_projects/redshift/models/quollio_stats_columns.sql CHANGED
@@ -1,7 +1,7 @@
  {{
      config(
          materialized='divided_view',
-         chunk=1000
+         chunk=20
      )
  }}
  -- depends_on: {{ ref('quollio_stats_profiling_columns') }}
quollio_core/dbt_projects/redshift/package-lock.yml CHANGED
@@ -1,4 +1,4 @@
  packages:
    - package: dbt-labs/dbt_utils
      version: 1.1.1
- sha1_hash: 23451c207c1d4dd71b5925f12a5cd66f2ebb2b3b
+ sha1_hash: a158c48c59c2bb7d729d2a4e215aabe5bb4f3353
quollio_core/dbt_projects/seeds/.gitkeep ADDED
File without changes
quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql CHANGED
@@ -2,6 +2,7 @@
  {%- set identifier = model['alias'] %}
  {%- set target_relations = [] %}
  {%- set chunk = config.get('chunk') %}
+ {%- set grant_config = config.get('grants') %}

  {{ run_hooks(pre_hooks, inside_transaction=False) }}
  -- `BEGIN` happens here:
@@ -46,6 +47,9 @@ SELECT * FROM {{ ref('quollio_stats_profiling_columns') }} WHERE NOT startswit
  {% call statement("main") %}
      {{ get_create_view_as_sql(target_relation, build_sql) }}
  {% endcall %}
+ {%- set full_refresh_mode = (should_full_refresh()) -%}
+ {%- set should_revoke = should_revoke(target_relation, full_refresh_mode) %}
+ {%- do apply_grants(target_relation, grant_config, should_revoke) %}
  {%- set target_relations = target_relations.append(target_relation) %}
  {%- endfor -%}

quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.sql CHANGED
@@ -1,7 +1,7 @@
  {{
      config(
          materialized='divided_view',
-         chunk=1000
+         chunk=20
      )
  }}
  -- depends_on: {{ ref('quollio_stats_profiling_columns') }}
quollio_core/helper/env_default.py CHANGED
@@ -16,7 +16,10 @@ class EnvDefault(argparse.Action):
      def __init__(self, envvar, required=True, default=None, **kwargs):
          # override values if envvar exists
          if envvar in os.environ:
-             default = os.environ[envvar]
+             if kwargs.get("nargs", None) is None:
+                 default = os.environ[envvar]
+             else:
+                 default = os.environ[envvar].split(" ")
          if required and default:
              required = False
          super(EnvDefault, self).__init__(default=default, required=required, **kwargs)
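A quick check of the new behavior: when the backing environment variable is set and the argument declares `nargs`, the parsed value now arrives as a list split on spaces:

```python
import argparse
import os

from quollio_core.helper.env_default import env_default

# The env var backs a nargs="*" argument, so it is split into a list.
os.environ["DATABRICKS_TARGET_TABLES"] = "quollio_lineage_table_level quollio_lineage_column_level"

parser = argparse.ArgumentParser()
parser.add_argument(
    "--target_tables",
    nargs="*",
    action=env_default("DATABRICKS_TARGET_TABLES"),
    required=False,
)
args = parser.parse_args([])
print(args.target_tables)
# ['quollio_lineage_table_level', 'quollio_lineage_column_level']
```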
quollio_core/profilers/databricks.py ADDED
@@ -0,0 +1,196 @@
+ import logging
+ from typing import Dict, List
+
+ from quollio_core.profilers.lineage import (
+     gen_column_lineage_payload,
+     gen_table_lineage_payload,
+     parse_databricks_table_lineage,
+ )
+ from quollio_core.profilers.stats import gen_table_stats_payload
+ from quollio_core.repository import databricks, qdc
+
+ logger = logging.getLogger(__name__)
+
+
+ def databricks_table_level_lineage(
+     conn: databricks.DatabricksConnectionConfig,
+     qdc_client: qdc.QDCExternalAPIClient,
+     tenant_id: str,
+     dbt_table_name: str = "quollio_lineage_table_level",
+ ) -> None:
+     logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
+     with databricks.DatabricksQueryExecutor(config=conn) as databricks_executor:
+         results = databricks_executor.get_query_results(
+             query=f"""
+             SELECT
+                 DOWNSTREAM_TABLE_NAME,
+                 UPSTREAM_TABLES
+             FROM {conn.catalog}.{conn.schema}.{dbt_table_name}
+             """
+         )
+     tables = parse_databricks_table_lineage(results)
+     update_table_lineage_inputs = gen_table_lineage_payload(
+         tenant_id=tenant_id,
+         endpoint=conn.host,
+         tables=tables,
+     )
+
+     req_count = 0
+     for update_table_lineage_input in update_table_lineage_inputs:
+         logger.info(
+             "Generating table lineage. downstream: %s -> %s -> %s",
+             update_table_lineage_input.downstream_database_name,
+             update_table_lineage_input.downstream_schema_name,
+             update_table_lineage_input.downstream_table_name,
+         )
+         status_code = qdc_client.update_lineage_by_id(
+             global_id=update_table_lineage_input.downstream_global_id,
+             payload=update_table_lineage_input.upstreams.as_dict(),
+         )
+         if status_code == 200:
+             req_count += 1
+     logger.info("Generating table lineage is finished. %s lineages are ingested.", req_count)
+     return
+
+
+ def databricks_column_level_lineage(
+     conn: databricks.DatabricksConnectionConfig,
+     qdc_client: qdc.QDCExternalAPIClient,
+     tenant_id: str,
+     dbt_table_name: str = "quollio_lineage_column_level",
+ ) -> None:
+     logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
+     with databricks.DatabricksQueryExecutor(config=conn) as databricks_executor:
+         results = databricks_executor.get_query_results(
+             query=f"""
+             SELECT
+                 *
+             FROM
+                 {conn.catalog}.{conn.schema}.{dbt_table_name}
+             """
+         )
+
+     update_column_lineage_inputs = gen_column_lineage_payload(
+         tenant_id=tenant_id,
+         endpoint=conn.host,
+         columns=results,
+     )
+
+     req_count = 0
+     for update_column_lineage_input in update_column_lineage_inputs:
+         logger.info(
+             "Generating column lineage. downstream: %s -> %s -> %s -> %s",
+             update_column_lineage_input.downstream_database_name,
+             update_column_lineage_input.downstream_schema_name,
+             update_column_lineage_input.downstream_table_name,
+             update_column_lineage_input.downstream_column_name,
+         )
+         status_code = qdc_client.update_lineage_by_id(
+             global_id=update_column_lineage_input.downstream_global_id,
+             payload=update_column_lineage_input.upstreams.as_dict(),
+         )
+         if status_code == 200:
+             req_count += 1
+     logger.info(
+         "Generating column lineage is finished. %s lineages are ingested.",
+         req_count,
+     )
+     return
+
+
+ def _get_monitoring_tables(
+     conn: databricks.DatabricksConnectionConfig, monitoring_table_id: str = "_profile_metrics"
+ ) -> List[Dict[str, str]]:
+     tables = []
+     query = f"""
+     SELECT
+         table_catalog,
+         table_schema,
+         table_name,
+         CONCAT(table_catalog, '.', table_schema, '.', table_name) AS table_fqdn
+     FROM
+         system.information_schema.tables
+     WHERE table_name LIKE "%{monitoring_table_id}"
+     """
+     with databricks.DatabricksQueryExecutor(config=conn) as databricks_executor:
+         tables = databricks_executor.get_query_results(query)
+     if len(tables) > 0:
+         logger.info("Found %s monitoring tables.", len(tables))
+         return tables
+     else:
+         logger.info("No monitoring tables found.")
+         return []
+
+
+ def _get_column_stats(
+     conn: databricks.DatabricksConnectionConfig, monitoring_table_id: str = "_profile_metrics"
+ ) -> List[Dict[str, str]]:
+     tables = _get_monitoring_tables(conn, monitoring_table_id)
+     if not tables:
+         return []
+     stats = []
+     for table in tables:
+         monitored_table = table["table_fqdn"].removesuffix(monitoring_table_id)
+         monitored_table = monitored_table.split(".")
+         if len(monitored_table) != 3:
+             raise ValueError(f"Invalid table name: {table['table_fqdn']}")
+         with databricks.DatabricksQueryExecutor(config=conn) as databricks_executor:
+             query = """
+             WITH MaxCounts AS (
+                 SELECT
+                     t.COLUMN_NAME,
+                     MAX(item.count) AS max_count,
+                     MAX(t.window) AS latest
+                 FROM
+                     {monitoring_table} t
+                     LATERAL VIEW EXPLODE(t.frequent_items) AS item
+                 GROUP BY t.COLUMN_NAME
+             )
+             SELECT
+                 "{monitored_table_catalog}" as DB_NAME,
+                 "{monitored_table_schema}" as SCHEMA_NAME,
+                 "{monitored_table_name}" as TABLE_NAME,
+                 t.COLUMN_NAME,
+                 t.DATA_TYPE,
+                 t.distinct_count as CARDINALITY,
+                 t.MAX as MAX_VALUE,
+                 t.MIN as MIN_VALUE,
+                 t.AVG as AVG_VALUE,
+                 t.MEDIAN as MEDIAN_VALUE,
+                 t.STDDEV as STDDEV_VALUE,
+                 t.NUM_NULLS as NULL_COUNT,
+                 item.item AS MODE_VALUE
+             FROM
+                 {monitoring_table} t
+                 JOIN MaxCounts mc ON t.COLUMN_NAME = mc.COLUMN_NAME
+                 LATERAL VIEW EXPLODE(t.frequent_items) AS item
+             WHERE
+                 item.count = mc.max_count
+                 AND t.window = mc.latest
+             """.format(
+                 monitoring_table=table["table_fqdn"],
+                 monitored_table_catalog=monitored_table[0],
+                 monitored_table_schema=monitored_table[1],
+                 monitored_table_name=monitored_table[2],
+             )
+             stats.append(databricks_executor.get_query_results(query))
+     return stats
+
+
+ def databricks_column_stats(
+     conn: databricks.DatabricksConnectionConfig,
+     qdc_client: qdc.QDCExternalAPIClient,
+     tenant_id: str,
+     monitoring_table_id: str = "_profile_metrics",
+ ) -> None:
+     table_stats = _get_column_stats(conn, monitoring_table_id)
+     for table in table_stats:
+         stats = gen_table_stats_payload(tenant_id, conn.host, table)
+         for stat in stats:
+             status_code = qdc_client.update_stats_by_id(
+                 global_id=stat.global_id,
+                 payload=stat.body.as_dict(),
+             )
+             if status_code == 200:
+                 logger.info("Stats for %s are successfully ingested.", stat.global_id)
+     return
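The column aliases in the stats query above define the row shape that `gen_table_stats_payload` consumes; an illustrative row from `_get_column_stats`, with hypothetical values:

```python
# Illustrative stats row for one monitored column (all values hypothetical);
# gen_table_stats_payload consumes dicts keyed like this.
stat_row = {
    "DB_NAME": "main",
    "SCHEMA_NAME": "sales",
    "TABLE_NAME": "orders",
    "COLUMN_NAME": "amount",
    "DATA_TYPE": "double",
    "CARDINALITY": 1042,
    "MAX_VALUE": "9999.99",
    "MIN_VALUE": "0.01",
    "AVG_VALUE": "125.4",
    "MEDIAN_VALUE": "89.9",
    "STDDEV_VALUE": 42,
    "NULL_COUNT": 3,
    "MODE_VALUE": "19.99",
}
```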
quollio_core/profilers/lineage.py CHANGED
@@ -141,3 +141,15 @@ def parse_snowflake_results(results: List[Dict[str, str]]):
          payload["UPSTREAM_TABLES"] = json.loads(result["UPSTREAM_TABLES"])
          payloads.append(payload)
      return payloads
+
+
+ def parse_databricks_table_lineage(results: List) -> List[Dict[str, Dict]]:
+     # Parses results from the Quollio Databricks lineage view.
+     # Returns a list of dicts with the downstream table name and the decoded upstream tables.
+     payloads = list()
+     for result in results:
+         payload = dict()
+         payload["DOWNSTREAM_TABLE_NAME"] = result["DOWNSTREAM_TABLE_NAME"]
+         payload["UPSTREAM_TABLES"] = json.loads(result["UPSTREAM_TABLES"])
+         payloads.append(payload)
+     return payloads
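A round-trip sketch for the new parser: the dbt view returns `UPSTREAM_TABLES` as a JSON string, and the parser decodes it (table names are hypothetical):

```python
import json

from quollio_core.profilers.lineage import parse_databricks_table_lineage

# Rows as returned by the quollio_lineage_table_level view (names hypothetical).
rows = [
    {
        "DOWNSTREAM_TABLE_NAME": "main.sales.orders_enriched",
        "UPSTREAM_TABLES": json.dumps([{"upstream_object_name": "main.sales.orders"}]),
    }
]
payloads = parse_databricks_table_lineage(rows)
print(payloads[0]["UPSTREAM_TABLES"][0]["upstream_object_name"])  # main.sales.orders
```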
quollio_core/profilers/stats.py CHANGED
@@ -77,7 +77,6 @@ def gen_table_stats_payload(tenant_id: str, endpoint: str, stats: List[Dict[str,
          table_global_id = new_global_id(
              tenant_id=tenant_id, cluster_id=endpoint, data_id=global_id_arg, data_type="column"
          )
-
          stats_request = StatsRequest(
              global_id=table_global_id,
              db=stat["DB_NAME"],
quollio_core/redshift.py CHANGED
@@ -50,11 +50,10 @@ def build_view(
          options=["--no-use-colors", "--log-level", log_level, "--vars", options],
      )
      run_options = ["--no-use-colors", "--log-level", log_level, "--vars", options]
-     target_tables_list = target_tables.split()
-     if target_tables_list is not None:
-         if "quollio_stats_columns" in target_tables_list:
-             target_tables_list.append("quollio_stats_profiling_columns")
-         target_tables_str = " ".join(target_tables_list)
+     if target_tables is not None:
+         if "quollio_stats_columns" in target_tables:
+             target_tables.append("quollio_stats_profiling_columns")
+         target_tables_str = " ".join(target_tables)
          run_options.append("--select")
          run_options.append(target_tables_str)

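The old code called `target_tables.split()` before the `None` check, so a missing selection raised `AttributeError` and the check never fired; since `EnvDefault` now supplies a list, the check works as intended. A standalone sketch of the fixed logic (not the exact function from redshift.py):

```python
# Sketch of the fixed selection logic: None is handled before any list work.
def build_select_option(target_tables):
    if target_tables is not None:
        if "quollio_stats_columns" in target_tables:
            # the stats view depends on the profiling-columns view
            target_tables.append("quollio_stats_profiling_columns")
        return " ".join(target_tables)
    return None

print(build_select_option(None))  # None; the old .split() here raised AttributeError
print(build_select_option(["quollio_stats_columns"]))
# quollio_stats_columns quollio_stats_profiling_columns
```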
quollio_core/repository/databricks.py ADDED
@@ -0,0 +1,62 @@
+ import logging
+ from dataclasses import asdict, dataclass
+ from typing import Dict, List, Optional
+
+ from databricks.sdk.core import Config, HeaderFactory, oauth_service_principal
+ from databricks.sql.client import Connection, connect
+
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
+
+
+ @dataclass
+ class DatabricksConnectionConfig:
+     host: str
+     http_path: str
+     client_id: str
+     client_secret: str
+     catalog: str
+     schema: str
+
+     def as_dict(self) -> Dict[str, str]:
+         return asdict(self)
+
+
+ class DatabricksQueryExecutor:
+     def __init__(self, config: DatabricksConnectionConfig) -> None:
+         self.config = config
+         self.conn = self.__initialize()
+
+     def __enter__(self):
+         return self
+
+     def __exit__(self, exc_type, exc_value, traceback):
+         self.conn.close()
+
+     def __initialize(self) -> Connection:
+         conn = connect(
+             server_hostname=self.config.host,
+             http_path=self.config.http_path,
+             credentials_provider=self.credential_provider,
+         )
+         return conn
+
+     def get_query_results(self, query: str) -> List[Dict[str, str]]:
+         results_asdict: List[Dict[str, str]] = []
+         with self.conn.cursor() as cur:
+             try:
+                 cur.execute(query)
+                 result: List[Dict[str, str]] = cur.fetchall()
+             except Exception as e:
+                 logging.error(query, exc_info=True)
+                 logging.error("databricks get_query_results failed. %s", e)
+                 raise
+
+             for row in result:
+                 results_asdict.append(row.asDict())
+         return results_asdict
+
+     def credential_provider(self) -> Optional[HeaderFactory]:
+         config = Config(
+             host=f"https://{self.config.host}", client_id=self.config.client_id, client_secret=self.config.client_secret
+         )
+         return oauth_service_principal(config)
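Usage mirrors the profilers above; a minimal sketch with placeholder connection values:

```python
# Minimal usage sketch for DatabricksQueryExecutor (placeholder values);
# exiting the context manager closes the underlying databricks-sql connection.
from quollio_core.repository.databricks import (
    DatabricksConnectionConfig,
    DatabricksQueryExecutor,
)

config = DatabricksConnectionConfig(
    host="dbc-xxxxxxxx-xxxx.cloud.databricks.com",
    http_path="/sql/1.0/warehouses/0123456789abcdef",
    client_id="<service-principal-client-id>",
    client_secret="<service-principal-secret>",
    catalog="main",
    schema="quollio",
)

with DatabricksQueryExecutor(config=config) as executor:
    rows = executor.get_query_results("SELECT 1 AS one")
    print(rows)  # [{'one': 1}]
```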
quollio_core/snowflake.py CHANGED
@@ -50,11 +50,10 @@ def build_view(
          options=["--no-use-colors", "--log-level", log_level, "--vars", options],
      )
      run_options = ["--no-use-colors", "--log-level", log_level, "--vars", options]
-     target_tables_list = target_tables.split()
-     if target_tables_list is not None:
-         if "quollio_stats_columns" in target_tables_list:
-             target_tables_list.append("quollio_stats_profiling_columns")
-         target_tables_str = " ".join(target_tables_list)
+     if target_tables is not None:
+         if "quollio_stats_columns" in target_tables:
+             target_tables.append("quollio_stats_profiling_columns")
+         target_tables_str = " ".join(target_tables)
          run_options.append("--select")
          run_options.append(target_tables_str)

{quollio_core-0.4.3.dist-info → quollio_core-0.4.5.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: quollio-core
- Version: 0.4.3
+ Version: 0.4.5
  Summary: Quollio Core
  Author-email: quollio-dev <qt.dev@quollio.com>
  Maintainer-email: RyoAriyama <ryo.arym@gmail.com>, tharuta <35373297+TakumiHaruta@users.noreply.github.com>
@@ -21,14 +21,18 @@ Requires-Dist: blake3==0.3.3
  Requires-Dist: dbt-core==1.7.10
  Requires-Dist: dbt-snowflake==1.7.0
  Requires-Dist: dbt-redshift==1.7.1
+ Requires-Dist: dbt-databricks==1.7.1
  Requires-Dist: jinja2==3.1.3
  Requires-Dist: PyYAML==6.0.1
  Requires-Dist: requests==2.31.0
  Requires-Dist: pyjwt==2.8.0
  Requires-Dist: redshift-connector==2.0.915
  Requires-Dist: snowflake-connector-python==3.5.0
+ Requires-Dist: databricks-sdk==0.17.0
+ Requires-Dist: databricks-sql-connector==2.9.5
  Requires-Dist: sqlglot==20.8.0
  Requires-Dist: black>=22.3.0 ; extra == "test"
+ Requires-Dist: coverage>=7.3.2 ; extra == "test"
  Requires-Dist: isort>=5.10.1 ; extra == "test"
  Requires-Dist: pyproject-flake8>=0.0.1-alpha.2 ; extra == "test"
  Requires-Dist: pytest>=5.2 ; extra == "test"
{quollio_core-0.4.3.dist-info → quollio_core-0.4.5.dist-info}/RECORD CHANGED
@@ -1,20 +1,36 @@
- quollio_core/__init__.py,sha256=v7qeULEwxg6wNqWICrvNSrkxgZTWbgvhU4C6DUBvsVw,83
- quollio_core/redshift.py,sha256=81cFHWzTHgO7u6wpW32w1dsvqttVRxpkhj-n7VL0Blo,10234
- quollio_core/snowflake.py,sha256=VtslGFeCWpLotGk2RE9JTLRFP3rEm0JBG08Gd3oGChY,10333
+ quollio_core/__init__.py,sha256=BXXaDg79qecIHTSRqA3Yh4FvgDbWzUjgYYWbwaMTRJY,83
+ quollio_core/bricks.py,sha256=PCHyh_I6M4PBRpLDtc5DTr7rpharllu-vcSAhySM4xg,8001
+ quollio_core/redshift.py,sha256=wap7QmV-YuHZAomIrHXytGUuxhQ5MFEb38QDY3XrThQ,10167
+ quollio_core/snowflake.py,sha256=8IMbdTjCDBIiS_GF8APWRTVWNj6EM3ZT8MRN12T-1v0,10266
+ quollio_core/dbt_projects/databricks/.gitignore,sha256=1jJAyXSzJ3YUm0nx3i7wUSE4RjQMX3ad6F8O88UbtzI,29
+ quollio_core/dbt_projects/databricks/README.md,sha256=ZpRQyhFAODAiS8dc1Kb_ndkul4cu4o4udN_EMa49CU4,440
+ quollio_core/dbt_projects/databricks/dbt_project.yml,sha256=3sH98RNk7TnphvI3yEdXDstb92kW5BNxr-cT0tXhwzk,480
+ quollio_core/dbt_projects/databricks/package-lock.yml,sha256=0s8qBWevHon05NSj37p8NMa-rMVs_6McdKmddUVyGQ8,376
+ quollio_core/dbt_projects/databricks/packages.yml,sha256=JtwWg3XK-nJp4tPv2QEER0kc5F2sPeVYqTkE54zMsIM,443
+ quollio_core/dbt_projects/databricks/analyses/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ quollio_core/dbt_projects/databricks/macros/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.sql,sha256=mZ4mDCEZTwiSgCUr-w2QGze2-NQapt45EyQNQkCOI5I,2171
+ quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.yml,sha256=tidAK_FMhYYuPTxFoactwcXYQPSMZwQTxWrGBly4-1o,450
+ quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.sql,sha256=K63J7n7NIM2Jc7c4IF21JcW8AYOm9HxBNDiveUE4kzU,1558
+ quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.yml,sha256=ZGjz6C2bguDJxJyA7LhCHbuyZSRPEaRMXln9rxcotuo,344
+ quollio_core/dbt_projects/databricks/models/sources.yml,sha256=JXU-8lNsKm8dxIjmWos1vbTsWiea-9-pXnntik63ZpA,2231
+ quollio_core/dbt_projects/databricks/profiles/profiles_template.yml,sha256=Dw1RuTrE04yvGIaPQL7uc6pgSWloKHhu0KrduzJ1Z6M,353
+ quollio_core/dbt_projects/databricks/seeds/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ quollio_core/dbt_projects/databricks/snapshots/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  quollio_core/dbt_projects/redshift/README.md,sha256=55nDkX5uQXWmawpQbgG1hbyn64j_CegDBQddQ2C85C8,571
  quollio_core/dbt_projects/redshift/dbt_project.yml,sha256=WVCmT-2usdGSm6EBM6MCdzEeEFwv9ANsyknreoNXgBc,405
- quollio_core/dbt_projects/redshift/package-lock.yml,sha256=1N71QqV5p07pG5JVP73nSkz-cndl6UtjYLfmF0KrVsk,109
+ quollio_core/dbt_projects/redshift/package-lock.yml,sha256=Gef3zDCLF41j_FL-_h3sIZOUVj6j7nTTvxXrQPLcBP0,109
  quollio_core/dbt_projects/redshift/packages.yml,sha256=p9Bl2C44gdC6iYTUkz_15yq3xahSJf2IA3WOXLF_ahA,61
  quollio_core/dbt_projects/redshift/analyses/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  quollio_core/dbt_projects/redshift/macros/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- quollio_core/dbt_projects/redshift/macros/materialization/divided_view.sql,sha256=hED0GRVxbiJaXggJDWqHKJDhTXhC2GruI5lqYH2iId8,3237
+ quollio_core/dbt_projects/redshift/macros/materialization/divided_view.sql,sha256=3tRQeXXdjn3aSZ94DgMN6A6yMlTCE8aMKXeIdcZVDIM,3998
  quollio_core/dbt_projects/redshift/models/quollio_lineage_table_level.sql,sha256=AVPcNXfVYHwyutJzg61QT_VF9umfoC4i8C2HecAU4d4,2042
  quollio_core/dbt_projects/redshift/models/quollio_lineage_table_level.yml,sha256=UcrXpUTT3ihBHKPljvjw8xHz-ND60PfvMJaXqGKOEic,236
  quollio_core/dbt_projects/redshift/models/quollio_lineage_view_level.sql,sha256=A0CTgQwlz8InabA0cHuygV2GMZGYuAa7Zd5DIUOYzQI,1289
  quollio_core/dbt_projects/redshift/models/quollio_lineage_view_level.yml,sha256=7Npwo3svL9715HpNU2MKzRI014Da4tIStLzAHmd0UaU,235
  quollio_core/dbt_projects/redshift/models/quollio_sqllineage_sources.sql,sha256=e0A_Wqv_OcC8gG_yzTbI59vT-4vCI3JiAzFlmkvLnMk,1049
  quollio_core/dbt_projects/redshift/models/quollio_sqllineage_sources.yml,sha256=qgazupx3ca4P8R0loY5F9hyCz2fmAcWqZ6iOySo_NoY,377
- quollio_core/dbt_projects/redshift/models/quollio_stats_columns.sql,sha256=lT5ApMHu_-8_k4TsJmR_5nMtEck1IoZiPRyeejGudsw,304
+ quollio_core/dbt_projects/redshift/models/quollio_stats_columns.sql,sha256=lH8xPmAzSW-6wi_g1y_LFVhtFgHzBvTweVX-MKeJzUQ,302
  quollio_core/dbt_projects/redshift/models/quollio_stats_columns.yml,sha256=V_BESPk6IqE52ExT26-78As9l9AlWW86-Geb5PIhThU,67
  quollio_core/dbt_projects/redshift/models/quollio_stats_profiling_columns.sql,sha256=IPmHf51Er2jE9cMQHybT4adRxwwi2CEmgrBSv1Oeduc,1592
  quollio_core/dbt_projects/redshift/models/quollio_stats_profiling_columns.yml,sha256=s-p9F44TdwoFYlQN-b9gHzcFYOMqhqDGA9ORS_M4lhs,523
@@ -22,20 +38,21 @@ quollio_core/dbt_projects/redshift/models/sources.yml,sha256=NOSoR4ces2XivuenuG7
  quollio_core/dbt_projects/redshift/profiles/profiles_template.yml,sha256=8nS-IE25cLo6uhHdtiZG9YX0cd2fhUG0pPrFUJWs2AY,291
  quollio_core/dbt_projects/redshift/seeds/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  quollio_core/dbt_projects/redshift/snapshots/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ quollio_core/dbt_projects/seeds/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  quollio_core/dbt_projects/snowflake/README.md,sha256=55nDkX5uQXWmawpQbgG1hbyn64j_CegDBQddQ2C85C8,571
  quollio_core/dbt_projects/snowflake/dbt_project.yml,sha256=LN5NDOyakQjIK99IogQX4Whh_1zmqUfD2gqDU9JR3As,407
  quollio_core/dbt_projects/snowflake/package-lock.yml,sha256=Gef3zDCLF41j_FL-_h3sIZOUVj6j7nTTvxXrQPLcBP0,109
  quollio_core/dbt_projects/snowflake/packages.yml,sha256=p9Bl2C44gdC6iYTUkz_15yq3xahSJf2IA3WOXLF_ahA,61
  quollio_core/dbt_projects/snowflake/analyses/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  quollio_core/dbt_projects/snowflake/macros/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql,sha256=WnAeuTNUUsr47be2vPz9ZB580dPvN4KuGMnojBn1I1Y,2527
+ quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql,sha256=T4nFL76AbuQHBiLSAvNchoJnRjb1IRj4nToOyTNvLvw,2782
  quollio_core/dbt_projects/snowflake/models/quollio_lineage_column_level.sql,sha256=Cxt2U2aXNG_LUm63jwTyxUkapkrB7_uHmesx1PTcMJM,4721
  quollio_core/dbt_projects/snowflake/models/quollio_lineage_column_level.yml,sha256=a2uNIAh-xw51eu-GmHVuAnGnTbwK7h8-DjDeQtK3KaQ,711
  quollio_core/dbt_projects/snowflake/models/quollio_lineage_table_level.sql,sha256=Q_7vY1N1Hi1LFv5CxkkdR3gQw8fTDnoKECTLSK4gd3o,5112
  quollio_core/dbt_projects/snowflake/models/quollio_lineage_table_level.yml,sha256=QXlMBIkHo1Y-ANveKVx1FwyoYTMRXKgE2Z-PNouhQTw,325
  quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.sql,sha256=gd6JhQO13xBIvOoeXcce1I7amNGytwE8pwUApXehwqM,1520
  quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.yml,sha256=qgazupx3ca4P8R0loY5F9hyCz2fmAcWqZ6iOySo_NoY,377
- quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.sql,sha256=lT5ApMHu_-8_k4TsJmR_5nMtEck1IoZiPRyeejGudsw,304
+ quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.sql,sha256=lH8xPmAzSW-6wi_g1y_LFVhtFgHzBvTweVX-MKeJzUQ,302
  quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.yml,sha256=V_BESPk6IqE52ExT26-78As9l9AlWW86-Geb5PIhThU,67
  quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.sql,sha256=kt2aFimIPkgKI_UQTjvfRlAjrdSbO8z6C_749pnXrnE,1382
  quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.yml,sha256=W39VAmFnnX6RBoW7B_4CConC1lm0Jm9o50Jsz9bYZzY,538
@@ -45,19 +62,21 @@ quollio_core/dbt_projects/snowflake/seeds/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JC
  quollio_core/dbt_projects/snowflake/snapshots/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  quollio_core/helper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  quollio_core/helper/core.py,sha256=-3vCDlKExWPHJmWuZQNpYnvPP55uoGwRpTtnFvsDxIo,1127
- quollio_core/helper/env_default.py,sha256=XsBpbMNeOVqvHTDoNctk_vGuMdLInMsQpnZ5u5xzhSU,1074
+ quollio_core/helper/env_default.py,sha256=YIL9hfrPs1ViL1AXohnbWEjVBUDXbVVakH0ZoSZWOlc,1202
  quollio_core/profilers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- quollio_core/profilers/lineage.py,sha256=N2_-sM0iNACl4ztcQ9NQdUwkqPWReBKbICPZe30qeNo,5892
+ quollio_core/profilers/databricks.py,sha256=skTTlqogJGauZkN7c9uVSYalAACIB43yblGc1jEIM1U,7501
+ quollio_core/profilers/lineage.py,sha256=HrTjXxrchETRmHEb5tSFzzHdb6z2KMw-DTnUSeKxmr0,6379
  quollio_core/profilers/redshift.py,sha256=obdHVIsOM1bwHGdvYKalsJcTXwLK02kAKQMSBzSvsDo,7862
  quollio_core/profilers/snowflake.py,sha256=C1LC19ZaUMwNoXjsbnez0xANydJYs8oNRt6tixWKDq8,9090
  quollio_core/profilers/sqllineage.py,sha256=oCyl4tpXL5bkfguXAzTHSB9kZBL3tQK_rfcJ4XQMrLo,5177
- quollio_core/profilers/stats.py,sha256=0AbxlJ_QtkdxN4W1yS14NAB3N3LkhB59MWX-d1jmQPE,4721
+ quollio_core/profilers/stats.py,sha256=PG1NbbUSpc1JuEYvBzD66rd24tp0C13_Y5Y7vRjYG1c,4720
  quollio_core/repository/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ quollio_core/repository/databricks.py,sha256=m68tja5N-QxH3VqEq-mOJKBeR2qldSgj_L9iIxvWwm0,1945
  quollio_core/repository/dbt.py,sha256=HXqW_xa4xYPh9CnKkg4L1gwG3SGjj2BAYoWgzWMFU4U,770
  quollio_core/repository/qdc.py,sha256=VCmzAUvjLemw1os5TaPtfBFkMCOMuPeftjZmUPhFj2Y,4702
  quollio_core/repository/redshift.py,sha256=UVHIpYzDQ2AbBTAGa8DgmEenG0NZsHfYroR1MmEPQGA,2991
  quollio_core/repository/snowflake.py,sha256=1YVMDfb9euJKvikv1pk_IxVF6SVsiemSvZ-WMTSbY7E,1874
- quollio_core-0.4.3.dist-info/LICENSE,sha256=V8j_M8nAz8PvAOZQocyRDX7keai8UJ9skgmnwqETmdY,34520
- quollio_core-0.4.3.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
- quollio_core-0.4.3.dist-info/METADATA,sha256=ePyyL6UzINeA26hX-tOQ-uAKyyHmJNKIwe3nOwJT3iE,6400
- quollio_core-0.4.3.dist-info/RECORD,,
+ quollio_core-0.4.5.dist-info/LICENSE,sha256=V8j_M8nAz8PvAOZQocyRDX7keai8UJ9skgmnwqETmdY,34520
+ quollio_core-0.4.5.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
+ quollio_core-0.4.5.dist-info/METADATA,sha256=fbpCG8MiXchuHMyatF_kiThLXWSP0gMoMV_ffVDU4MA,6571
+ quollio_core-0.4.5.dist-info/RECORD,,