quollio-core 0.4.4__py3-none-any.whl → 0.4.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. quollio_core/__init__.py +1 -1
  2. quollio_core/bigquery.py +123 -0
  3. quollio_core/bricks.py +288 -0
  4. quollio_core/dbt_projects/databricks/.gitignore +4 -0
  5. quollio_core/dbt_projects/databricks/README.md +5 -0
  6. quollio_core/dbt_projects/databricks/analyses/.gitkeep +0 -0
  7. quollio_core/dbt_projects/databricks/dbt_project.yml +21 -0
  8. quollio_core/dbt_projects/databricks/macros/.gitkeep +0 -0
  9. quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.sql +73 -0
  10. quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.yml +14 -0
  11. quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.sql +63 -0
  12. quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.yml +11 -0
  13. quollio_core/dbt_projects/databricks/models/sources.yml +84 -0
  14. quollio_core/dbt_projects/databricks/package-lock.yml +14 -0
  15. quollio_core/dbt_projects/databricks/packages.yml +13 -0
  16. quollio_core/dbt_projects/databricks/profiles/profiles_template.yml +14 -0
  17. quollio_core/dbt_projects/databricks/seeds/.gitkeep +0 -0
  18. quollio_core/dbt_projects/databricks/snapshots/.gitkeep +0 -0
  19. quollio_core/dbt_projects/redshift/dbt_project.yml +1 -1
  20. quollio_core/dbt_projects/redshift/macros/materialization/divided_view.sql +101 -34
  21. quollio_core/dbt_projects/redshift/models/quollio_stats_columns.sql +1 -2
  22. quollio_core/dbt_projects/redshift/package-lock.yml +1 -1
  23. quollio_core/dbt_projects/seeds/.gitkeep +0 -0
  24. quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql +50 -27
  25. quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.sql +1 -2
  26. quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.sql +57 -20
  27. quollio_core/helper/core.py +4 -0
  28. quollio_core/helper/env_default.py +28 -2
  29. quollio_core/helper/log.py +17 -0
  30. quollio_core/profilers/bigquery.py +81 -0
  31. quollio_core/profilers/databricks.py +198 -0
  32. quollio_core/profilers/lineage.py +26 -0
  33. quollio_core/profilers/redshift.py +41 -74
  34. quollio_core/profilers/snowflake.py +138 -169
  35. quollio_core/profilers/sqllineage.py +0 -1
  36. quollio_core/profilers/stats.py +0 -1
  37. quollio_core/redshift.py +15 -18
  38. quollio_core/repository/bigquery.py +61 -0
  39. quollio_core/repository/databricks.py +62 -0
  40. quollio_core/repository/dbt.py +0 -1
  41. quollio_core/repository/qdc.py +0 -3
  42. quollio_core/repository/redshift.py +0 -1
  43. quollio_core/repository/snowflake.py +6 -1
  44. quollio_core/snowflake.py +29 -16
  45. {quollio_core-0.4.4.dist-info → quollio_core-0.4.10.dist-info}/METADATA +11 -2
  46. {quollio_core-0.4.4.dist-info → quollio_core-0.4.10.dist-info}/RECORD +48 -25
  47. {quollio_core-0.4.4.dist-info → quollio_core-0.4.10.dist-info}/LICENSE +0 -0
  48. {quollio_core-0.4.4.dist-info → quollio_core-0.4.10.dist-info}/WHEEL +0 -0
quollio_core/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  """Quollio Core"""
2
2
 
3
- __version__ = "0.4.4"
3
+ __version__ = "0.4.10"
4
4
  __author__ = "Quollio Technologies, Inc"
@@ -0,0 +1,123 @@
1
+ import argparse
2
+ import json
3
+ import logging
4
+
5
+ from quollio_core.helper.env_default import env_default
6
+ from quollio_core.helper.log import set_log_level
7
+ from quollio_core.profilers.bigquery import bigquery_table_lineage
8
+ from quollio_core.repository import qdc
9
+ from quollio_core.repository.bigquery import get_credentials, get_org_id
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
+ def load_lineage(
15
+ qdc_client: qdc.QDCExternalAPIClient, project_id: str, regions: list, tenant_id: str, credentials: dict, org_id: str
16
+ ):
17
+ bigquery_table_lineage(
18
+ qdc_client=qdc_client,
19
+ tenant_id=tenant_id,
20
+ project_id=project_id,
21
+ regions=regions,
22
+ credentials=credentials,
23
+ org_id=org_id,
24
+ )
25
+
26
+
27
+ if __name__ == "__main__":
28
+ parser = argparse.ArgumentParser(
29
+ prog="Quollio Intelligence Agent for Google BigQuery",
30
+ description="Collect lineage and stats from Google BigQuery and load to Quollio Data Catalog",
31
+ epilog="Copyright (c) 2024 Quollio Technologies, Inc.",
32
+ )
33
+ parser.add_argument(
34
+ "commands",
35
+ choices=["load_lineage"],
36
+ type=str,
37
+ nargs="+",
38
+ help="""
39
+ The command to execute.
40
+ 'load_lineage': Load lineage data from Google Data Catalog to Quollio,
41
+ """,
42
+ )
43
+ parser.add_argument(
44
+ "--credentials",
45
+ type=str,
46
+ action=env_default("GOOGLE_APPLICATION_CREDENTIALS"),
47
+ help="Credentials for Google Cloud Platform",
48
+ )
49
+ parser.add_argument(
50
+ "--tenant_id",
51
+ type=str,
52
+ action=env_default("TENANT_ID"),
53
+ required=False,
54
+ help="The tenant id (company id) where the lineage and stats are loaded",
55
+ )
56
+ parser.add_argument(
57
+ "--api_url",
58
+ type=str,
59
+ action=env_default("QDC_API_URL"),
60
+ required=False,
61
+ help="The base URL of Quollio External API",
62
+ )
63
+ parser.add_argument(
64
+ "--client_id",
65
+ type=str,
66
+ action=env_default("QDC_CLIENT_ID"),
67
+ required=False,
68
+ help="The client id that is created on Quollio console to let clients access Quollio External API",
69
+ )
70
+ parser.add_argument(
71
+ "--client_secret",
72
+ type=str,
73
+ action=env_default("QDC_CLIENT_SECRET"),
74
+ required=False,
75
+ help="The client secret that is created on Quollio console to let clients access Quollio External API",
76
+ )
77
+ parser.add_argument(
78
+ "--project_id",
79
+ type=str,
80
+ action=env_default("GCP_PROJECT_ID"),
81
+ required=False,
82
+ help="GCP Project ID",
83
+ )
84
+ parser.add_argument(
85
+ "--regions",
86
+ type=str,
87
+ action=env_default("GCP_REGIONS"),
88
+ required=False,
89
+ help="GCP regions where the data is located. Multiple regions can be provided separated by space.",
90
+ nargs="+",
91
+ )
92
+ parser.add_argument(
93
+ "--log_level",
94
+ type=str,
95
+ choices=["debug", "info", "warn", "error", "none"],
96
+ action=env_default("LOG_LEVEL"),
97
+ required=False,
98
+ help="The log level for dbt commands. Default value is info",
99
+ )
100
+
101
+ args = parser.parse_args()
102
+ set_log_level(level=args.log_level)
103
+
104
+ if len(args.commands) == 0:
105
+ raise ValueError("No command is provided")
106
+
107
+ if "load_lineage" in args.commands:
108
+ qdc_client = qdc.QDCExternalAPIClient(
109
+ base_url=args.api_url, client_id=args.client_id, client_secret=args.client_secret
110
+ )
111
+
112
+ credentials_json = json.loads(args.credentials)
113
+ credentials = get_credentials(credentials_json=credentials_json)
114
+ org_id = get_org_id(credentials_json=credentials_json)
115
+
116
+ load_lineage(
117
+ qdc_client=qdc_client,
118
+ project_id=args.project_id,
119
+ regions=args.regions,
120
+ tenant_id=args.tenant_id,
121
+ credentials=credentials,
122
+ org_id=org_id,
123
+ )
quollio_core/bricks.py ADDED
@@ -0,0 +1,288 @@
1
+ import argparse
2
+ import logging
3
+ import os
4
+
5
+ from quollio_core.helper.core import setup_dbt_profile, trim_prefix
6
+ from quollio_core.helper.env_default import env_default
7
+ from quollio_core.helper.log import set_log_level
8
+ from quollio_core.profilers.databricks import (
9
+ databricks_column_level_lineage,
10
+ databricks_column_stats,
11
+ databricks_table_level_lineage,
12
+ )
13
+ from quollio_core.repository import databricks as db
14
+ from quollio_core.repository import dbt, qdc
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ def build_view(
20
+ conn: db.DatabricksConnectionConfig,
21
+ target_tables: str = "",
22
+ log_level: str = "info",
23
+ ) -> None:
24
+
25
+ logger.info("Build profiler views using dbt")
26
+ # set parameters
27
+ dbt_client = dbt.DBTClient()
28
+ current_dir = os.path.dirname(os.path.abspath(__file__))
29
+ project_path = f"{current_dir}/dbt_projects/databricks"
30
+ template_path = f"{current_dir}/dbt_projects/databricks/profiles"
31
+ template_name = "profiles_template.yml"
32
+
33
+ # build views using dbt
34
+ setup_dbt_profile(connections_json=conn.as_dict(), template_path=template_path, template_name=template_name)
35
+ # FIXME: when executing some of the commands, directory changes due to the library bug.
36
+ # https://github.com/dbt-labs/dbt-core/issues/8997
37
+ dbt_client.invoke(
38
+ cmd="deps",
39
+ project_dir=project_path,
40
+ profile_dir=template_path,
41
+ options=["--no-use-colors", "--log-level", log_level],
42
+ )
43
+
44
+ run_options = ["--no-use-colors", "--log-level", log_level]
45
+
46
+ if target_tables is not None:
47
+ target_tables_str = " ".join(target_tables)
48
+ run_options.append("--select")
49
+ run_options.append(target_tables_str)
50
+
51
+ dbt_client.invoke(
52
+ cmd="run",
53
+ project_dir=project_path,
54
+ profile_dir=template_path,
55
+ options=run_options,
56
+ )
57
+ return
58
+
59
+
60
+ def load_lineage(
61
+ conn: db.DatabricksConnectionConfig,
62
+ endpoint: str,
63
+ qdc_client: qdc.QDCExternalAPIClient,
64
+ tenant_id: str,
65
+ enable_column_lineage: bool = False,
66
+ ) -> None:
67
+
68
+ logger.info("Generate Databricks table to table lineage.")
69
+ databricks_table_level_lineage(
70
+ conn=conn,
71
+ endpoint=endpoint,
72
+ qdc_client=qdc_client,
73
+ tenant_id=tenant_id,
74
+ dbt_table_name="quollio_lineage_table_level",
75
+ )
76
+
77
+ if enable_column_lineage:
78
+ logger.info(
79
+ f"enable_column_lineage is set to {enable_column_lineage}. Generate Databricks column to column lineage."
80
+ )
81
+ databricks_column_level_lineage(
82
+ conn=conn,
83
+ endpoint=endpoint,
84
+ qdc_client=qdc_client,
85
+ tenant_id=tenant_id,
86
+ dbt_table_name="quollio_lineage_column_level",
87
+ )
88
+ else:
89
+ logger.info("Skip column lineage ingestion. Set enable_column_lineage to True if you ingest column lineage.")
90
+
91
+ logger.info("Lineage data is successfully loaded.")
92
+ return
93
+
94
+
95
+ def load_column_stats(
96
+ conn: db.DatabricksConnectionConfig,
97
+ endpoint: str,
98
+ qdc_client: qdc.QDCExternalAPIClient,
99
+ tenant_id: str,
100
+ ) -> None:
101
+
102
+ logger.info("Generate Databricks column stats.")
103
+ databricks_column_stats(
104
+ conn=conn,
105
+ endpoint=endpoint,
106
+ qdc_client=qdc_client,
107
+ tenant_id=tenant_id,
108
+ )
109
+
110
+ logger.info("Column stats are successfully loaded.")
111
+ return
112
+
113
+
114
+ if __name__ == "__main__":
115
+ parser = argparse.ArgumentParser(
116
+ prog="Quollio Intelligence Agent for Databricks",
117
+ description="Build views and load lineage and stats to Quollio from Databricks using dbt.",
118
+ epilog="Copyright (c) 2024 Quollio Technologies, Inc.",
119
+ )
120
+ parser.add_argument(
121
+ "commands",
122
+ choices=["build_view", "load_lineage", "load_stats"],
123
+ type=str,
124
+ nargs="+",
125
+ help="""
126
+ The command to execute.
127
+ 'build_view': Build views using dbt,
128
+ 'load_lineage': Load lineage data from created views to Quollio,
129
+ 'load_stats': Load stats from created views to Quollio,
130
+ """,
131
+ )
132
+ parser.add_argument(
133
+ "--host", type=str, action=env_default("DATABRICKS_HOST"), required=False, help="Host for Databricks workspace"
134
+ )
135
+ parser.add_argument(
136
+ "--http_path",
137
+ type=str,
138
+ action=env_default("DATABRICKS_HTTP_PATH"),
139
+ required=False,
140
+ help="HTTP path for a Databricks compute resource (i.e warehouse)",
141
+ )
142
+ parser.add_argument(
143
+ "--port",
144
+ type=int,
145
+ action=env_default("DATABRICKS_PORT"),
146
+ required=False,
147
+ help="Port for Databricks compute resource",
148
+ )
149
+ parser.add_argument(
150
+ "--databricks_client_secret",
151
+ type=str,
152
+ action=env_default("DATABRICKS_CLIENT_SECRET"),
153
+ required=False,
154
+ help="Secret for the service principal",
155
+ )
156
+ parser.add_argument(
157
+ "--databricks_client_id",
158
+ type=str,
159
+ action=env_default("DATABRICKS_CLIENT_ID"),
160
+ required=False,
161
+ help="Client id for the service principal",
162
+ )
163
+ parser.add_argument(
164
+ "--catalog",
165
+ type=str,
166
+ required=False,
167
+ action=env_default("DATABRICKS_TARGET_CATALOG"),
168
+ help="Target database name where the views are built by dbt",
169
+ )
170
+ parser.add_argument(
171
+ "--schema",
172
+ type=str,
173
+ action=env_default("DATABRICKS_TARGET_SCHEMA"),
174
+ required=False,
175
+ help="Target schema name where the views are built by dbt",
176
+ )
177
+ parser.add_argument(
178
+ "--log_level",
179
+ type=str,
180
+ choices=["debug", "info", "warn", "error", "none"],
181
+ action=env_default("LOG_LEVEL"),
182
+ required=False,
183
+ help="The log level for dbt commands. Default value is info",
184
+ )
185
+ parser.add_argument(
186
+ "--api_url",
187
+ type=str,
188
+ action=env_default("QDC_API_URL"),
189
+ required=False,
190
+ help="The base URL of Quollio External API",
191
+ )
192
+ parser.add_argument(
193
+ "--client_id",
194
+ type=str,
195
+ action=env_default("QDC_CLIENT_ID"),
196
+ required=False,
197
+ help="The client id that is created on Quollio console to let clients access Quollio External API",
198
+ )
199
+ parser.add_argument(
200
+ "--client_secret",
201
+ type=str,
202
+ action=env_default("QDC_CLIENT_SECRET"),
203
+ required=False,
204
+ help="The client secret that is created on Quollio console to let clients access Quollio External API",
205
+ )
206
+ parser.add_argument(
207
+ "--tenant_id",
208
+ type=str,
209
+ action=env_default("TENANT_ID"),
210
+ required=False,
211
+ help="The tenant id (company id) where the lineage and stats are loaded",
212
+ )
213
+ parser.add_argument(
214
+ "--target_tables",
215
+ type=str,
216
+ nargs="+",
217
+ choices=["quollio_lineage_table_level", "quollio_lineage_column_level"],
218
+ action=env_default("DATABRICKS_TARGET_TABLES"),
219
+ required=False,
220
+ help="Target tables you want to create with dbt module. \
221
+ You need to specify this parameter if you want to specify tables, not all ones. \
222
+ Please specify table name with blank delimiter like tableA tableB \
223
+ if you want to create two or more tables",
224
+ )
225
+ parser.add_argument(
226
+ "--monitoring_table_suffix",
227
+ type=str,
228
+ action=env_default("DATABRICKS_MONITORING_TABLE_SUFFIX"),
229
+ required=False,
230
+ help="Sets the monitoring tables suffix for databricks. \
231
+ This is used to identify the monitoring tables created by the databricks monitoring tool. \
232
+ Default value is _profile_metrics",
233
+ )
234
+ parser.add_argument(
235
+ "--enable_column_lineage",
236
+ type=bool,
237
+ action=env_default("ENABLE_COLUMN_LINEAGE", store_true=True),
238
+ default=False,
239
+ required=False,
240
+ help="Whether to ingest column lineage into QDIC or not. Default value is False",
241
+ )
242
+
243
+ args = parser.parse_args()
244
+ set_log_level(level=args.log_level)
245
+
246
+ conn = db.DatabricksConnectionConfig(
247
+ # MEMO: Metadata agent allows the string 'https://' as a host name but is not allowed by intelligence agent.
248
+ host=trim_prefix(args.host, "https://"),
249
+ http_path=args.http_path,
250
+ client_id=args.databricks_client_id,
251
+ client_secret=args.databricks_client_secret,
252
+ catalog=args.catalog,
253
+ schema=args.schema,
254
+ )
255
+
256
+ if len(args.commands) == 0:
257
+ raise ValueError("No command is provided")
258
+
259
+ if "build_view" in args.commands:
260
+ build_view(
261
+ conn=conn,
262
+ target_tables=args.target_tables,
263
+ log_level=args.log_level,
264
+ )
265
+
266
+ if "load_lineage" in args.commands:
267
+ qdc_client = qdc.QDCExternalAPIClient(
268
+ base_url=args.api_url, client_id=args.client_id, client_secret=args.client_secret
269
+ )
270
+ load_lineage(
271
+ conn=conn,
272
+ endpoint=args.host,
273
+ qdc_client=qdc_client,
274
+ tenant_id=args.tenant_id,
275
+ enable_column_lineage=args.enable_column_lineage,
276
+ )
277
+
278
+ if "load_stats" in args.commands:
279
+ qdc_client = qdc.QDCExternalAPIClient(
280
+ base_url=args.api_url, client_id=args.client_id, client_secret=args.client_secret
281
+ )
282
+ databricks_column_stats(
283
+ conn=conn,
284
+ endpoint=args.host,
285
+ qdc_client=qdc_client,
286
+ tenant_id=args.tenant_id,
287
+ monitoring_table_suffix=args.monitoring_table_suffix,
288
+ )
@@ -0,0 +1,4 @@
1
+
2
+ target/
3
+ dbt_packages/
4
+ logs/
@@ -0,0 +1,5 @@
1
+ ### Quollio Intelligence Agent Support For Databricks
2
+ Notable Files:
3
+ 1. [quollio_lineage_table_level.sql](models/quollio_lineage_table_level.sql) - Generates table lineage data from Databricks system tables.
4
+ 2. [quollio_lineage_column_level.sql](models/quollio_lineage_column_level.sql) - Generates column lineage data from Databricks system tables.
5
+ 3. [sources.yml](models/sources.yml) - References sources in the Databricks system catalog.
File without changes
@@ -0,0 +1,21 @@
1
+ name: 'quollio_intelligence_databricks'
2
+ version: '1.0.0'
3
+ config-version: 2
4
+
5
+ profile: 'quollio_intelligence_databricks'
6
+
7
+ model-paths: ["models"]
8
+ analysis-paths: ["analyses"]
9
+ test-paths: ["tests"]
10
+ seed-paths: ["seeds"]
11
+ macro-paths: ["macros"]
12
+ snapshot-paths: ["snapshots"]
13
+
14
+ clean-targets:
15
+ - "target"
16
+ - "dbt_packages"
17
+
18
+ models:
19
+ +dbt-osmosis: "{model}.yml"
20
+ # Databricks automatically enables grants on SQL endpoints
21
+ # https://docs.getdbt.com/reference/resource-configs/grants
File without changes
@@ -0,0 +1,73 @@
1
+ -- Gets full table lineage from Databricks
2
+ WITH columns_lineage_history AS (
3
+ SELECT
4
+ -- The databricks columns table does not have a full table name, create with CONCAT()
5
+ source_table_full_name AS upstream_table,
6
+ target_table_full_name as downstream_table,
7
+ source_column_name as upstream_column,
8
+ target_column_name as downstream_column,
9
+ event_time,
10
+ RANK() OVER (
11
+ PARTITION BY target_table_full_name
12
+ ORDER BY
13
+ event_time DESC
14
+ ) AS rank
15
+ FROM
16
+ {{ source('access','column_lineage') }}
17
+ WHERE
18
+ source_table_full_name IS NOT NULL
19
+ AND target_table_full_name IS NOT NULL
20
+ AND source_table_full_name NOT LIKE "%quollio%"
21
+ AND target_table_full_name NOT LIKE "%quollio%"
22
+ ),
23
+ -- Gets list of existing columns in catalogs
24
+ existing_columns (
25
+ SELECT
26
+ CONCAT(table_catalog, '.', table_schema, '.', table_name) AS table_full_name,
27
+ column_name
28
+ FROM
29
+ {{ source('inf_sch','columns') }}
30
+ ),
31
+
32
+ -- Checks if the downstream tables exists and group operations.
33
+ downstream_column_exists (
34
+ SELECT
35
+ upstream_table AS UPSTREAM_TABLE_NAME,
36
+ upstream_column AS UPSTREAM_COLUMN_NAME,
37
+ downstream_table AS DOWNSTREAM_TABLE_NAME,
38
+ downstream_column AS DOWNSTREAM_COLUMN_NAME,
39
+ event_time
40
+ FROM
41
+ columns_lineage_history clh
42
+ INNER JOIN existing_columns ec ON clh.downstream_table = ec.table_full_name
43
+ AND clh.downstream_column = ec.column_name
44
+ WHERE
45
+ rank = 1
46
+ GROUP BY UPSTREAM_TABLE, UPSTREAM_COLUMN, DOWNSTREAM_TABLE, DOWNSTREAM_COLUMN, EVENT_TIME
47
+ ),
48
+
49
+ -- Aggregates the column lineage
50
+ aggregated_column_lineage AS (
51
+ SELECT
52
+ downstream_table_name,
53
+ downstream_column_name,
54
+ collect_set(
55
+ named_struct(
56
+ 'upstream_table_name', upstream_table_name,
57
+ 'upstream_column_name', upstream_column_name
58
+ )
59
+ ) AS upstream_columns
60
+ FROM
61
+ downstream_column_exists
62
+ GROUP BY
63
+ downstream_table_name,
64
+ downstream_column_name
65
+ )
66
+
67
+ SELECT
68
+ downstream_table_name AS DOWNSTREAM_TABLE_NAME,
69
+ downstream_column_name AS DOWNSTREAM_COLUMN_NAME,
70
+ to_json(upstream_columns) AS UPSTREAM_COLUMNS
71
+ FROM
72
+ aggregated_column_lineage
73
+
@@ -0,0 +1,14 @@
1
+ version: 2
2
+
3
+ models:
4
+ - name: quollio_lineage_column_level
5
+ columns:
6
+ - name: UPSTREAM_COLUMNS
7
+ description: 'String column with all upstream columns in JSON format'
8
+ type: string
9
+ - name: DOWNSTREAM_TABLE_NAME
10
+ description: 'Full downstream table name in <catalog>.<schema>.<table> format'
11
+ type: string
12
+ - name: DOWNSTREAM_COLUMN_NAME
13
+ description: 'Downstream column name'
14
+ type: string
@@ -0,0 +1,63 @@
1
+ -- Gets full table lineage from Databricks
2
+ WITH table_lineage_history AS (
3
+ SELECT
4
+ source_table_full_name as upstream_table,
5
+ target_table_full_name as downstream_table,
6
+ target_type,
7
+ event_time,
8
+ RANK() OVER (
9
+ PARTITION BY target_table_full_name
10
+ ORDER BY
11
+ event_time DESC
12
+ ) AS rank
13
+ FROM
14
+ {{ source('access','table_lineage') }}
15
+ WHERE
16
+ source_table_full_name IS NOT NULL
17
+ AND target_table_full_name IS NOT NULL
18
+ AND source_table_full_name NOT LIKE "%quollio%"
19
+ AND target_table_full_name NOT LIKE "%quollio%"
20
+ ),
21
+ -- Gets list of existing tables in catalogs
22
+ existing_tables (
23
+ SELECT
24
+ CONCAT(table_catalog, '.', table_schema, '.', table_name) AS table_full_name
25
+ FROM
26
+ {{ source('inf_sch','tables') }}
27
+ ),
28
+
29
+ -- Checks if the downstream tables exists and group operations.
30
+ downstream_table_exists (
31
+ SELECT
32
+ upstream_table,
33
+ downstream_table,
34
+ target_type,
35
+ event_time
36
+ FROM
37
+ table_lineage_history tlh
38
+ INNER JOIN existing_tables et ON tlh.downstream_table = et.table_full_name
39
+ WHERE
40
+ rank = 1
41
+ GROUP BY upstream_table, downstream_table, target_type, event_time
42
+ ),
43
+
44
+ aggregated_table_lineage AS (
45
+ SELECT
46
+ downstream_table,
47
+ collect_set(
48
+ named_struct(
49
+ 'upstream_object_name', upstream_table
50
+ )
51
+ ) AS upstream_tables
52
+ FROM
53
+ downstream_table_exists
54
+ GROUP BY
55
+ downstream_table
56
+ )
57
+ SELECT
58
+ downstream_table as DOWNSTREAM_TABLE_NAME,
59
+ to_json(upstream_tables) as UPSTREAM_TABLES
60
+
61
+ FROM
62
+ aggregated_table_lineage
63
+
@@ -0,0 +1,11 @@
1
+ version: 2
2
+
3
+ models:
4
+ - name: quollio_lineage_table_level
5
+ columns:
6
+ - name: UPSTREAM_TABLES
7
+ description: 'String column with all upstream tables in JSON format'
8
+ type: string
9
+ - name: DOWNSTREAM_TABLE_NAME
10
+ description: 'Full downstream table name in <catalog>.<schema>.<table> format'
11
+ type: string