quollio-core 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
quollio_core/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  """Quollio Core"""
2
2
 
3
- __version__ = "0.4.6"
3
+ __version__ = "0.4.8"
4
4
  __author__ = "Quollio Technologies, Inc"
@@ -0,0 +1,114 @@
1
+ import argparse
2
+ import json
3
+ import logging
4
+
5
+ from quollio_core.helper.env_default import env_default
6
+ from quollio_core.profilers.bigquery import bigquery_table_lineage
7
+ from quollio_core.repository import qdc
8
+ from quollio_core.repository.bigquery import get_credentials, get_org_id
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
def load_lineage(
    qdc_client: qdc.QDCExternalAPIClient, project_id: str, regions: list, tenant_id: str, credentials: dict, org_id: str
):
    """Ingest BigQuery table-level lineage into the Quollio Data Catalog.

    Thin wrapper around the profiler entry point; every argument is forwarded
    unchanged to ``bigquery_table_lineage``.
    """
    lineage_kwargs = dict(
        qdc_client=qdc_client,
        tenant_id=tenant_id,
        project_id=project_id,
        regions=regions,
        credentials=credentials,
        org_id=org_id,
    )
    bigquery_table_lineage(**lineage_kwargs)
24
+
25
+
26
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog="Quollio Intelligence Agent for Google BigQuery",
        description="Collect lineage and stats from Google BigQuery and load to Quollio Data Catalog",
        epilog="Copyright (c) 2024 Quollio Technologies, Inc.",
    )
    parser.add_argument(
        "commands",
        choices=["load_lineage"],
        type=str,
        nargs="+",
        help="""
        The command to execute.
        'load_lineage': Load lineage data from Google Data Catalog to Quollio,
        """,
    )
    parser.add_argument(
        "--credentials",
        type=str,
        action=env_default("GOOGLE_APPLICATION_CREDENTIALS"),
        # BUGFIX: help text previously read "Crendentials".
        help="Credentials for Google Cloud Platform",
    )
    parser.add_argument(
        "--tenant_id",
        type=str,
        action=env_default("TENANT_ID"),
        required=False,
        help="The tenant id (company id) where the lineage and stats are loaded",
    )
    parser.add_argument(
        "--api_url",
        type=str,
        action=env_default("QDC_API_URL"),
        required=False,
        help="The base URL of Quollio External API",
    )
    parser.add_argument(
        "--client_id",
        type=str,
        action=env_default("QDC_CLIENT_ID"),
        required=False,
        help="The client id that is created on Quollio console to let clients access Quollio External API",
    )
    parser.add_argument(
        "--client_secret",
        type=str,
        action=env_default("QDC_CLIENT_SECRET"),
        required=False,
        help="The client secret that is created on Quollio console to let clients access Quollio External API",
    )
    parser.add_argument(
        "--project_id",
        type=str,
        action=env_default("GCP_PROJECT_ID"),
        required=False,
        help="GCP Project ID",
    )
    parser.add_argument(
        "--regions",
        type=str,
        action=env_default("GCP_REGIONS"),
        required=False,
        help="GCP regions where the data is located. Multiple regions can be provided separated by space.",
        nargs="+",
    )

    args = parser.parse_args()

    # Defensive: nargs="+" already guarantees at least one command, but keep an
    # explicit guard in case argparse behavior is bypassed.
    if not args.commands:
        raise ValueError("No command is provided")

    if "load_lineage" in args.commands:

        qdc_client = qdc.QDCExternalAPIClient(
            base_url=args.api_url, client_id=args.client_id, client_secret=args.client_secret
        )

        # GOOGLE_APPLICATION_CREDENTIALS is expected to hold the service-account
        # key as a JSON *string* here (not a file path) — it is parsed directly.
        credentials_json = json.loads(args.credentials)
        credentials = get_credentials(credentials_json=credentials_json)
        org_id = get_org_id(credentials_json=credentials_json)

        load_lineage(
            qdc_client=qdc_client,
            project_id=args.project_id,
            regions=args.regions,
            tenant_id=args.tenant_id,
            credentials=credentials,
            org_id=org_id,
        )
quollio_core/bricks.py CHANGED
@@ -2,7 +2,7 @@ import argparse
2
2
  import logging
3
3
  import os
4
4
 
5
- from quollio_core.helper.core import setup_dbt_profile
5
+ from quollio_core.helper.core import setup_dbt_profile, trim_prefix
6
6
  from quollio_core.helper.env_default import env_default
7
7
  from quollio_core.profilers.databricks import (
8
8
  databricks_column_level_lineage,
@@ -17,7 +17,7 @@ logger = logging.getLogger(__name__)
17
17
 
18
18
  def build_view(
19
19
  conn: db.DatabricksConnectionConfig,
20
- target_tables: str,
20
+ target_tables: str = "",
21
21
  log_level: str = "info",
22
22
  ) -> None:
23
23
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
@@ -41,7 +41,13 @@ def build_view(
41
41
  options=["--no-use-colors", "--log-level", log_level],
42
42
  )
43
43
 
44
- run_options = ["--no-use-colors", "--log-level", log_level, "--select", target_tables]
44
+ run_options = ["--no-use-colors", "--log-level", log_level]
45
+
46
+ if target_tables is not None:
47
+ target_tables_str = " ".join(target_tables)
48
+ run_options.append("--select")
49
+ run_options.append(target_tables_str)
50
+
45
51
  dbt_client.invoke(
46
52
  cmd="run",
47
53
  project_dir=project_path,
@@ -53,20 +59,35 @@ def build_view(
53
59
 
54
60
def load_lineage(
    conn: db.DatabricksConnectionConfig,
    endpoint: str,
    qdc_client: qdc.QDCExternalAPIClient,
    tenant_id: str,
    enable_column_lineage: bool = False,
) -> None:
    """Load Databricks lineage into Quollio.

    Table-level lineage is always ingested; column-level lineage is ingested
    only when ``enable_column_lineage`` is True (it is significantly heavier).
    ``endpoint`` is the workspace host used to build global IDs.
    """
    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")

    logger.info("Generate Databricks table to table lineage.")
    databricks_table_level_lineage(
        conn=conn,
        endpoint=endpoint,
        qdc_client=qdc_client,
        tenant_id=tenant_id,
        dbt_table_name="quollio_lineage_table_level",
    )

    if enable_column_lineage:
        # BUGFIX: message previously lacked a space after the period
        # ("...{enable_column_lineage}.Generate...").
        logger.info(
            f"enable_column_lineage is set to {enable_column_lineage}. Generate Databricks column to column lineage."
        )
        databricks_column_level_lineage(
            conn=conn,
            endpoint=endpoint,
            qdc_client=qdc_client,
            tenant_id=tenant_id,
            dbt_table_name="quollio_lineage_column_level",
        )
    else:
        logger.info("Skip column lineage ingestion. Set enable_column_lineage to True if you ingest column lineage.")

    logger.info("Lineage data is successfully loaded.")
    return
@@ -74,6 +95,7 @@ def load_lineage(
74
95
 
75
96
  def load_column_stats(
76
97
  conn: db.DatabricksConnectionConfig,
98
+ endpoint: str,
77
99
  qdc_client: qdc.QDCExternalAPIClient,
78
100
  tenant_id: str,
79
101
  ) -> None:
@@ -82,6 +104,7 @@ def load_column_stats(
82
104
  logger.info("Generate Databricks column stats.")
83
105
  databricks_column_stats(
84
106
  conn=conn,
107
+ endpoint=endpoint,
85
108
  qdc_client=qdc_client,
86
109
  tenant_id=tenant_id,
87
110
  )
@@ -106,7 +129,6 @@ if __name__ == "__main__":
106
129
  'build_view': Build views using dbt,
107
130
  'load_lineage': Load lineage data from created views to Quollio,
108
131
  'load_stats': Load stats from created views to Quollio,
109
- 'load_sqllineage': Load lineage data from sql parse result(alpha),
110
132
  """,
111
133
  )
112
134
  parser.add_argument(
@@ -193,8 +215,8 @@ if __name__ == "__main__":
193
215
  parser.add_argument(
194
216
  "--target_tables",
195
217
  type=str,
196
- nargs="*",
197
- choices=["quollio_lineage_table_level", "quollio_lineage_view_level"],
218
+ nargs="+",
219
+ choices=["quollio_lineage_table_level", "quollio_lineage_column_level"],
198
220
  action=env_default("DATABRICKS_TARGET_TABLES"),
199
221
  required=False,
200
222
  help="Target tables you want to create with dbt module. \
@@ -202,11 +224,29 @@ if __name__ == "__main__":
202
224
  Please specify table name with blank delimiter like tableA tableB \
203
225
  if you want to create two or more tables",
204
226
  )
227
+ parser.add_argument(
228
+ "--monitoring_table_suffix",
229
+ type=str,
230
+ action=env_default("DATABRICKS_MONITORING_TABLE_SUFFIX"),
231
+ required=False,
232
+ help="Sets the monitoring tables suffix for databricks. \
233
+ This is used to identify the monitoring tables created by the databricks monitoring tool. \
234
+ Default value is _profile_metrics",
235
+ )
236
+ parser.add_argument(
237
+ "--enable_column_lineage",
238
+ type=bool,
239
+ action=env_default("ENABLE_COLUMN_LINEAGE", store_true=True),
240
+ default=False,
241
+ required=False,
242
+ help="Whether to ingest column lineage into QDIC or not. Default value is False",
243
+ )
205
244
 
206
245
  args = parser.parse_args()
207
246
 
208
247
  conn = db.DatabricksConnectionConfig(
209
- host=args.host,
248
+ # MEMO: Metadata agent allows the string 'https://' as a host name but is not allowed by intelligence agent.
249
+ host=trim_prefix(args.host, "https://"),
210
250
  http_path=args.http_path,
211
251
  client_id=args.databricks_client_id,
212
252
  client_secret=args.databricks_client_secret,
@@ -228,10 +268,22 @@ if __name__ == "__main__":
228
268
  qdc_client = qdc.QDCExternalAPIClient(
229
269
  base_url=args.api_url, client_id=args.client_id, client_secret=args.client_secret
230
270
  )
231
- load_lineage(conn=conn, qdc_client=qdc_client, tenant_id=args.tenant_id)
271
+ load_lineage(
272
+ conn=conn,
273
+ endpoint=args.host,
274
+ qdc_client=qdc_client,
275
+ tenant_id=args.tenant_id,
276
+ enable_column_lineage=args.enable_column_lineage,
277
+ )
232
278
 
233
279
  if "load_stats" in args.commands:
234
280
  qdc_client = qdc.QDCExternalAPIClient(
235
281
  base_url=args.api_url, client_id=args.client_id, client_secret=args.client_secret
236
282
  )
237
- databricks_column_stats(conn=conn, qdc_client=qdc_client, tenant_id=args.tenant_id)
283
+ databricks_column_stats(
284
+ conn=conn,
285
+ endpoint=args.host,
286
+ qdc_client=qdc_client,
287
+ tenant_id=args.tenant_id,
288
+ monitoring_table_suffix=args.monitoring_table_suffix,
289
+ )
@@ -1,7 +1,7 @@
1
1
  version: 2
2
2
 
3
3
  model:
4
- - name: quollio_lineage_column_level
4
+ - name: quollio_lineage_table_level
5
5
  columns:
6
6
  - name: UPSTREAM_TABLES
7
7
  description: 'String column with all upstream tables in JSON format'
@@ -1,28 +1,67 @@
1
1
  {%- materialization divided_view, default %}
2
2
  {%- set identifier = model['alias'] %}
3
3
  {%- set target_relations = [] %}
4
- {%- set chunk = config.get('chunk') %}
5
4
  {%- set grant_config = config.get('grants') %}
6
5
 
7
6
  {{ run_hooks(pre_hooks, inside_transaction=False) }}
8
7
  -- `BEGIN` happens here:
9
8
  {{ run_hooks(pre_hooks, inside_transaction=True) }}
10
9
 
11
- -- fetch records
12
- {%- set query_quollio_stats_profiling_columns -%}
13
- SELECT * FROM {{ ref('quollio_stats_profiling_columns') }} WHERE table_name not like 'quollio_%'
10
+ -- fetch target_tables
11
+ {%- set query_stats_target_tables -%}
12
+ SELECT
13
+ distinct
14
+ database_name
15
+ , schema_name
16
+ , table_name
17
+ FROM
18
+ {{ ref('quollio_stats_profiling_columns') }}
19
+ WHERE
20
+ table_name not like 'quollio_%%'
14
21
  {%- endset -%}
15
- {%- set results = run_query(query_quollio_stats_profiling_columns) -%}
22
+ {%- set results = run_query(query_stats_target_tables) -%}
16
23
  {%- if execute -%}
17
- {%- set records = results.rows -%}
24
+ {%- set stats_target_tables = results.rows -%}
18
25
  {%- else -%}
19
- {%- set records = [] -%}
26
+ {%- set stats_target_tables = [] -%}
27
+ {%- endif -%}
28
+
29
+ -- skip creating views if the target profiling columns don't exist.
30
+ {%- if stats_target_tables | length == 0 -%}
31
+ {% call statement("main") %}
32
+ {{ log("No records found. Just execute select stmt for skipping call statement.", info=True) }}
33
+ select null
34
+ {% endcall %}
35
+ {%- set full_refresh_mode = (should_full_refresh()) -%}
36
+ {%- set should_revoke = should_revoke(target_relation, full_refresh_mode) %}
20
37
  {%- endif -%}
21
38
 
22
39
  -- build sql
23
- {%- for i in range(0, records|length, chunk) -%}
24
- {%- set build_sql %}
25
- {%- for record in records[i: i+chunk] -%}
40
+ {%- for stats_target_table in stats_target_tables -%}
41
+ -- get columns for statistics.
42
+ -- LISTAGG function can't be used for sys table, then it's necessary to get column for each table.
43
+ -- See https://docs.aws.amazon.com/redshift/latest/dg/c_join_PG.html.
44
+ {%- set stats_target_columns %}
45
+ SELECT
46
+ database_name
47
+ , schema_name
48
+ , table_name
49
+ , column_name
50
+ , is_bool
51
+ , is_calculable
52
+ FROM
53
+ {{ ref('quollio_stats_profiling_columns') }}
54
+ WHERE
55
+ database_name = '{{stats_target_table[0]}}'
56
+ AND schema_name = '{{stats_target_table[1]}}'
57
+ AND table_name = '{{stats_target_table[2]}}'
58
+ {%- endset -%}
59
+
60
+ {%- set results = run_query(stats_target_columns) -%}
61
+ {%- set stats_target_columns = results.rows -%}
62
+
63
+ {%- set sql_for_column_stats %}
64
+ {%- for stats_target_column in stats_target_columns -%}
26
65
  {%- if not loop.first -%}UNION{% endif %}
27
66
  SELECT
28
67
  main.db_name
@@ -41,33 +80,33 @@ SELECT * FROM {{ ref('quollio_stats_profiling_columns') }} WHERE table_name no
41
80
  (
42
81
  SELECT
43
82
  DISTINCT
44
- '{{record[0]}}'::varchar as db_name
45
- , '{{record[1]}}'::varchar as schema_name
46
- , '{{record[2]}}'::varchar as table_name
47
- , '{{record[3]}}'::varchar as column_name
48
- , {% if var("skip_heavy") == false and record[5] == true %}cast(max("{{record[3]}}") as varchar){% else %}null::varchar{% endif %} AS max_value
49
- , {% if var("skip_heavy") == false and record[5] == true %}cast(min("{{record[3]}}") as varchar){% else %}null::varchar{% endif %} AS min_value
83
+ '{{stats_target_column[0]}}'::varchar as db_name
84
+ , '{{stats_target_column[1]}}'::varchar as schema_name
85
+ , '{{stats_target_column[2]}}'::varchar as table_name
86
+ , '{{stats_target_column[3]}}'::varchar as column_name
87
+ , {% if var("aggregate_all") == True and stats_target_column[5] == True %}cast(max("{{stats_target_column[3]}}") as varchar){% else %}null::varchar{% endif %} AS max_value
88
+ , {% if var("aggregate_all") == True and stats_target_column[5] == True %}cast(min("{{stats_target_column[3]}}") as varchar){% else %}null::varchar{% endif %} AS min_value
50
89
  -- requires full table scan
51
- , {% if var("skip_heavy") == false %}cast(SUM(NVL2("{{record[3]}}", 0, 1)) as integer){% else %}null::integer{% endif %} AS null_count
52
- , APPROXIMATE COUNT(DISTINCT "{{record[3]}}") AS cardinality
90
+ , {% if var("aggregate_all") == True %}cast(SUM(NVL2("{{stats_target_column[3]}}", 0, 1)) as integer){% else %}null::integer{% endif %} AS null_count
91
+ , APPROXIMATE COUNT(DISTINCT "{{stats_target_column[3]}}") AS cardinality
53
92
  -- requires full table scan
54
- , {% if var("skip_heavy") == false and record[5] == true %}cast(avg("{{record[3]}}")as varchar){% else %}null::varchar{% endif %} AS avg_value
55
- , {% if var("skip_heavy") == false and record[5] == true %}cast(median("{{record[3]}}") as varchar){% else %}null::varchar{% endif %} AS median_value
93
+ , {% if var("aggregate_all") == True and stats_target_column[5] == True %}cast(avg("{{stats_target_column[3]}}")as varchar){% else %}null::varchar{% endif %} AS avg_value
94
+ , {% if var("aggregate_all") == True and stats_target_column[5] == True %}cast(median("{{stats_target_column[3]}}") as varchar){% else %}null::varchar{% endif %} AS median_value
56
95
  -- requires full table scan
57
- , {% if record[5] == true %}cast(STDDEV_SAMP("{{record[3]}}") as integer){% else %}null::integer{% endif %} AS stddev_value
58
- FROM {{ record[0] }}.{{ record[1] }}.{{ record[2] }}
96
+ , {% if stats_target_column[5] == True %}cast(STDDEV_SAMP("{{stats_target_column[3]}}") as integer){% else %}null::integer{% endif %} AS stddev_value
97
+ FROM {{ stats_target_column[0] }}.{{ stats_target_column[1] }}.{{ stats_target_column[2] }}
59
98
  ) main, (
60
- {%- if var("skip_heavy") == false and record[4] == false %}
99
+ {%- if var("aggregate_all") == True and stats_target_column[4] == false %}
61
100
  SELECT
62
- cast("{{record[3]}}" as varchar) mode_value
101
+ cast("{{stats_target_column[3]}}" as varchar) mode_value
63
102
  FROM (
64
103
  SELECT
65
104
  DISTINCT
66
- "{{record[3]}}"
105
+ "{{stats_target_column[3]}}"
67
106
  , ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC) AS row_num
68
- FROM {{ record[0] }}.{{ record[1] }}.{{ record[2] }}
107
+ FROM {{ stats_target_column[0] }}.{{ stats_target_column[1] }}.{{ stats_target_column[2] }}
69
108
  GROUP BY
70
- "{{record[3]}}"
109
+ "{{stats_target_column[3]}}"
71
110
  )
72
111
  WHERE
73
112
  row_num = 1
@@ -77,11 +116,11 @@ SELECT * FROM {{ ref('quollio_stats_profiling_columns') }} WHERE table_name no
77
116
  {% endfor -%}
78
117
  {%- endset %}
79
118
  -- create a view with a index as suffix
80
- {%- set target_identifier = "%s_%d"|format(model['name'], loop.index) %}
119
+ {%- set target_identifier = "%s_%s_%s_%s"|format(model['name'], stats_target_table[0], stats_target_table[1], stats_target_table[2]) %}
81
120
  {%- set target_relation = api.Relation.create(identifier=target_identifier, schema=schema, database=database, type='view') %}
82
121
  -- {{ drop_relation_if_exists(target_relation) }}
83
122
  {% call statement("main") %}
84
- {{ get_replace_view_sql(target_relation, build_sql) }}
123
+ {{ get_replace_view_sql(target_relation, sql_for_column_stats) }}
85
124
  {% endcall %}
86
125
  {%- set full_refresh_mode = (should_full_refresh()) -%}
87
126
  {%- set should_revoke = should_revoke(target_relation, full_refresh_mode) %}
@@ -1,7 +1,6 @@
1
1
  {{
2
2
  config(
3
- materialized='divided_view',
4
- chunk=20
3
+ materialized='divided_view'
5
4
  )
6
5
  }}
7
6
  -- depends_on: {{ ref('quollio_stats_profiling_columns') }}
@@ -1,51 +1,74 @@
1
1
  {%- materialization divided_view, default %}
2
2
  {%- set identifier = model['alias'] %}
3
3
  {%- set target_relations = [] %}
4
- {%- set chunk = config.get('chunk') %}
5
4
  {%- set grant_config = config.get('grants') %}
6
5
 
7
6
  {{ run_hooks(pre_hooks, inside_transaction=False) }}
8
7
  -- `BEGIN` happens here:
9
8
  {{ run_hooks(pre_hooks, inside_transaction=True) }}
10
9
 
11
- -- fetch records
12
- {%- set query_quollio_stats_profiling_columns -%}
13
- SELECT * FROM {{ ref('quollio_stats_profiling_columns') }} WHERE NOT startswith(table_name, 'QUOLLIO_')
10
+ -- fetch target_tables
11
+ {%- set query_stats_target_tables -%}
12
+ SELECT
13
+ TABLE_CATALOG
14
+ , TABLE_SCHEMA
15
+ , TABLE_NAME
16
+ , OBJECT_AGG(COLUMN_NAME, IS_CALCULABLE) AS COLUMNS_OBJ
17
+ FROM
18
+ {{ ref('quollio_stats_profiling_columns') }}
19
+ WHERE NOT startswith(table_name, 'QUOLLIO_')
20
+ GROUP BY
21
+ TABLE_CATALOG
22
+ , TABLE_SCHEMA
23
+ , TABLE_NAME
14
24
  {%- endset -%}
15
- {%- set results = run_query(query_quollio_stats_profiling_columns) -%}
25
+ {%- set results = run_query(query_stats_target_tables) -%}
16
26
  {%- if execute -%}
17
- {%- set records = results.rows -%}
27
+ {%- set stats_target_tables = results.rows -%}
18
28
  {%- else -%}
19
- {%- set records = [] -%}
29
+ {%- set stats_target_tables = [] -%}
30
+ {%- endif -%}
31
+
32
+ -- skip creating views if the target profiling columns don't exist.
33
+ {%- if stats_target_tables | length == 0 -%}
34
+ {% call statement("main") %}
35
+ {{ log("No records found. Just execute select stmt for skipping call statement.", info=True) }}
36
+ select null
37
+ {% endcall %}
38
+ {%- set full_refresh_mode = (should_full_refresh()) -%}
39
+ {%- set should_revoke = should_revoke(target_relation, full_refresh_mode) %}
20
40
  {%- endif -%}
21
41
 
22
- -- build sql
23
- {%- for i in range(0, records|length, chunk) -%}
24
- {%- set build_sql %}
25
- {%- for record in records[i: i+chunk] -%}
42
+ -- create view for each table
43
+ {%- for stats_target_table in stats_target_tables -%}
44
+ -- build sql for column value aggregation.
45
+ {%- set sql_for_column_stats %}
46
+ {% set columns_json = fromjson(stats_target_table[3]) %}
47
+ {%- for col_name, is_calclable in columns_json.items() -%}
26
48
  {%- if not loop.first %}UNION{% endif %}
27
49
  SELECT
28
50
  DISTINCT
29
- '{{record[0]}}' as db_name
30
- , '{{record[1]}}' as schema_name
31
- , '{{record[2]}}' as table_name
32
- , '{{record[3]}}' as column_name
33
- , {% if record[5] == true %}CAST(max("{{record[3]}}") AS STRING){% else %}null{% endif %} AS max_value
34
- , {% if record[5] == true %}CAST(min("{{record[3]}}") AS STRING){% else %}null{% endif %} AS min_value
35
- , COUNT_IF("{{record[3]}}" IS NULL) AS null_count
36
- , APPROX_COUNT_DISTINCT("{{record[3]}}") AS cardinality
37
- , {% if record[5] == true %}avg("{{record[3]}}"){% else %}null{% endif %} AS avg_value
38
- , {% if record[5] == true %}median("{{record[3]}}"){% else %}null{% endif %} AS median_value
39
- , {% if record[5] == true %}approx_top_k("{{record[3]}}")[0][0]{% else %}null{% endif %} AS mode_value
40
- , {% if record[5] == true %}stddev("{{record[3]}}"){% else %}null{% endif %} AS stddev_value
41
- FROM {{ record[0] }}.{{ record[1] }}.{{ record[2] }} {{ var("sample_method") }}
51
+ '{{stats_target_table[0]}}' as db_name
52
+ , '{{stats_target_table[1]}}' as schema_name
53
+ , '{{stats_target_table[2]}}' as table_name
54
+ , '{{col_name}}' as column_name
55
+ , {% if is_calclable == True %}CAST(MAX("{{col_name}}") AS STRING){% else %}NULL{% endif %} AS max_value
56
+ , {% if is_calclable == True %}CAST(MIN("{{col_name}}") AS STRING){% else %}NULL{% endif %} AS min_value
57
+ , COUNT_IF("{{col_name}}" IS NULL) AS null_count
58
+ , APPROX_COUNT_DISTINCT("{{col_name}}") AS cardinality
59
+ , {% if is_calclable == True %}AVG("{{col_name}}"){% else %}NULL{% endif %} AS avg_value
60
+ , {% if is_calclable == True %}MEDIAN("{{col_name}}"){% else %}NULL{% endif %} AS median_value
61
+ , {% if is_calclable == True %}APPROX_TOP_K("{{col_name}}")[0][0]{% else %}NULL{% endif %} AS mode_value
62
+ , {% if is_calclable == True %}STDDEV("{{col_name}}"){% else %}NULL{% endif %} AS stddev_value
63
+ FROM "{{stats_target_table[0]}}"."{{stats_target_table[1]}}"."{{stats_target_table[2]}}" {{ var("sample_method") }}
42
64
  {% endfor -%}
43
65
  {%- endset %}
66
+
44
67
  -- create a view with a index as suffix
45
- {%- set target_identifier = "%s_%d"|format(model['name'], loop.index) %}
46
- {%- set target_relation = api.Relation.create(identifier=target_identifier, schema=schema, database=database, type='view') %}
68
+ {%- set stats_view_identifier = "%s_%s_%s_%s"|format(model['name'], stats_target_table[0], stats_target_table[1], stats_target_table[2]) %}
69
+ {%- set target_relation = api.Relation.create(identifier=stats_view_identifier, schema=schema, database=database, type='view') %}
47
70
  {% call statement("main") %}
48
- {{ get_create_view_as_sql(target_relation, build_sql) }}
71
+ {{ get_create_view_as_sql(target_relation, sql_for_column_stats) }}
49
72
  {% endcall %}
50
73
  {%- set full_refresh_mode = (should_full_refresh()) -%}
51
74
  {%- set should_revoke = should_revoke(target_relation, full_refresh_mode) %}
@@ -1,7 +1,6 @@
1
1
  {{
2
2
  config(
3
- materialized='divided_view',
4
- chunk=20
3
+ materialized='divided_view'
5
4
  )
6
5
  }}
7
6
  -- depends_on: {{ ref('quollio_stats_profiling_columns') }}
@@ -36,24 +36,61 @@ WITH columns AS (
36
36
  table_catalog
37
37
  , table_schema
38
38
  , name
39
+ ), m_view_sys_columns AS (
40
+ SELECT
41
+ cols.table_catalog
42
+ , cols.table_schema
43
+ , cols.table_name
44
+ , cols.column_name
45
+ , cols.data_type
46
+ FROM
47
+ {{ source('account_usage', 'COLUMNS') }} cols
48
+ LEFT OUTER JOIN
49
+ {{ source('account_usage', 'TABLES') }} tbls
50
+ ON
51
+ cols.table_catalog = tbls.table_catalog
52
+ AND cols.table_schema = tbls.table_schema
53
+ AND cols.table_name = tbls.table_name
54
+ WHERE
55
+ tbls.table_type = 'MATERIALIZED VIEW'
56
+ AND cols.column_name = 'SYS_MV_SOURCE_PARTITION'
57
+ ), implicit_columns_removed AS (
58
+ SELECT
59
+ c.table_catalog
60
+ , c.table_schema
61
+ , c.table_name
62
+ , c.column_name
63
+ , c.data_type
64
+ FROM
65
+ columns c
66
+ INNER JOIN
67
+ accessible_tables a
68
+ ON
69
+ c.table_catalog = a.table_catalog
70
+ AND c.table_schema = a.table_schema
71
+ AND c.table_name = a.name
72
+ MINUS
73
+ SELECT
74
+ table_catalog
75
+ , table_schema
76
+ , table_name
77
+ , column_name
78
+ , data_type
79
+ FROM
80
+ m_view_sys_columns
81
+ ), final AS (
82
+ SELECT
83
+ table_catalog
84
+ , table_schema
85
+ , table_name
86
+ , column_name
87
+ , data_type
88
+ , case when data_type in('NUMBER','DECIMAL', 'DEC', 'NUMERIC',
89
+ 'INT', 'INTEGER', 'BIGINT', 'SMALLINT',
90
+ 'TINYINT', 'BYTEINT')
91
+ THEN true
92
+ else false END AS is_calculable
93
+ FROM
94
+ implicit_columns_removed
39
95
  )
40
-
41
- SELECT
42
- c.table_catalog
43
- , c.table_schema
44
- , c.table_name
45
- , c.column_name
46
- , c.data_type
47
- , case when c.data_type in('NUMBER','DECIMAL', 'DEC', 'NUMERIC',
48
- 'INT', 'INTEGER', 'BIGINT', 'SMALLINT',
49
- 'TINYINT', 'BYTEINT')
50
- THEN true
51
- else false END AS is_calculable
52
- FROM
53
- columns c
54
- INNER JOIN
55
- accessible_tables a
56
- ON
57
- c.table_catalog = a.table_catalog
58
- AND c.table_schema = a.table_schema
59
- AND c.table_name = a.name
96
+ select * from final
@@ -31,3 +31,7 @@ def setup_dbt_profile(connections_json: Dict[str, str], template_path: str, temp
31
31
  with open(profile_path, "w") as profiles:
32
32
  yaml.dump(yaml.safe_load(profiles_body), profiles, default_flow_style=False, allow_unicode=True)
33
33
  return
34
+
35
+
36
def trim_prefix(s: str, prefix: str) -> str:
    """Return *s* with a leading *prefix* removed, if present.

    BUGFIX: the previous implementation used ``s.lstrip(prefix)``, which strips
    any leading characters that appear in *prefix* (treated as a character set),
    not the prefix itself — e.g. ``"spark.example.com".lstrip("https://")``
    yields ``"ark.example.com"``. This version removes the exact prefix once.
    """
    if s.startswith(prefix):
        return s[len(prefix):]
    return s
@@ -6,6 +6,8 @@ Currently requires explicit naming of env vars to check for
6
6
 
7
7
  import argparse
8
8
  import os
9
+ from distutils.util import strtobool
10
+ from typing import Union
9
11
 
10
12
 
11
13
  # Courtesy of http://stackoverflow.com/a/10551190 with env-var retrieval fixed
@@ -28,9 +30,30 @@ class EnvDefault(argparse.Action):
28
30
  setattr(namespace, self.dest, values)
29
31
 
30
32
 
33
class EnvStoreTrue(argparse._StoreTrueAction):
    """An argparse action class that auto-sets missing default values from env vars for store_true."""

    def __init__(self, envvar, required=True, default=None, **kwargs):
        # argparse._StoreTrueAction only accepts these two of our kwargs;
        # silently drop everything else (e.g. "help" is not forwarded).
        allowed = ("option_strings", "dest")
        action_kwargs = {k: kwargs[k] for k in kwargs if k in allowed}
        # An env-var value overrides the supplied default.
        if envvar in os.environ:
            default = _convert_value_to_bool(os.environ[envvar])
        # A truthy default satisfies the requirement, so stop demanding the flag.
        if required and default:
            required = False
        super(EnvStoreTrue, self).__init__(default=default, required=required, **action_kwargs)
44
+
45
+
31
46
  # functional sugar for the above
32
- def env_default(envvar):
47
def env_default(envvar, store_true=False):
    """Return an argparse action factory bound to *envvar*.

    With ``store_true=True`` the factory produces :class:`EnvStoreTrue`
    (boolean flag semantics); otherwise it produces :class:`EnvDefault`.
    """

    def wrapper(**kwargs):
        action_cls = EnvStoreTrue if store_true else EnvDefault
        return action_cls(envvar, **kwargs)

    return wrapper
54
+
55
+
56
+ def _convert_value_to_bool(v: Union[str, bool]) -> bool:
57
+ if isinstance(v, str):
58
+ return bool(strtobool(v))
59
+ return v
@@ -0,0 +1,81 @@
1
+ import logging
2
+ from typing import Any, Dict, List
3
+
4
+ from quollio_core.profilers.lineage import gen_table_lineage_payload, parse_bigquery_table_lineage
5
+ from quollio_core.repository import qdc
6
+ from quollio_core.repository.bigquery import BigQueryClient, GCPLineageClient, get_entitiy_reference, get_search_request
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
+ def bigquery_table_lineage(
12
+ qdc_client: qdc.QDCExternalAPIClient,
13
+ tenant_id: str,
14
+ project_id: str,
15
+ regions: list,
16
+ org_id: str,
17
+ credentials: Any,
18
+ ):
19
+ lineage_client = GCPLineageClient(credentials)
20
+ bq_client = BigQueryClient(credentials)
21
+
22
+ datasets = bq_client.list_datasets(project_id)
23
+ all_tables = generate_table_list(datasets, bq_client)
24
+ lineage_links = generate_lineage_links(all_tables, lineage_client, project_id, regions)
25
+ lineage_links = parse_bigquery_table_lineage(lineage_links)
26
+
27
+ update_table_lineage_inputs = gen_table_lineage_payload(tenant_id=tenant_id, endpoint=org_id, tables=lineage_links)
28
+
29
+ req_count = 0
30
+ for update_table_lineage_input in update_table_lineage_inputs:
31
+ logger.info(
32
+ "Generating table lineage. downstream: %s -> %s-> %s",
33
+ update_table_lineage_input.downstream_database_name,
34
+ update_table_lineage_input.downstream_schema_name,
35
+ update_table_lineage_input.downstream_table_name,
36
+ )
37
+ status_code = qdc_client.update_lineage_by_id(
38
+ global_id=update_table_lineage_input.downstream_global_id,
39
+ payload=update_table_lineage_input.upstreams.as_dict(),
40
+ )
41
+ if status_code == 200:
42
+ req_count += 1
43
+ logger.info("Generating table lineage is finished. %s lineages are ingested.", req_count)
44
+
45
+
46
def generate_table_list(datasets: List[Any], bq_client: "BigQueryClient") -> List[str]:
    """Return fully-qualified names ("project.dataset.table") of every table,
    view and materialized view contained in *datasets*.

    BUGFIX: the annotation previously claimed ``datasets: List[str]`` but each
    element is a dataset object whose ``dataset_id`` attribute is read here.
    Also builds the result in a single pass instead of materializing an
    intermediate table-object list.
    """
    target_types = ("TABLE", "VIEW", "MATERIALIZED_VIEW")
    all_table_names: List[str] = []
    for dataset in datasets:
        for table in bq_client.list_tables(dataset.dataset_id):
            if table.table_type in target_types:
                all_table_names.append(f"{table.project}.{table.dataset_id}.{table.table_id}")
    return all_table_names
62
+
63
+
64
def generate_lineage_links(
    all_tables: List[str], lineage_client: GCPLineageClient, project_id: str, regions: List[str]
) -> Dict[str, List[str]]:
    """Map each downstream table FQN to the list of its upstream table FQNs.

    Queries the GCP lineage API once per (table, region) pair; the
    "bigquery:" scheme prefix is stripped from both ends of every link.
    """
    lineage_links: Dict[str, List[str]] = {}
    for table_fqn in all_tables:
        downstream = get_entitiy_reference()
        downstream.fully_qualified_name = f"bigquery:{table_fqn}"

        for region in regions:
            search_request = get_search_request(downstream_table=downstream, project_id=project_id, region=region)
            for link in lineage_client.get_links(request=search_request):
                target = str(link.target.fully_qualified_name).replace("bigquery:", "")
                source = str(link.source.fully_qualified_name).replace("bigquery:", "")
                lineage_links.setdefault(target, []).append(source)

    return lineage_links
@@ -14,6 +14,7 @@ logger = logging.getLogger(__name__)
14
14
 
15
15
  def databricks_table_level_lineage(
16
16
  conn: databricks.DatabricksConnectionConfig,
17
+ endpoint: str,
17
18
  qdc_client: qdc.QDCExternalAPIClient,
18
19
  tenant_id: str,
19
20
  dbt_table_name: str = "quollio_lineage_table_level",
@@ -31,7 +32,7 @@ def databricks_table_level_lineage(
31
32
  tables = parse_databricks_table_lineage(results)
32
33
  update_table_lineage_inputs = gen_table_lineage_payload(
33
34
  tenant_id=tenant_id,
34
- endpoint=conn.host,
35
+ endpoint=endpoint,
35
36
  tables=tables,
36
37
  )
37
38
 
@@ -55,6 +56,7 @@ def databricks_table_level_lineage(
55
56
 
56
57
  def databricks_column_level_lineage(
57
58
  conn: databricks.DatabricksConnectionConfig,
59
+ endpoint: str,
58
60
  qdc_client: qdc.QDCExternalAPIClient,
59
61
  tenant_id: str,
60
62
  dbt_table_name: str = "quollio_lineage_column_level",
@@ -72,7 +74,7 @@ def databricks_column_level_lineage(
72
74
 
73
75
  update_column_lineage_inputs = gen_column_lineage_payload(
74
76
  tenant_id=tenant_id,
75
- endpoint=conn.host,
77
+ endpoint=endpoint,
76
78
  columns=results,
77
79
  )
78
80
 
@@ -99,7 +101,7 @@ def databricks_column_level_lineage(
99
101
 
100
102
 
101
103
  def _get_monitoring_tables(
102
- conn: databricks.DatabricksConnectionConfig, monitoring_table_id: str = "_profile_metrics"
104
+ conn: databricks.DatabricksConnectionConfig, monitoring_table_suffix: str = "_profile_metrics"
103
105
  ) -> List[Dict[str, str]]:
104
106
  tables = []
105
107
  query = f"""
@@ -110,7 +112,9 @@ def _get_monitoring_tables(
110
112
  CONCAT(table_catalog, '.', table_schema, '.', table_name) AS table_fqdn
111
113
  FROM
112
114
  system.information_schema.tables
113
- WHERE table_name LIKE "%{monitoring_table_id}"
115
+ WHERE
116
+ table_name LIKE "%{monitoring_table_suffix}"
117
+ AND table_name NOT LIKE ('quollio_%')
114
118
  """
115
119
  with databricks.DatabricksQueryExecutor(config=conn) as databricks_executor:
116
120
  tables = databricks_executor.get_query_results(query)
@@ -123,9 +127,9 @@ def _get_monitoring_tables(
123
127
 
124
128
 
125
129
  def _get_column_stats(
126
- conn: databricks.DatabricksConnectionConfig, monitoring_table_id: str = "_profile_metrics"
130
+ conn: databricks.DatabricksConnectionConfig, monitoring_table_suffix: str = "_profile_metrics"
127
131
  ) -> List[Dict[str, str]]:
128
- tables = _get_monitoring_tables(conn, monitoring_table_id)
132
+ tables = _get_monitoring_tables(conn, monitoring_table_suffix)
129
133
  if not tables:
130
134
  return []
131
135
  stats = []
@@ -153,6 +157,8 @@ def _get_column_stats(
153
157
  MAX(t.window) AS LATEST
154
158
  FROM
155
159
  {monitoring_table} t
160
+ WHERE
161
+ t.column_name not in (':table')
156
162
  GROUP BY
157
163
  t.COLUMN_NAME,
158
164
  t.DATA_TYPE,
@@ -176,13 +182,14 @@ def _get_column_stats(
176
182
 
177
183
  def databricks_column_stats(
178
184
  conn: databricks.DatabricksConnectionConfig,
185
+ endpoint: str,
179
186
  qdc_client: qdc.QDCExternalAPIClient,
180
187
  tenant_id: str,
181
- monitoring_table_id: str = "_profile_metrics",
188
+ monitoring_table_suffix: str = "_profile_metrics",
182
189
  ) -> None:
183
- table_stats = _get_column_stats(conn, monitoring_table_id)
190
+ table_stats = _get_column_stats(conn, monitoring_table_suffix)
184
191
  for table in table_stats:
185
- stats = gen_table_stats_payload(tenant_id, conn.host, table)
192
+ stats = gen_table_stats_payload(tenant_id=tenant_id, endpoint=endpoint, stats=table)
186
193
  for stat in stats:
187
194
  status_code = qdc_client.update_stats_by_id(
188
195
  global_id=stat.global_id,
@@ -153,3 +153,17 @@ def parse_databricks_table_lineage(results: List) -> List[Dict[str, Dict]]:
153
153
  payload["UPSTREAM_TABLES"] = json.loads(result["UPSTREAM_TABLES"])
154
154
  payloads.append(payload)
155
155
  return payloads
156
+
157
+
158
+ def parse_bigquery_table_lineage(tables: Dict) -> List[Dict[str, Dict]]:
159
+ payloads = list()
160
+ for downstream, upstream in tables.items():
161
+ payload = {
162
+ "DOWNSTREAM_TABLE_NAME": "",
163
+ "UPSTREAM_TABLES": [],
164
+ }
165
+ payload["DOWNSTREAM_TABLE_NAME"] = downstream
166
+ for upstream_table in upstream:
167
+ payload["UPSTREAM_TABLES"].append({"upstream_object_name": upstream_table})
168
+ payloads.append(payload)
169
+ return payloads
quollio_core/redshift.py CHANGED
@@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)
16
16
 
17
17
  def build_view(
18
18
  conn: redshift.RedshiftConnectionConfig,
19
- skip_heavy: bool,
19
+ aggregate_all: bool = False,
20
20
  target_tables: str = "",
21
21
  log_level: str = "info",
22
22
  ) -> None:
@@ -29,9 +29,9 @@ def build_view(
29
29
  project_path = f"{current_dir}/dbt_projects/redshift"
30
30
  template_path = f"{current_dir}/dbt_projects/redshift/profiles"
31
31
  template_name = "profiles_template.yml"
32
- options = '{{"query_user": {query_user}, "skip_heavy": {skip_heavy}, "target_database": {database}}}'.format(
32
+ options = '{{"query_user": {query_user}, "aggregate_all": {aggregate_all}, "target_database": {database}}}'.format(
33
33
  query_user=conn.query_user,
34
- skip_heavy=skip_heavy,
34
+ aggregate_all=aggregate_all,
35
35
  database=conn.database,
36
36
  )
37
37
 
@@ -210,12 +210,12 @@ if __name__ == "__main__":
210
210
  help="Target schema name where the views are built by dbt",
211
211
  )
212
212
  parser.add_argument(
213
- "--skip_heavy",
213
+ "--aggregate_all",
214
214
  type=bool,
215
- action=env_default("REDSHIFT_SKIP_HEAVY"),
216
- default=True,
215
+ action=env_default("REDSHIFT_AGGREGATE_ALL", store_true=True),
216
+ default=False,
217
217
  required=False,
218
- help="Skip heavy queries when building views by dbt",
218
+ help="Aggregate all stats values. False by default.",
219
219
  )
220
220
  parser.add_argument(
221
221
  "--target_tables",
@@ -283,7 +283,7 @@ if __name__ == "__main__":
283
283
  if "build_view" in args.commands:
284
284
  build_view(
285
285
  conn=conn,
286
- skip_heavy=args.skip_heavy,
286
+ aggregate_all=args.aggregate_all,
287
287
  target_tables=args.target_tables,
288
288
  log_level=args.log_level,
289
289
  )
@@ -0,0 +1,61 @@
1
+ import logging
2
+
3
+ from google.cloud.bigquery import Client
4
+ from google.cloud.datacatalog_lineage_v1 import EntityReference, LineageClient, SearchLinksRequest
5
+ from google.oauth2.service_account import Credentials
6
+ from googleapiclient.discovery import build
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
+ class BigQueryClient:
12
+ def __init__(self, credentials: Credentials) -> None:
13
+ self.client = self.__initialze(credentials=credentials)
14
+
15
+ def __initialze(self, credentials: Credentials) -> Client:
16
+ client = Client(credentials=credentials)
17
+ return client
18
+
19
+ def list_datasets(self, project_id) -> list:
20
+ datasets = list(self.client.list_datasets(project_id))
21
+ logger.debug("Found %s datasets in project %s", len(datasets), project_id)
22
+ return datasets
23
+
24
+ def list_tables(self, dataset_id) -> list:
25
+ tables = list(self.client.list_tables(dataset_id))
26
+ logger.debug("Found %s tables in dataset %s", len(tables), dataset_id)
27
+ return list(self.client.list_tables(dataset_id))
28
+
29
+
30
+ class GCPLineageClient:
31
+ def __init__(self, credentials: Credentials) -> None:
32
+ self.client = self.__initialze(credentials=credentials)
33
+
34
+ def __initialze(self, credentials: Credentials) -> LineageClient:
35
+ client = LineageClient(credentials=credentials)
36
+ return client
37
+
38
+ def get_links(self, request: SearchLinksRequest) -> list:
39
+ response = self.client.search_links(request)
40
+ return response.links
41
+
42
+
43
+ def get_entitiy_reference() -> EntityReference:
44
+ return EntityReference()
45
+
46
+
47
+ def get_search_request(downstream_table: EntityReference, project_id: str, region: str) -> SearchLinksRequest:
48
+ return SearchLinksRequest(target=downstream_table, parent=f"projects/{project_id}/locations/{region.lower()}")
49
+
50
+
51
+ def get_credentials(credentials_json: dict) -> Credentials:
52
+ return Credentials.from_service_account_info(credentials_json)
53
+
54
+
55
+ def get_org_id(credentials_json: dict) -> str:
56
+ credentials = get_credentials(credentials_json)
57
+ crm_service = build("cloudresourcemanager", "v1", credentials=credentials)
58
+ project_id = credentials_json["project_id"]
59
+ project = crm_service.projects().get(projectId=project_id).execute()
60
+ org_id = project["parent"]["id"]
61
+ return org_id
quollio_core/snowflake.py CHANGED
@@ -72,6 +72,7 @@ def load_lineage(
72
72
  conn: snowflake.SnowflakeConnectionConfig,
73
73
  qdc_client: qdc.QDCExternalAPIClient,
74
74
  tenant_id: str,
75
+ enable_column_lineage: bool = False,
75
76
  ) -> None:
76
77
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
77
78
 
@@ -82,12 +83,17 @@ def load_lineage(
82
83
  tenant_id=tenant_id,
83
84
  )
84
85
 
85
- logger.info("Generate Snowflake column to column lineage.")
86
- snowflake_column_to_column_lineage(
87
- conn=conn,
88
- qdc_client=qdc_client,
89
- tenant_id=tenant_id,
90
- )
86
+ if enable_column_lineage:
87
+ logger.info(
88
+ f"enable_column_lineage is set to {enable_column_lineage}.Generate Snowflake column to column lineage."
89
+ )
90
+ snowflake_column_to_column_lineage(
91
+ conn=conn,
92
+ qdc_client=qdc_client,
93
+ tenant_id=tenant_id,
94
+ )
95
+ else:
96
+ logger.info("Skip column lineage ingestion. Set enable_column_lineage to True if you ingest column lineage.")
91
97
 
92
98
  logger.info("Lineage data is successfully loaded.")
93
99
 
@@ -264,6 +270,14 @@ if __name__ == "__main__":
264
270
  required=False,
265
271
  help="The client secrete that is created on Quollio console to let clients access Quollio External API",
266
272
  )
273
+ parser.add_argument(
274
+ "--enable_column_lineage",
275
+ type=bool,
276
+ action=env_default("ENABLE_COLUMN_LINEAGE", store_true=True),
277
+ default=False,
278
+ required=False,
279
+ help="Whether to ingest column lineage into QDIC or not. Default value is False",
280
+ )
267
281
  args = parser.parse_args()
268
282
  conn = snowflake.SnowflakeConnectionConfig(
269
283
  account_id=args.account_id,
@@ -296,6 +310,7 @@ if __name__ == "__main__":
296
310
  conn=conn,
297
311
  qdc_client=qdc_client,
298
312
  tenant_id=args.tenant_id,
313
+ enable_column_lineage=args.enable_column_lineage,
299
314
  )
300
315
  if "load_stats" in args.commands:
301
316
  qdc_client = qdc.QDCExternalAPIClient(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: quollio-core
3
- Version: 0.4.6
3
+ Version: 0.4.8
4
4
  Summary: Quollio Core
5
5
  Author-email: quollio-dev <qt.dev@quollio.com>
6
6
  Maintainer-email: RyoAriyama <ryo.arym@gmail.com>, tharuta <35373297+TakumiHaruta@users.noreply.github.com>
@@ -31,6 +31,11 @@ Requires-Dist: snowflake-connector-python==3.5.0
31
31
  Requires-Dist: databricks-sdk==0.17.0
32
32
  Requires-Dist: databricks-sql-connector==2.9.5
33
33
  Requires-Dist: sqlglot==20.8.0
34
+ Requires-Dist: google-cloud==0.34.0
35
+ Requires-Dist: google-cloud-bigquery==3.22.0
36
+ Requires-Dist: google-cloud-datacatalog==3.19.0
37
+ Requires-Dist: google-cloud-datacatalog-lineage==0.3.6
38
+ Requires-Dist: google-api-python-client==2.131.0
34
39
  Requires-Dist: black>=22.3.0 ; extra == "test"
35
40
  Requires-Dist: coverage>=7.3.2 ; extra == "test"
36
41
  Requires-Dist: isort>=5.10.1 ; extra == "test"
@@ -74,7 +79,7 @@ To see available commands and options, please run the following command. (ex: Sn
74
79
  コマンドやオプションの詳細については、下記のコマンドを実行してください。(例: Snowflake)
75
80
 
76
81
  ```
77
- $ python3 -m quollio_core.snowflake -h
82
+ $ python -m quollio_core.snowflake -h
78
83
  ```
79
84
 
80
85
  Then run commands with the options provided.
@@ -1,7 +1,8 @@
1
- quollio_core/__init__.py,sha256=ruo7YFCZ8vY6Xsa1V1XTfT-ulLkKuB1As4n1BJcylfs,83
2
- quollio_core/bricks.py,sha256=PCHyh_I6M4PBRpLDtc5DTr7rpharllu-vcSAhySM4xg,8001
3
- quollio_core/redshift.py,sha256=wap7QmV-YuHZAomIrHXytGUuxhQ5MFEb38QDY3XrThQ,10167
4
- quollio_core/snowflake.py,sha256=8IMbdTjCDBIiS_GF8APWRTVWNj6EM3ZT8MRN12T-1v0,10266
1
+ quollio_core/__init__.py,sha256=bdxhTj5XUDW12XHez-8E9LL3JCLZ2EQpEibqkSXg8xQ,83
2
+ quollio_core/bigquery.py,sha256=2DrUMo4evcH4BHiUtnY48IjmsdAsQMoPGtNx8SRoyzQ,3528
3
+ quollio_core/bricks.py,sha256=BVwh9clJMXe_YXbd78ku6Y9470cYYE3CflcuCRzrY3I,9689
4
+ quollio_core/redshift.py,sha256=wVqtNnv1fmZ5QwYq4A2JVLHH3VrCYPEZBx39NoCqRIM,10220
5
+ quollio_core/snowflake.py,sha256=WRtnXxc01AaV4oTa382MTppYvmOn4mOY7jFDNDnzxDA,10922
5
6
  quollio_core/dbt_projects/databricks/.gitignore,sha256=1jJAyXSzJ3YUm0nx3i7wUSE4RjQMX3ad6F8O88UbtzI,29
6
7
  quollio_core/dbt_projects/databricks/README.md,sha256=ZpRQyhFAODAiS8dc1Kb_ndkul4cu4o4udN_EMa49CU4,440
7
8
  quollio_core/dbt_projects/databricks/dbt_project.yml,sha256=3sH98RNk7TnphvI3yEdXDstb92kW5BNxr-cT0tXhwzk,480
@@ -12,7 +13,7 @@ quollio_core/dbt_projects/databricks/macros/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5
12
13
  quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.sql,sha256=mZ4mDCEZTwiSgCUr-w2QGze2-NQapt45EyQNQkCOI5I,2171
13
14
  quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.yml,sha256=tidAK_FMhYYuPTxFoactwcXYQPSMZwQTxWrGBly4-1o,450
14
15
  quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.sql,sha256=K63J7n7NIM2Jc7c4IF21JcW8AYOm9HxBNDiveUE4kzU,1558
15
- quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.yml,sha256=ZGjz6C2bguDJxJyA7LhCHbuyZSRPEaRMXln9rxcotuo,344
16
+ quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.yml,sha256=0nQhdU5gBlHLAJHwsPotgCVrm6tReBdEdczvnxnvmpo,343
16
17
  quollio_core/dbt_projects/databricks/models/sources.yml,sha256=JXU-8lNsKm8dxIjmWos1vbTsWiea-9-pXnntik63ZpA,2231
17
18
  quollio_core/dbt_projects/databricks/profiles/profiles_template.yml,sha256=Dw1RuTrE04yvGIaPQL7uc6pgSWloKHhu0KrduzJ1Z6M,353
18
19
  quollio_core/dbt_projects/databricks/seeds/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -23,14 +24,14 @@ quollio_core/dbt_projects/redshift/package-lock.yml,sha256=Gef3zDCLF41j_FL-_h3sI
23
24
  quollio_core/dbt_projects/redshift/packages.yml,sha256=p9Bl2C44gdC6iYTUkz_15yq3xahSJf2IA3WOXLF_ahA,61
24
25
  quollio_core/dbt_projects/redshift/analyses/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
26
  quollio_core/dbt_projects/redshift/macros/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
- quollio_core/dbt_projects/redshift/macros/materialization/divided_view.sql,sha256=3tRQeXXdjn3aSZ94DgMN6A6yMlTCE8aMKXeIdcZVDIM,3998
27
+ quollio_core/dbt_projects/redshift/macros/materialization/divided_view.sql,sha256=rSKZJHpdU7vy0HvrI10nmnjFMkB5OI3oRmHfAvkjlyk,5686
27
28
  quollio_core/dbt_projects/redshift/models/quollio_lineage_table_level.sql,sha256=AVPcNXfVYHwyutJzg61QT_VF9umfoC4i8C2HecAU4d4,2042
28
29
  quollio_core/dbt_projects/redshift/models/quollio_lineage_table_level.yml,sha256=UcrXpUTT3ihBHKPljvjw8xHz-ND60PfvMJaXqGKOEic,236
29
30
  quollio_core/dbt_projects/redshift/models/quollio_lineage_view_level.sql,sha256=A0CTgQwlz8InabA0cHuygV2GMZGYuAa7Zd5DIUOYzQI,1289
30
31
  quollio_core/dbt_projects/redshift/models/quollio_lineage_view_level.yml,sha256=7Npwo3svL9715HpNU2MKzRI014Da4tIStLzAHmd0UaU,235
31
32
  quollio_core/dbt_projects/redshift/models/quollio_sqllineage_sources.sql,sha256=e0A_Wqv_OcC8gG_yzTbI59vT-4vCI3JiAzFlmkvLnMk,1049
32
33
  quollio_core/dbt_projects/redshift/models/quollio_sqllineage_sources.yml,sha256=qgazupx3ca4P8R0loY5F9hyCz2fmAcWqZ6iOySo_NoY,377
33
- quollio_core/dbt_projects/redshift/models/quollio_stats_columns.sql,sha256=lH8xPmAzSW-6wi_g1y_LFVhtFgHzBvTweVX-MKeJzUQ,302
34
+ quollio_core/dbt_projects/redshift/models/quollio_stats_columns.sql,sha256=BzvP9gKMFItmwqEQ4bDgtS-Invxhhe6L73Qe1ucxfHo,284
34
35
  quollio_core/dbt_projects/redshift/models/quollio_stats_columns.yml,sha256=V_BESPk6IqE52ExT26-78As9l9AlWW86-Geb5PIhThU,67
35
36
  quollio_core/dbt_projects/redshift/models/quollio_stats_profiling_columns.sql,sha256=IPmHf51Er2jE9cMQHybT4adRxwwi2CEmgrBSv1Oeduc,1592
36
37
  quollio_core/dbt_projects/redshift/models/quollio_stats_profiling_columns.yml,sha256=s-p9F44TdwoFYlQN-b9gHzcFYOMqhqDGA9ORS_M4lhs,523
@@ -45,38 +46,40 @@ quollio_core/dbt_projects/snowflake/package-lock.yml,sha256=Gef3zDCLF41j_FL-_h3s
45
46
  quollio_core/dbt_projects/snowflake/packages.yml,sha256=p9Bl2C44gdC6iYTUkz_15yq3xahSJf2IA3WOXLF_ahA,61
46
47
  quollio_core/dbt_projects/snowflake/analyses/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
48
  quollio_core/dbt_projects/snowflake/macros/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
- quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql,sha256=T4nFL76AbuQHBiLSAvNchoJnRjb1IRj4nToOyTNvLvw,2782
49
+ quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql,sha256=BC4fPMw2qEc39WeJbimalHeIj-klcDk1EF-zbcYfk0w,3704
49
50
  quollio_core/dbt_projects/snowflake/models/quollio_lineage_column_level.sql,sha256=Cxt2U2aXNG_LUm63jwTyxUkapkrB7_uHmesx1PTcMJM,4721
50
51
  quollio_core/dbt_projects/snowflake/models/quollio_lineage_column_level.yml,sha256=a2uNIAh-xw51eu-GmHVuAnGnTbwK7h8-DjDeQtK3KaQ,711
51
52
  quollio_core/dbt_projects/snowflake/models/quollio_lineage_table_level.sql,sha256=Q_7vY1N1Hi1LFv5CxkkdR3gQw8fTDnoKECTLSK4gd3o,5112
52
53
  quollio_core/dbt_projects/snowflake/models/quollio_lineage_table_level.yml,sha256=QXlMBIkHo1Y-ANveKVx1FwyoYTMRXKgE2Z-PNouhQTw,325
53
54
  quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.sql,sha256=gd6JhQO13xBIvOoeXcce1I7amNGytwE8pwUApXehwqM,1520
54
55
  quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.yml,sha256=qgazupx3ca4P8R0loY5F9hyCz2fmAcWqZ6iOySo_NoY,377
55
- quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.sql,sha256=lH8xPmAzSW-6wi_g1y_LFVhtFgHzBvTweVX-MKeJzUQ,302
56
+ quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.sql,sha256=BzvP9gKMFItmwqEQ4bDgtS-Invxhhe6L73Qe1ucxfHo,284
56
57
  quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.yml,sha256=V_BESPk6IqE52ExT26-78As9l9AlWW86-Geb5PIhThU,67
57
- quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.sql,sha256=kt2aFimIPkgKI_UQTjvfRlAjrdSbO8z6C_749pnXrnE,1382
58
+ quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.sql,sha256=ex5ax-KJoM_P1QspkolOUOQg9BazTdZO1Jllp08PQo8,2265
58
59
  quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.yml,sha256=W39VAmFnnX6RBoW7B_4CConC1lm0Jm9o50Jsz9bYZzY,538
59
60
  quollio_core/dbt_projects/snowflake/models/sources.yml,sha256=vGSV33cNj4UUyPUcYS-JFgc3r8KvSLfiA7qhbDCUU9s,10975
60
61
  quollio_core/dbt_projects/snowflake/profiles/profiles_template.yml,sha256=gcZsgdGP461QuUM9jLbBKdadT8cHTXgNarq_azOOMhk,379
61
62
  quollio_core/dbt_projects/snowflake/seeds/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
62
63
  quollio_core/dbt_projects/snowflake/snapshots/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
63
64
  quollio_core/helper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
64
- quollio_core/helper/core.py,sha256=-3vCDlKExWPHJmWuZQNpYnvPP55uoGwRpTtnFvsDxIo,1127
65
- quollio_core/helper/env_default.py,sha256=YIL9hfrPs1ViL1AXohnbWEjVBUDXbVVakH0ZoSZWOlc,1202
65
+ quollio_core/helper/core.py,sha256=wbu4FWI7YiFEttXGSuj3tMyAhtPAFlHOjDpWJGNXOHA,1202
66
+ quollio_core/helper/env_default.py,sha256=H6gbSGUPrEDZr4YDrL49hbOpw6RntI4U82kX1q6vUnI,2148
66
67
  quollio_core/profilers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
67
- quollio_core/profilers/databricks.py,sha256=oX1N_FmXvdMpCbZj4xRyGwcbsMQr_C6VOMGZrlBV44E,7257
68
- quollio_core/profilers/lineage.py,sha256=HrTjXxrchETRmHEb5tSFzzHdb6z2KMw-DTnUSeKxmr0,6379
68
+ quollio_core/profilers/bigquery.py,sha256=e1Y8cZR-LxI9mSsYb0DurQyy0eCjM_kAKLfvl4IuQLE,3262
69
+ quollio_core/profilers/databricks.py,sha256=aNUIk48GuwSDEnmNzhZ6yv6TqAc1dBJt9e81NiTGBjo,7496
70
+ quollio_core/profilers/lineage.py,sha256=4FyxIuPBrUFihqZryqTQBcfB0Z7634lKl_WwkD82vzE,6865
69
71
  quollio_core/profilers/redshift.py,sha256=obdHVIsOM1bwHGdvYKalsJcTXwLK02kAKQMSBzSvsDo,7862
70
72
  quollio_core/profilers/snowflake.py,sha256=C1LC19ZaUMwNoXjsbnez0xANydJYs8oNRt6tixWKDq8,9090
71
73
  quollio_core/profilers/sqllineage.py,sha256=oCyl4tpXL5bkfguXAzTHSB9kZBL3tQK_rfcJ4XQMrLo,5177
72
74
  quollio_core/profilers/stats.py,sha256=PG1NbbUSpc1JuEYvBzD66rd24tp0C13_Y5Y7vRjYG1c,4720
73
75
  quollio_core/repository/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
76
+ quollio_core/repository/bigquery.py,sha256=KMJTeF4OUxtaJt0ymoJ4tkrMKq8yLyMYaMxNvU5yd_Y,2271
74
77
  quollio_core/repository/databricks.py,sha256=m68tja5N-QxH3VqEq-mOJKBeR2qldSgj_L9iIxvWwm0,1945
75
78
  quollio_core/repository/dbt.py,sha256=HXqW_xa4xYPh9CnKkg4L1gwG3SGjj2BAYoWgzWMFU4U,770
76
79
  quollio_core/repository/qdc.py,sha256=VCmzAUvjLemw1os5TaPtfBFkMCOMuPeftjZmUPhFj2Y,4702
77
80
  quollio_core/repository/redshift.py,sha256=UVHIpYzDQ2AbBTAGa8DgmEenG0NZsHfYroR1MmEPQGA,2991
78
81
  quollio_core/repository/snowflake.py,sha256=1YVMDfb9euJKvikv1pk_IxVF6SVsiemSvZ-WMTSbY7E,1874
79
- quollio_core-0.4.6.dist-info/LICENSE,sha256=V8j_M8nAz8PvAOZQocyRDX7keai8UJ9skgmnwqETmdY,34520
80
- quollio_core-0.4.6.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
81
- quollio_core-0.4.6.dist-info/METADATA,sha256=iXDj06XhRtzAmJ6rTQGYWxBbHiof6TOzTcU-202wguc,6571
82
- quollio_core-0.4.6.dist-info/RECORD,,
82
+ quollio_core-0.4.8.dist-info/LICENSE,sha256=V8j_M8nAz8PvAOZQocyRDX7keai8UJ9skgmnwqETmdY,34520
83
+ quollio_core-0.4.8.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
84
+ quollio_core-0.4.8.dist-info/METADATA,sha256=BRWWUVTFdakMWC4muSecvVCYOkNYM8u5BZipXVUFqJE,6803
85
+ quollio_core-0.4.8.dist-info/RECORD,,