quollio-core 0.4.12__py3-none-any.whl → 0.4.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
quollio_core/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  """Quollio Core"""
2
2
 
3
- __version__ = "0.4.12"
3
+ __version__ = "0.4.13"
4
4
  __author__ = "Quollio Technologies, Inc"
quollio_core/bigquery.py CHANGED
@@ -1,19 +1,37 @@
1
1
  import argparse
2
2
  import json
3
- import logging
3
+
4
+ from google.auth.credentials import Credentials
4
5
 
5
6
  from quollio_core.helper.env_default import env_default
6
- from quollio_core.helper.log import set_log_level
7
- from quollio_core.profilers.bigquery import bigquery_table_lineage
7
+ from quollio_core.helper.log_utils import configure_logging, error_handling_decorator, logger
8
+ from quollio_core.profilers.bigquery import bigquery_table_lineage, bigquery_table_stats
8
9
  from quollio_core.repository import qdc
9
- from quollio_core.repository.bigquery import get_credentials, get_org_id
10
+ from quollio_core.repository.bigquery import BigQueryClient, get_credentials, get_org_id
11
+
12
+
13
def initialize_credentials(credentials_json: str) -> Credentials:
    """Decode the credentials JSON string and hand the result to ``get_credentials``.

    Args:
        credentials_json: JSON document, as a string, describing the credentials.

    Returns:
        Whatever ``get_credentials`` returns for the decoded document.
    """
    credentials_info = json.loads(credentials_json)
    return get_credentials(credentials_info)
15
+
10
16
 
11
- logger = logging.getLogger(__name__)
17
def initialize_org_id(credentials_json: str) -> str:
    """Decode the credentials JSON string and hand the result to ``get_org_id``.

    Args:
        credentials_json: JSON document, as a string, describing the credentials.

    Returns:
        Whatever ``get_org_id`` returns for the decoded document.
    """
    credentials_info = json.loads(credentials_json)
    return get_org_id(credentials_info)
12
19
 
13
20
 
21
def initialize_bq_client(credentials: Credentials, project_id: str) -> BigQueryClient:
    """Build a ``BigQueryClient`` from the given credentials and project id."""
    client_kwargs = {"credentials": credentials, "project_id": project_id}
    return BigQueryClient(**client_kwargs)
23
+
24
+
25
+ @error_handling_decorator
14
26
  def load_lineage(
15
- qdc_client: qdc.QDCExternalAPIClient, project_id: str, regions: list, tenant_id: str, credentials: dict, org_id: str
16
- ):
27
+ tenant_id: str,
28
+ project_id: str,
29
+ regions: list,
30
+ org_id: str,
31
+ credentials: Credentials,
32
+ qdc_client: qdc.QDCExternalAPIClient,
33
+ ) -> None:
34
+ logger.info("Loading lineage data.")
17
35
  bigquery_table_lineage(
18
36
  qdc_client=qdc_client,
19
37
  tenant_id=tenant_id,
@@ -22,29 +40,53 @@ def load_lineage(
22
40
  credentials=credentials,
23
41
  org_id=org_id,
24
42
  )
43
+ logger.info("Lineage data loaded successfully.")
44
+
45
+
46
@error_handling_decorator
def load_stats(
    conn: BigQueryClient,
    tenant_id: str,
    org_id: str,
    qdc_client: qdc.QDCExternalAPIClient,
    dataplex_stats_tables: list,
) -> None:
    """Run ``bigquery_table_stats`` for the given tables, logging before and after.

    Args:
        conn: BigQuery client, forwarded as ``bq_client``.
        tenant_id: Tenant id forwarded to the profiler.
        org_id: Organisation id forwarded to the profiler.
        qdc_client: Quollio external API client forwarded to the profiler.
        dataplex_stats_tables: Stats table names forwarded to the profiler.
    """
    profiler_kwargs = {
        "bq_client": conn,
        "qdc_client": qdc_client,
        "tenant_id": tenant_id,
        "org_id": org_id,
        "dataplex_stats_tables": dataplex_stats_tables,
    }
    logger.info("Loading statistics data.")
    bigquery_table_stats(**profiler_kwargs)
    logger.info("Statistics data loaded successfully.")
25
63
 
26
64
 
27
65
  if __name__ == "__main__":
28
66
  parser = argparse.ArgumentParser(
29
- prog="Quollio Intelligence Agent for Google BigQuery",
30
- description="Collect lineage and stats from Google BigQuery and load to Quollio Data Catalog",
67
+ prog="Quollio Intelligence Agent for BigQuery",
68
+ description="Load lineage and stats to Quollio from BigQuery using Dataplex and BigQuery APIs",
31
69
  epilog="Copyright (c) 2024 Quollio Technologies, Inc.",
32
70
  )
33
71
  parser.add_argument(
34
72
  "commands",
35
- choices=["load_lineage"],
73
+ choices=["load_lineage", "load_stats"],
36
74
  type=str,
37
75
  nargs="+",
38
76
  help="""
39
77
  The command to execute.
40
- 'load_lineage': Load lineage data from Google Data Catalog to Quollio,
78
+ 'load_lineage': Load lineage data from created views to Quollio,
79
+ 'load_stats': Load stats from created views to Quollio,
41
80
  """,
42
81
  )
43
82
  parser.add_argument(
44
- "--credentials",
83
+ "--log_level",
45
84
  type=str,
46
- action=env_default("GOOGLE_APPLICATION_CREDENTIALS"),
47
- help="Crendentials for Google Cloud Platform",
85
+ choices=["debug", "info", "warn", "error", "none"],
86
+ action=env_default("LOG_LEVEL"),
87
+ default="info",
88
+ required=False,
89
+ help="The log level for dbt commands. Default value is info",
48
90
  )
49
91
  parser.add_argument(
50
92
  "--tenant_id",
@@ -53,6 +95,27 @@ if __name__ == "__main__":
53
95
  required=False,
54
96
  help="The tenant id (company id) where the lineage and stats are loaded",
55
97
  )
98
+ parser.add_argument(
99
+ "--project_id",
100
+ type=str,
101
+ default=None,
102
+ required=False,
103
+ help="Project ID of the BigQuery project to load lineage and stats from (default is loaded from credentials)",
104
+ )
105
+ parser.add_argument(
106
+ "--regions",
107
+ type=str,
108
+ action=env_default("GCP_REGIONS"),
109
+ required=True,
110
+ help="Comma-separated list of regions BigQuery data is in",
111
+ )
112
+ parser.add_argument(
113
+ "--credentials_json",
114
+ type=str,
115
+ action=env_default("GOOGLE_APPLICATION_CREDENTIALS"),
116
+ required=True,
117
+ help="Credentials JSON",
118
+ )
56
119
  parser.add_argument(
57
120
  "--api_url",
58
121
  type=str,
@@ -74,50 +137,47 @@ if __name__ == "__main__":
74
137
  required=False,
75
138
  help="The client secret that is created on Quollio console to let clients access Quollio External API",
76
139
  )
140
+
77
141
  parser.add_argument(
78
- "--project_id",
79
- type=str,
80
- action=env_default("GCP_PROJECT_ID"),
81
- required=False,
82
- help="GCP Project ID",
83
- )
84
- parser.add_argument(
85
- "--regions",
86
- type=str,
87
- action=env_default("GCP_REGIONS"),
88
- required=False,
89
- help="GCP regions where the data is located. Multiple regions can be provided separated by space.",
90
- nargs="+",
91
- )
92
- parser.add_argument(
93
- "--log_level",
142
+ "--dataplex_stats_tables",
94
143
  type=str,
95
- choices=["debug", "info", "warn", "error", "none"],
96
- action=env_default("LOG_LEVEL"),
144
+ action=env_default("DATAPLEX_STATS_TABLES"),
97
145
  required=False,
98
- help="The log level for dbt commands. Default value is info",
146
+ help="Comma-separated list of dataplex stats tables - <project_id>.<dataset_id>.<table_id>",
99
147
  )
100
148
 
101
149
  args = parser.parse_args()
102
- set_log_level(level=args.log_level)
103
150
 
104
- if len(args.commands) == 0:
105
- raise ValueError("No command is provided")
151
+ # Validate that dataplex_stats_tables is provided if load_stats is in commands
152
+ if "load_stats" in args.commands and not args.dataplex_stats_tables:
153
+ parser.error("--dataplex_stats_tables is required when 'load_stats' command is used")
106
154
 
107
- if "load_lineage" in args.commands:
108
- qdc_client = qdc.QDCExternalAPIClient(
109
- base_url=args.api_url, client_id=args.client_id, client_secret=args.client_secret
110
- )
155
+ configure_logging(args.log_level)
111
156
 
112
- credentials_json = json.loads(args.credentials)
113
- credentials = get_credentials(credentials_json=credentials_json)
114
- org_id = get_org_id(credentials_json=credentials_json)
157
+ credentials = initialize_credentials(args.credentials_json)
158
+ org_id = initialize_org_id(args.credentials_json)
159
+ qdc_client = qdc.initialize_qdc_client(args.api_url, args.client_id, args.client_secret)
160
+ bq_client = initialize_bq_client(credentials, args.project_id)
161
+ if args.project_id is None:
162
+ args.project_id = json.loads(args.credentials_json)["project_id"]
163
+ regions = args.regions.split(",")
115
164
 
165
+ if "load_lineage" in args.commands:
116
166
  load_lineage(
117
- qdc_client=qdc_client,
118
- project_id=args.project_id,
119
- regions=args.regions,
120
167
  tenant_id=args.tenant_id,
168
+ project_id=args.project_id,
169
+ regions=regions,
170
+ org_id=org_id,
121
171
  credentials=credentials,
172
+ qdc_client=qdc_client,
173
+ )
174
+
175
+ if "load_stats" in args.commands:
176
+ tables = args.dataplex_stats_tables.split(",")
177
+ load_stats(
178
+ conn=bq_client,
179
+ tenant_id=args.tenant_id,
122
180
  org_id=org_id,
181
+ qdc_client=qdc_client,
182
+ dataplex_stats_tables=tables,
123
183
  )
quollio_core/bricks.py CHANGED
@@ -10,6 +10,7 @@ from quollio_core.profilers.databricks import (
10
10
  databricks_column_stats,
11
11
  databricks_table_level_lineage,
12
12
  )
13
+ from quollio_core.profilers.stats import get_column_stats_items
13
14
  from quollio_core.repository import databricks as db
14
15
  from quollio_core.repository import dbt, qdc
15
16
 
@@ -21,7 +22,6 @@ def build_view(
21
22
  target_tables: str = "",
22
23
  log_level: str = "info",
23
24
  ) -> None:
24
-
25
25
  logger.info("Build profiler views using dbt")
26
26
  # set parameters
27
27
  dbt_client = dbt.DBTClient()
@@ -64,7 +64,6 @@ def load_lineage(
64
64
  tenant_id: str,
65
65
  enable_column_lineage: bool = False,
66
66
  ) -> None:
67
-
68
67
  logger.info("Generate Databricks table to table lineage.")
69
68
  databricks_table_level_lineage(
70
69
  conn=conn,
@@ -98,7 +97,6 @@ def load_column_stats(
98
97
  qdc_client: qdc.QDCExternalAPIClient,
99
98
  tenant_id: str,
100
99
  ) -> None:
101
-
102
100
  logger.info("Generate Databricks column stats.")
103
101
  databricks_column_stats(
104
102
  conn=conn,
@@ -240,6 +238,19 @@ if __name__ == "__main__":
240
238
  help="Whether to ingest column lineage into QDIC or not. Default value is False",
241
239
  )
242
240
 
241
+ stats_items = get_column_stats_items()
242
+ parser.add_argument(
243
+ "--target_stats_items",
244
+ type=str,
245
+ nargs="*",
246
+ choices=stats_items,
247
+ default=stats_items,
248
+ action=env_default("DATABRICKS_STATS_ITEMS"),
249
+ required=False,
250
+ help="The items for statistic values.\
251
+ You can choose the items to be aggregated for stats. All items are selected by default.",
252
+ )
253
+
243
254
  args = parser.parse_args()
244
255
  set_log_level(level=args.log_level)
245
256
 
@@ -284,5 +295,6 @@ if __name__ == "__main__":
284
295
  endpoint=args.host,
285
296
  qdc_client=qdc_client,
286
297
  tenant_id=args.tenant_id,
298
+ stats_items=args.target_stats_items,
287
299
  monitoring_table_suffix=args.monitoring_table_suffix,
288
300
  )
@@ -0,0 +1,48 @@
1
+ import inspect
2
+ import logging
3
+
4
# Maps the (lower-cased) level names accepted by configure_logging() to
# numeric logging levels.
LOG_LEVELS = {
    "critical": logging.CRITICAL,
    "error": logging.ERROR,
    "warning": logging.WARNING,
    # "warn" and "none" are offered as --log_level choices by the CLI entry
    # point, so they must resolve here instead of raising ValueError.
    "warn": logging.WARNING,
    "info": logging.INFO,
    "debug": logging.DEBUG,
    "notset": logging.NOTSET,
    # One step above CRITICAL: no standard-level record passes the filter.
    "none": logging.CRITICAL + 1,
}

logger = logging.getLogger(__name__)


def configure_logging(level: str = "INFO") -> None:
    """Configure the root logging format and this module's logger level.

    Args:
        level: Case-insensitive level name; one of the keys of ``LOG_LEVELS``
            (``"warn"`` is an alias of ``"warning"``; ``"none"`` silences all
            standard-level records).

    Raises:
        ValueError: If ``level`` is not a known level name.
    """
    log_level = LOG_LEVELS.get(level.lower())
    # Compare against None explicitly: "notset" maps to 0, which is falsy.
    if log_level is None:
        raise ValueError(f"Unknown log level: {level}")

    logging.basicConfig(
        level=log_level,
        format="%(asctime)s - %(levelname)s - %(funcName)s:%(lineno)d - %(message)s",
    )
    logger.setLevel(log_level)
    logger.info(f"Logging is configured to {level} level.")
28
+
29
+
30
def error_handling_decorator(func):
    """Decorator for consistent error handling in CLI commands.

    The wrapped callable logs a debug message before and after ``func`` runs.
    If ``func`` raises an ``Exception``, the error is logged (with traceback)
    together with the line at which it surfaced, and the exception is
    re-raised unchanged.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper that keeps ``func``'s name, docstring and other metadata.
    """
    # Function-scope import keeps this block self-contained.
    from functools import wraps

    @wraps(func)
    def wrapper(*args, **kwargs):
        func_name = func.__name__
        try:
            logger.debug(f"Starting {func_name}")
            result = func(*args, **kwargs)
            logger.debug(f"Completed {func_name} successfully")
            return result
        except Exception as e:
            # The first traceback entry is this wrapper's own frame; the next
            # one (when present) is the frame of the call made from here,
            # i.e. where the error surfaced inside the wrapped function.
            tb = e.__traceback__
            if tb is not None and tb.tb_next is not None:
                tb = tb.tb_next
            line_number = tb.tb_lineno if tb is not None else "unknown"
            logger.error(f"Error in {func_name} at line {line_number}: {str(e)}", exc_info=True)
            raise

    return wrapper
@@ -1,26 +1,28 @@
1
- import logging
2
- from typing import Any, Dict, List
1
+ from typing import Dict, List
3
2
 
3
+ from google.auth.credentials import Credentials
4
+
5
+ from quollio_core.helper.log_utils import error_handling_decorator, logger
4
6
  from quollio_core.profilers.lineage import gen_table_lineage_payload, parse_bigquery_table_lineage
7
+ from quollio_core.profilers.stats import gen_table_stats_payload
5
8
  from quollio_core.repository import qdc
6
9
  from quollio_core.repository.bigquery import BigQueryClient, GCPLineageClient, get_entitiy_reference, get_search_request
7
10
 
8
- logger = logging.getLogger(__name__)
9
-
10
11
 
12
+ @error_handling_decorator
11
13
  def bigquery_table_lineage(
12
14
  qdc_client: qdc.QDCExternalAPIClient,
13
15
  tenant_id: str,
14
16
  project_id: str,
15
17
  regions: list,
16
18
  org_id: str,
17
- credentials: Any,
18
- ):
19
+ credentials: Credentials,
20
+ ) -> None:
19
21
  lineage_client = GCPLineageClient(credentials)
20
- bq_client = BigQueryClient(credentials)
22
+ bq_client = BigQueryClient(credentials, project_id)
21
23
 
22
- datasets = bq_client.list_datasets(project_id)
23
- all_tables = generate_table_list(datasets, bq_client)
24
+ datasets = bq_client.list_dataset_ids()
25
+ all_tables = generate_table_list(bq_client, datasets)
24
26
  lineage_links = generate_lineage_links(all_tables, lineage_client, project_id, regions)
25
27
  lineage_links = parse_bigquery_table_lineage(lineage_links)
26
28
 
@@ -29,7 +31,7 @@ def bigquery_table_lineage(
29
31
  req_count = 0
30
32
  for update_table_lineage_input in update_table_lineage_inputs:
31
33
  logger.info(
32
- "Generating table lineage. downstream: %s -> %s-> %s",
34
+ "Generating table lineage. downstream: %s -> %s -> %s",
33
35
  update_table_lineage_input.downstream_database_name,
34
36
  update_table_lineage_input.downstream_schema_name,
35
37
  update_table_lineage_input.downstream_table_name,
@@ -43,29 +45,65 @@ def bigquery_table_lineage(
43
45
  logger.info("Generating table lineage is finished. %s lineages are ingested.", req_count)
44
46
 
45
47
 
46
- def generate_table_list(datasets: List[str], bq_client: BigQueryClient) -> List[str]:
48
@error_handling_decorator
def bigquery_table_stats(
    qdc_client: qdc.QDCExternalAPIClient,
    bq_client: BigQueryClient,
    tenant_id: str,
    org_id: str,
    dataplex_stats_tables: list,
) -> None:
    """Collect column stats from the given Dataplex stats tables and send them to QDC.

    Each table is read with ``column_stats_from_dataplex``; the combined rows
    are turned into payloads by ``gen_table_stats_payload`` and each payload
    is submitted through ``qdc_client.update_stats_by_id``.

    Args:
        qdc_client: Client used to submit each stats payload.
        bq_client: BigQuery client used to read the stats tables.
        tenant_id: Passed to ``gen_table_stats_payload``.
        org_id: Passed to ``gen_table_stats_payload`` as its second argument.
        dataplex_stats_tables: Names of the stats tables to read.
    """
    collected_rows = []
    for stats_table in dataplex_stats_tables:
        logger.info("Profiling columns using Dataplex stats table: %s", stats_table)
        table_rows = column_stats_from_dataplex(bq_client, stats_table)
        collected_rows.extend(table_rows)

    payloads = gen_table_stats_payload(tenant_id, org_id, collected_rows)

    for payload in payloads:
        response_code = qdc_client.update_stats_by_id(
            global_id=payload.global_id,
            payload=payload.body.as_dict(),
        )
        # Only a 200 response is reported; anything else is passed over silently.
        if response_code != 200:
            continue
        logger.info(
            "Stats for column %s -> %s -> %s -> %s is successfully ingested.",
            payload.db,
            payload.schema,
            payload.table,
            payload.column,
        )
        logger.debug("Stats for column id %s is successfully ingested.", payload.global_id)
77
+
78
+
79
def generate_table_list(bq_client: BigQueryClient, datasets: List[str]) -> List[str]:
    """Return ``project.dataset.table`` names for tables and views in the datasets.

    Args:
        bq_client: Client providing ``list_tables(dataset)`` and
            ``client.project``. Each listed table is read as a mapping with
            ``table_type``, ``dataset_id`` and ``table_id`` keys.
        datasets: Dataset identifiers to list tables from.

    Returns:
        Names of every entry whose ``table_type`` is ``TABLE``, ``VIEW`` or
        ``MATERIALIZED_VIEW``, in dataset order then listing order.
    """
    target_types = ("TABLE", "VIEW", "MATERIALIZED_VIEW")
    table_names: List[str] = []
    for dataset in datasets:
        # Build the qualified names directly instead of collecting the table
        # records first and formatting them in a second pass.
        table_names.extend(
            f"{bq_client.client.project}.{table['dataset_id']}.{table['table_id']}"
            for table in bq_client.list_tables(dataset)
            if table["table_type"] in target_types
        )
    return table_names
62
95
 
63
96
 
64
97
  def generate_lineage_links(
65
- all_tables: List[str], lineage_client: GCPLineageClient, project_id: str, regions: List[str]
98
+ all_tables: List[str],
99
+ lineage_client: GCPLineageClient,
100
+ project_id: str,
101
+ regions: List[str],
66
102
  ) -> Dict[str, List[str]]:
67
103
  lineage_links = {}
68
104
  for table in all_tables:
105
+ if "quollio" in table.lower():
106
+ continue
69
107
  downstream = get_entitiy_reference()
70
108
  downstream.fully_qualified_name = f"bigquery:{table}"
71
109
 
@@ -74,8 +112,34 @@ def generate_lineage_links(
74
112
  response = lineage_client.get_links(request=request)
75
113
  for lineage in response:
76
114
  target_table = str(lineage.target.fully_qualified_name).replace("bigquery:", "")
115
+ source_table = str(lineage.source.fully_qualified_name).replace("bigquery:", "")
77
116
  if target_table not in lineage_links:
78
117
  lineage_links[target_table] = []
79
- lineage_links[target_table].append(str(lineage.source.fully_qualified_name).replace("bigquery:", ""))
118
+ if source_table not in lineage_links[target_table]:
119
+ lineage_links[target_table].append(source_table)
80
120
 
81
121
  return lineage_links
122
+
123
+
124
def column_stats_from_dataplex(bq_client: BigQueryClient, profiling_table: str) -> List[Dict]:
    """Query a Dataplex profiling table and return its rows as dictionaries.

    Args:
        bq_client: Client whose ``client.query`` runs the statement.
        profiling_table: Table name interpolated (backtick-quoted) into the
            ``FROM`` clause.

    Returns:
        One dictionary per result row, keyed by the column aliases in the query.
    """
    query = f"""
    SELECT
        data_source.table_project_id AS DB_NAME,
        data_source.dataset_id AS SCHEMA_NAME,
        data_source.table_id AS TABLE_NAME,
        column_name AS COLUMN_NAME,
        min_value AS MIN_VALUE,
        max_value AS MAX_VALUE,
        average_value AS AVG_VALUE,
        quartile_median AS MEDIAN_VALUE,
        standard_deviation AS STDDEV_VALUE,
        top_n[0][0] AS MODE_VALUE,
        CAST((percent_null / 100) * job_rows_scanned AS INT) as NULL_COUNT,
        CAST((percent_unique / 100) * job_rows_scanned AS INT) as CARDINALITY
    FROM `{profiling_table}`
    """
    logger.debug(f"Executing Query: {query}")
    row_iterator = bq_client.client.query(query).result()

    # Materialise the row iterator as plain dictionaries.
    records = []
    for row in row_iterator:
        records.append(dict(row))
    return records
@@ -6,7 +6,7 @@ from quollio_core.profilers.lineage import (
6
6
  gen_table_lineage_payload,
7
7
  parse_databricks_table_lineage,
8
8
  )
9
- from quollio_core.profilers.stats import gen_table_stats_payload
9
+ from quollio_core.profilers.stats import gen_table_stats_payload, get_is_target_stats_items, render_sql_for_stats
10
10
  from quollio_core.repository import databricks, qdc
11
11
 
12
12
  logger = logging.getLogger(__name__)
@@ -125,59 +125,63 @@ def _get_monitoring_tables(
125
125
 
126
126
 
127
127
def _get_column_stats(
    conn: databricks.DatabricksConnectionConfig,
    stats_items: List[str],
    monitoring_table_suffix: str = "_profile_metrics",
) -> List[Dict[str, str]]:
    """Fetch the latest column stats from every monitoring table.

    Args:
        conn: Connection settings used to open a query executor per table.
        stats_items: Stat names to retrieve; the remaining stats are selected
            as ``null`` by ``render_sql_for_stats``.
        monitoring_table_suffix: Suffix identifying monitoring tables. It is
            passed to ``_get_monitoring_tables`` and stripped from each table
            name to obtain the monitored table.

    Returns:
        One query result per monitoring table; an empty list when no
        monitoring table is found.

    Raises:
        ValueError: If a monitored table name does not have exactly three
            dot-separated parts.
    """
    tables = _get_monitoring_tables(conn, monitoring_table_suffix)
    if not tables:
        return []
    stats = []
    is_aggregate_items = get_is_target_stats_items(stats_items=stats_items)
    for table in tables:
        # Strip the configured suffix rather than a hard-coded one, so a
        # non-default monitoring_table_suffix still yields the monitored table.
        monitored_table = table["table_fqdn"].removesuffix(monitoring_table_suffix)
        monitored_table = monitored_table.split(".")
        if len(monitored_table) != 3:
            raise ValueError(f"Invalid table name: {table['table_fqdn']}")
        with databricks.DatabricksQueryExecutor(config=conn) as databricks_executor:
            # profile_record_history ranks the rows of each column by window
            # (descending); profile_record keeps only the top-ranked row.
            cte = """
            WITH profile_record_history AS (
                SELECT
                    COLUMN_NAME
                    , distinct_count as cardinality
                    , MAX as max_value
                    , MIN as min_value
                    , AVG as avg_value
                    , MEDIAN as median_value
                    , STDDEV as stddev_value
                    , NUM_NULLS as null_count
                    , get(frequent_items, 0).item AS mode_value
                    , row_number() over(partition by column_name order by window desc) rownum
                FROM
                    {monitoring_table}
                WHERE
                    column_name not in (':table')
            ), profile_record AS (
                SELECT
                    "{monitored_table_catalog}" as db_name
                    , "{monitored_table_schema}" as schema_name
                    , "{monitored_table_name}" as table_name
                    , column_name
                    , max_value
                    , min_value
                    , null_count
                    , cardinality
                    , avg_value
                    , median_value
                    , mode_value
                    , stddev_value
                FROM
                    profile_record_history
                WHERE
                    rownum = 1
            )""".format(
                monitoring_table=table["table_fqdn"],
                monitored_table_catalog=monitored_table[0],
                monitored_table_schema=monitored_table[1],
                monitored_table_name=monitored_table[2],
            )
            query = render_sql_for_stats(is_aggregate_items=is_aggregate_items, table_fqn="profile_record", cte=cte)
            logger.debug(f"The following sql will be fetched to retrieve stats values. {query}")
            stats.append(databricks_executor.get_query_results(query))
    return stats
@@ -188,9 +192,10 @@ def databricks_column_stats(
188
192
  endpoint: str,
189
193
  qdc_client: qdc.QDCExternalAPIClient,
190
194
  tenant_id: str,
195
+ stats_items: List[str],
191
196
  monitoring_table_suffix: str = "_profile_metrics",
192
197
  ) -> None:
193
- table_stats = _get_column_stats(conn, monitoring_table_suffix)
198
+ table_stats = _get_column_stats(conn, stats_items, monitoring_table_suffix)
194
199
  for table in table_stats:
195
200
  logger.debug("Table %s will be aggregated.", table)
196
201
  stats = gen_table_stats_payload(tenant_id=tenant_id, endpoint=endpoint, stats=table)
@@ -1,8 +1,13 @@
1
1
  import logging
2
+ from typing import List
2
3
 
3
4
  from quollio_core.profilers.lineage import gen_table_lineage_payload, gen_table_lineage_payload_inputs
4
5
  from quollio_core.profilers.sqllineage import SQLLineage
5
- from quollio_core.profilers.stats import gen_table_stats_payload_from_tuple
6
+ from quollio_core.profilers.stats import (
7
+ gen_table_stats_payload_from_tuple,
8
+ get_is_target_stats_items,
9
+ render_sql_for_stats,
10
+ )
6
11
  from quollio_core.repository import qdc, redshift
7
12
 
8
13
  logger = logging.getLogger(__name__)
@@ -76,38 +81,24 @@ def redshift_table_stats(
76
81
  conn: redshift.RedshiftConnectionConfig,
77
82
  qdc_client: qdc.QDCExternalAPIClient,
78
83
  tenant_id: str,
84
+ stats_items: List[str],
79
85
  ) -> None:
80
-
86
+ is_aggregate_items = get_is_target_stats_items(stats_items=stats_items)
81
87
  with redshift.RedshiftQueryExecutor(config=conn) as redshift_executor:
82
88
  stats_query = _gen_get_stats_views_query(
83
89
  db=conn.database,
84
90
  schema=conn.schema,
85
91
  )
86
92
  stats_views = redshift_executor.get_query_results(query=stats_query)
93
+ logger.info("Found %s for table statistics.", len(stats_views))
87
94
 
88
95
  req_count = 0
89
96
  for stats_view in stats_views:
90
- stats_query = """
91
- SELECT
92
- db_name
93
- , schema_name
94
- , table_name
95
- , column_name
96
- , max_value
97
- , min_value
98
- , null_count
99
- , cardinality
100
- , avg_value
101
- , median_value
102
- , mode_value
103
- , stddev_value
104
- FROM
105
- {db}.{schema}.{table}
106
- """.format(
107
- db=stats_view[0],
108
- schema=stats_view[1],
109
- table=stats_view[2],
97
+ table_fqn = "{catalog}.{schema}.{table}".format(
98
+ catalog=stats_view[0], schema=stats_view[1], table=stats_view[2]
110
99
  )
100
+ stats_query = render_sql_for_stats(is_aggregate_items=is_aggregate_items, table_fqn=table_fqn)
101
+ logger.debug(f"The following sql will be fetched to retrieve stats values. {stats_query}")
111
102
  stats_result = redshift_executor.get_query_results(query=stats_query)
112
103
  payloads = gen_table_stats_payload_from_tuple(tenant_id=tenant_id, endpoint=conn.host, stats=stats_result)
113
104
  for payload in payloads:
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ from typing import List
2
3
 
3
4
  from quollio_core.profilers.lineage import (
4
5
  gen_column_lineage_payload,
@@ -6,7 +7,7 @@ from quollio_core.profilers.lineage import (
6
7
  parse_snowflake_results,
7
8
  )
8
9
  from quollio_core.profilers.sqllineage import SQLLineage
9
- from quollio_core.profilers.stats import gen_table_stats_payload
10
+ from quollio_core.profilers.stats import gen_table_stats_payload, get_is_target_stats_items, render_sql_for_stats
10
11
  from quollio_core.repository import qdc, snowflake
11
12
 
12
13
  logger = logging.getLogger(__name__)
@@ -154,6 +155,7 @@ def snowflake_table_stats(
154
155
  conn: snowflake.SnowflakeConnectionConfig,
155
156
  qdc_client: qdc.QDCExternalAPIClient,
156
157
  tenant_id: str,
158
+ stats_items: List[str],
157
159
  ) -> None:
158
160
  with snowflake.SnowflakeQueryExecutor(conn) as sf_executor:
159
161
  stats_query = _gen_get_stats_views_query(
@@ -163,28 +165,12 @@ def snowflake_table_stats(
163
165
  stats_views = sf_executor.get_query_results(query=stats_query)
164
166
 
165
167
  req_count = 0
168
+ is_aggregate_items = get_is_target_stats_items(stats_items=stats_items)
166
169
  for stats_view in stats_views:
167
- stats_query = """
168
- SELECT
169
- db_name
170
- , schema_name
171
- , table_name
172
- , column_name
173
- , max_value
174
- , min_value
175
- , null_count
176
- , cardinality
177
- , avg_value
178
- , median_value
179
- , mode_value
180
- , stddev_value
181
- FROM
182
- {db}.{schema}.{table}
183
- """.format(
184
- db=stats_view["TABLE_CATALOG"],
185
- schema=stats_view["TABLE_SCHEMA"],
186
- table=stats_view["TABLE_NAME"],
170
+ table_fqn = "{catalog}.{schema}.{table}".format(
171
+ catalog=stats_view["TABLE_CATALOG"], schema=stats_view["TABLE_SCHEMA"], table=stats_view["TABLE_NAME"]
187
172
  )
173
+ stats_query = render_sql_for_stats(is_aggregate_items=is_aggregate_items, table_fqn=table_fqn)
188
174
  logger.debug(f"The following sql will be fetched to retrieve stats values. {stats_query}")
189
175
  stats_result = sf_executor.get_query_results(query=stats_query)
190
176
  payloads = gen_table_stats_payload(tenant_id=tenant_id, endpoint=conn.account_id, stats=stats_result)
@@ -1,8 +1,10 @@
1
1
  import logging
2
- from dataclasses import asdict, dataclass
2
+ from dataclasses import asdict, dataclass, fields
3
3
  from decimal import ROUND_HALF_UP, Decimal
4
4
  from typing import Dict, List, Tuple, Union
5
5
 
6
+ from jinja2 import Template
7
+
6
8
  from quollio_core.helper.core import new_global_id
7
9
 
8
10
  logger = logging.getLogger(__name__)
@@ -71,30 +73,35 @@ def convert_value_type(obj, cast_str: bool = False):
71
73
  def gen_table_stats_payload(tenant_id: str, endpoint: str, stats: List[Dict[str, str]]) -> List[StatsRequest]:
72
74
  payloads = list()
73
75
  for stat in stats:
76
+ db_name = stat.get("DB_NAME", stat.get("db_name"))
77
+ schema_name = stat.get("SCHEMA_NAME", stat.get("schema_name"))
78
+ table_name = stat.get("TABLE_NAME", stat.get("table_name"))
79
+ column_name = stat.get("COLUMN_NAME", stat.get("column_name"))
74
80
  global_id_arg = "{db}{schema}{table}{column}".format(
75
- db=stat["DB_NAME"], schema=stat["SCHEMA_NAME"], table=stat["TABLE_NAME"], column=stat["COLUMN_NAME"]
81
+ db=db_name, schema=schema_name, table=table_name, column=column_name
76
82
  )
77
83
  table_global_id = new_global_id(
78
84
  tenant_id=tenant_id, cluster_id=endpoint, data_id=global_id_arg, data_type="column"
79
85
  )
86
+ column_stats_input = ColumnStatsInput(
87
+ cardinality=convert_value_type(stat.get("CARDINALITY", stat.get("cardinality"))),
88
+ max=convert_value_type(stat.get("MAX_VALUE", stat.get("max_value")), True),
89
+ mean=convert_value_type(stat.get("AVG_VALUE", stat.get("avg_value")), True),
90
+ median=convert_value_type(stat.get("MEDIAN_VALUE", stat.get("median_value")), True),
91
+ min=convert_value_type(stat.get("MIN_VALUE", stat.get("min_value")), True),
92
+ mode=convert_value_type(stat.get("MODE_VALUE", stat.get("mode_value")), True),
93
+ number_of_null=convert_value_type(stat.get("NULL_COUNT", stat.get("null_count"))),
94
+ number_of_unique=convert_value_type(stat.get("CARDINALITY", stat.get("cardinality"))),
95
+ stddev=convert_value_type(stat.get("STDDEV_VALUE", stat.get("stddev_value")), True),
96
+ )
80
97
  stats_request = StatsRequest(
81
98
  global_id=table_global_id,
82
- db=stat["DB_NAME"],
83
- schema=stat["SCHEMA_NAME"],
84
- table=stat["TABLE_NAME"],
85
- column=stat["COLUMN_NAME"],
99
+ db=db_name,
100
+ schema=schema_name,
101
+ table=table_name,
102
+ column=column_name,
86
103
  body=StatsInput(
87
- column_stats=ColumnStatsInput(
88
- cardinality=convert_value_type(stat["CARDINALITY"]),
89
- max=convert_value_type(stat["MAX_VALUE"], True),
90
- mean=convert_value_type(stat["AVG_VALUE"], True),
91
- median=convert_value_type(stat["MEDIAN_VALUE"], True),
92
- min=convert_value_type(stat["MIN_VALUE"], True),
93
- mode=convert_value_type(stat["MODE_VALUE"], True),
94
- number_of_null=convert_value_type(stat["NULL_COUNT"]),
95
- number_of_unique=convert_value_type(stat["CARDINALITY"]),
96
- stddev=convert_value_type(stat["STDDEV_VALUE"], True),
97
- ),
104
+ column_stats=column_stats_input,
98
105
  # MEMO: Table stats can be collected with metadata agent.
99
106
  # Then, It's not necessary to update with this system for now.
100
107
  table_stats=TableStatsInput(count=0, size=0.0),
@@ -138,3 +145,57 @@ def gen_table_stats_payload_from_tuple(
138
145
  )
139
146
  payloads.append(stats_request)
140
147
  return payloads
148
+
149
+
150
def render_sql_for_stats(is_aggregate_items: Dict[str, bool], table_fqn: str, cte: str = "") -> str:
    """Render the stats ``SELECT`` statement for one table.

    Args:
        is_aggregate_items: Flags keyed by stat name (``max``, ``min``,
            ``number_of_null``, ``cardinality``, ``mean``, ``median``,
            ``mode``, ``stddev``). A stat whose flag is not ``True`` is
            selected as ``null``.
        table_fqn: Name placed in the ``FROM`` clause.
        cte: Optional text emitted ahead of the ``SELECT``.

    Returns:
        The rendered SQL text.
    """
    # Template variable name -> key looked up in is_aggregate_items.
    flag_sources = {
        "agg_max": "max",
        "agg_min": "min",
        "agg_null_count": "number_of_null",
        "agg_cardinality": "cardinality",
        "agg_avg": "mean",
        "agg_median": "median",
        "agg_mode": "mode",
        "agg_stddev": "stddev",
    }
    render_context = {name: is_aggregate_items[key] for name, key in flag_sources.items()}

    stats_template = Template(
        """
        {% if cte -%}
        {{ cte }}
        {% endif -%}
        SELECT
            db_name
            , schema_name
            , table_name
            , column_name
            , {% if agg_max == True -%} max_value {% else -%} null as max_value {% endif %}
            , {% if agg_min == True -%} min_value {% else -%} null as min_value {% endif %}
            , {% if agg_null_count == True -%} null_count {% else -%} null as null_count {% endif %}
            , {% if agg_cardinality == True -%} cardinality {% else -%} null as cardinality {% endif %}
            , {% if agg_avg == True -%} avg_value {% else -%} null as avg_value {% endif %}
            , {% if agg_median == True -%} median_value {% else -%} null as median_value {% endif %}
            , {% if agg_mode == True -%} mode_value {% else -%} null as mode_value {% endif %}
            , {% if agg_stddev == True -%} stddev_value {% else -%} null as stddev_value {% endif %}
        FROM
            {{ table_fqn }}
        """
    )
    return stats_template.render(table_fqn=table_fqn, cte=cte, **render_context)
186
+
187
+
188
def get_is_target_stats_items(stats_items: List[str]) -> Dict[str, bool]:
    """Build a flag per column-stats item telling whether it was requested.

    Args:
        stats_items: Names of the stats items to enable.

    Returns:
        A single dictionary mapping every name from ``get_column_stats_items()``
        to ``False``, with each name in ``stats_items`` set to ``True``.
    """
    # Start with every known item disabled, then switch on the requested ones.
    is_aggregate_items = dict.fromkeys(get_column_stats_items(), False)
    for stats_item in stats_items:
        is_aggregate_items[stats_item] = True

    return is_aggregate_items
198
+
199
+
200
def get_column_stats_items() -> List[str]:
    """Return the field names declared on ``ColumnStatsInput``, in declaration order."""
    item_names = []
    for stats_field in fields(ColumnStatsInput):
        item_names.append(stats_field.name)
    return item_names
quollio_core/redshift.py CHANGED
@@ -10,6 +10,7 @@ from quollio_core.profilers.redshift import (
10
10
  redshift_table_level_sqllineage,
11
11
  redshift_table_stats,
12
12
  )
13
+ from quollio_core.profilers.stats import get_column_stats_items
13
14
  from quollio_core.repository import dbt, qdc, redshift
14
15
 
15
16
  logger = logging.getLogger(__name__)
@@ -98,13 +99,19 @@ def load_stats(
98
99
  conn: redshift.RedshiftConnectionConfig,
99
100
  qdc_client: qdc.QDCExternalAPIClient,
100
101
  tenant_id: str,
102
+ stats_items: str,
101
103
  ) -> None:
102
-
103
104
  logger.info("Generate redshift stats.")
105
+
106
+ if stats_items is None:
107
+ raise ValueError("No stats items are not selected. Please specify any value to `stats_items` param.")
108
+
109
+ logger.info("The following values will be aggregated. {stats_items}".format(stats_items=stats_items))
104
110
  redshift_table_stats(
105
111
  conn=conn,
106
112
  qdc_client=qdc_client,
107
113
  tenant_id=tenant_id,
114
+ stats_items=stats_items,
108
115
  )
109
116
 
110
117
  logger.info("Stats data is successfully loaded.")
@@ -116,7 +123,6 @@ def load_sqllineage(
116
123
  qdc_client: qdc.QDCExternalAPIClient,
117
124
  tenant_id: str,
118
125
  ) -> None:
119
-
120
126
  logger.info("Generate Redshift sqllineage.")
121
127
  redshift_table_level_sqllineage(
122
128
  conn=conn,
@@ -261,6 +267,19 @@ if __name__ == "__main__":
261
267
  required=False,
262
268
  help="The client secrete that is created on Quollio console to let clients access Quollio External API",
263
269
  )
270
+
271
+ stats_items = get_column_stats_items()
272
+ parser.add_argument(
273
+ "--target_stats_items",
274
+ type=str,
275
+ nargs="*",
276
+ choices=stats_items,
277
+ default=stats_items,
278
+ action=env_default("REDSHIFT_STATS_ITEMS"),
279
+ required=False,
280
+ help="The items for stats values. \
281
+ You can choose the items to be aggregated for stats. All items are selected by default.",
282
+ )
264
283
  args = parser.parse_args()
265
284
  set_log_level(level=args.log_level)
266
285
 
@@ -306,6 +325,7 @@ if __name__ == "__main__":
306
325
  conn=conn,
307
326
  qdc_client=qdc_client,
308
327
  tenant_id=args.tenant_id,
328
+ stats_items=args.target_stats_items,
309
329
  )
310
330
  if "load_sqllineage" in args.commands:
311
331
  qdc_client = qdc.QDCExternalAPIClient(
@@ -1,41 +1,75 @@
1
- import logging
1
+ from typing import Any, Dict, List
2
2
 
3
3
  from google.cloud.bigquery import Client
4
4
  from google.cloud.datacatalog_lineage_v1 import EntityReference, LineageClient, SearchLinksRequest
5
5
  from google.oauth2.service_account import Credentials
6
6
  from googleapiclient.discovery import build
7
7
 
8
- logger = logging.getLogger(__name__)
8
+ from quollio_core.helper.log_utils import logger # Importing the logger from logging_utils
9
9
 
10
10
 
11
11
class BigQueryClient:
    """Client to interact with the BigQuery API."""

    def __init__(self, credentials: Credentials, project_id: str) -> None:
        """Initialize the BigQuery client with provided credentials.

        Args:
            credentials: Credentials passed to the underlying ``Client``.
            project_id: Project passed to the underlying ``Client``.
        """
        self.client = self.__initialize(credentials=credentials, project_id=project_id)

    def __initialize(self, credentials: Credentials, project_id: str) -> Client:
        """Create the underlying ``Client`` for the given project."""
        return Client(credentials=credentials, project=project_id)

    def list_dataset_ids(self) -> List[str]:
        """List all dataset ids in the project."""
        datasets = list(self.client.list_datasets())
        logger.debug("Found %s datasets in project %s", len(datasets), self.client.project)
        return [dataset.dataset_id for dataset in datasets]

    def list_tables(self, dataset_id: str) -> List[Dict[str, str]]:
        """List all tables in the dataset.

        Returns:
            One dict per table with the keys ``table_id``, ``table_type``,
            ``project`` and ``dataset_id``.
        """
        tables = list(self.client.list_tables(dataset_id))
        logger.debug("Found %s tables in dataset %s", len(tables), dataset_id)
        return [
            {
                "table_id": table.table_id,
                "table_type": table.table_type,
                "project": table.project,
                "dataset_id": table.dataset_id,
            }
            for table in tables
        ]

    def get_columns(self, table_id: str, dataset_id: str) -> List[Dict[str, str]]:
        """Get the columns of the table.

        The table is looked up as ``<client project>.<dataset_id>.<table_id>``.

        Returns:
            One dict per schema field with the keys ``name`` and ``type``.
        """
        table = self.client.get_table(f"{self.client.project}.{dataset_id}.{table_id}")
        return [{"name": field.name, "type": field.field_type} for field in table.schema]

    def get_all_columns(self) -> Dict[str, Dict[str, Dict[str, Any]]]:
        """Get all columns in the project.

        Returns:
            A mapping ``dataset_id -> table_id -> {"columns": [...], "table_type": ...}``
            where ``columns`` is the result of ``get_columns`` for that table.
        """
        all_columns: Dict[str, Dict[str, Dict[str, Any]]] = {}
        for dataset_id in self.list_dataset_ids():
            dataset_columns: Dict[str, Dict[str, Any]] = {}
            for table_info in self.list_tables(dataset_id):
                table_id = table_info["table_id"]
                dataset_columns[table_id] = {
                    "columns": self.get_columns(table_id, dataset_id),
                    "table_type": table_info["table_type"],
                }
            all_columns[dataset_id] = dataset_columns
        return all_columns
28
59
 
29
60
 
30
61
class GCPLineageClient:
    """Client to interact with the GCP Lineage API."""

    def __init__(self, credentials: Credentials) -> None:
        """Initialize the GCP Lineage client with provided credentials."""
        self.client = self.__initialize(credentials=credentials)

    def __initialize(self, credentials: Credentials) -> LineageClient:
        """Create the underlying ``LineageClient``."""
        return LineageClient(credentials=credentials)

    def get_links(self, request: SearchLinksRequest) -> list:
        """Search for links between entities (tables).

        Args:
            request: The search request forwarded to ``LineageClient.search_links``.

        Returns:
            A list with every link produced by the search.
        """
        # Per the google-cloud-datacatalog-lineage API, search_links returns a
        # pager: iterating it walks every result page, while reading `.links`
        # on it only exposes the links of the first page.
        return list(self.client.search_links(request))
41
75
 
@@ -57,5 +91,4 @@ def get_org_id(credentials_json: dict) -> str:
57
91
  crm_service = build("cloudresourcemanager", "v1", credentials=credentials)
58
92
  project_id = credentials_json["project_id"]
59
93
  project = crm_service.projects().get(projectId=project_id).execute()
60
- org_id = project["parent"]["id"]
61
- return org_id
94
+ return project["parent"]["id"]
@@ -100,3 +100,7 @@ class QDCExternalAPIClient:
100
100
  logger.error(f"Error: {re} downstream_global_id: {global_id}.")
101
101
  else:
102
102
  return res.status_code
103
+
104
+
105
def initialize_qdc_client(api_url: str, client_id: str, client_secret: str) -> QDCExternalAPIClient:
    """Build a ``QDCExternalAPIClient`` from the API URL and client credentials.

    ``api_url`` is handed to the client as its ``base_url``; ``client_id`` and
    ``client_secret`` are passed through under the same names.
    """
    qdc_client = QDCExternalAPIClient(
        base_url=api_url,
        client_id=client_id,
        client_secret=client_secret,
    )
    return qdc_client
quollio_core/snowflake.py CHANGED
@@ -11,6 +11,7 @@ from quollio_core.profilers.snowflake import (
11
11
  snowflake_table_stats,
12
12
  snowflake_table_to_table_lineage,
13
13
  )
14
+ from quollio_core.profilers.stats import get_column_stats_items
14
15
  from quollio_core.repository import dbt, qdc, snowflake
15
16
 
16
17
  logger = logging.getLogger(__name__)
@@ -22,7 +23,6 @@ def build_view(
22
23
  target_tables: str = "",
23
24
  log_level: str = "info",
24
25
  ) -> None:
25
-
26
26
  logger.info("Build profiler views using dbt")
27
27
  # set parameters
28
28
  dbt_client = dbt.DBTClient()
@@ -103,13 +103,19 @@ def load_stats(
103
103
  conn: snowflake.SnowflakeConnectionConfig,
104
104
  qdc_client: qdc.QDCExternalAPIClient,
105
105
  tenant_id: str,
106
+ stats_items: str,
106
107
  ) -> None:
107
-
108
108
  logger.info("Generate Snowflake stats.")
109
+
110
+ if stats_items is None:
111
+ raise ValueError("No stats items are not selected. Please specify any value to `stats_items` param.")
112
+
113
+ logger.info("The following values will be aggregated. {stats_items}".format(stats_items=stats_items))
109
114
  snowflake_table_stats(
110
115
  conn=conn,
111
116
  qdc_client=qdc_client,
112
117
  tenant_id=tenant_id,
118
+ stats_items=stats_items,
113
119
  )
114
120
 
115
121
  logger.info("Stats data is successfully loaded.")
@@ -122,7 +128,6 @@ def load_sqllineage(
122
128
  qdc_client: qdc.QDCExternalAPIClient,
123
129
  tenant_id: str,
124
130
  ) -> None:
125
-
126
131
  logger.info("Generate Snowflake sqllineage.")
127
132
  snowflake_table_level_sqllineage(
128
133
  conn=conn,
@@ -275,6 +280,19 @@ if __name__ == "__main__":
275
280
  required=False,
276
281
  help="Whether to ingest column lineage into QDIC or not. Default value is False",
277
282
  )
283
+
284
+ stats_items = get_column_stats_items()
285
+ parser.add_argument(
286
+ "--target_stats_items",
287
+ type=str,
288
+ nargs="*",
289
+ choices=stats_items,
290
+ default=stats_items,
291
+ action=env_default("SNOWFLAKE_STATS_ITEMS"),
292
+ required=False,
293
+ help="The items for statistic values.\
294
+ You can choose the items to be aggregated for stats. All items are selected by default.",
295
+ )
278
296
  args = parser.parse_args()
279
297
  set_log_level(level=args.log_level)
280
298
 
@@ -321,6 +339,7 @@ if __name__ == "__main__":
321
339
  conn=conn,
322
340
  qdc_client=qdc_client,
323
341
  tenant_id=args.tenant_id,
342
+ stats_items=args.target_stats_items,
324
343
  )
325
344
  if "load_sqllineage" in args.commands:
326
345
  qdc_client = qdc.QDCExternalAPIClient(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: quollio-core
3
- Version: 0.4.12
3
+ Version: 0.4.13
4
4
  Summary: Quollio Core
5
5
  Author-email: quollio-dev <qt.dev@quollio.com>
6
6
  Maintainer-email: RyoAriyama <ryo.arym@gmail.com>, tharuta <35373297+TakumiHaruta@users.noreply.github.com>
@@ -22,6 +22,7 @@ Requires-Dist: dbt-core==1.7.10
22
22
  Requires-Dist: dbt-snowflake==1.7.0
23
23
  Requires-Dist: dbt-redshift==1.7.1
24
24
  Requires-Dist: dbt-databricks==1.7.1
25
+ Requires-Dist: db-dtypes==1.2.0
25
26
  Requires-Dist: jinja2==3.1.3
26
27
  Requires-Dist: PyYAML==6.0.1
27
28
  Requires-Dist: requests==2.31.0
@@ -1,8 +1,8 @@
1
- quollio_core/__init__.py,sha256=0xTw5MNfA7O56-oLDQVOHnaT2p7G6LxL1nxoV2zfkRQ,84
2
- quollio_core/bigquery.py,sha256=RguUznaY5YjROzJtXimoS8yCNH9jgGphpzd5v_JgSQM,3884
3
- quollio_core/bricks.py,sha256=Lehv-qsBSMNNE9BGVvidGOXJsxLSSsbNtmiEZH4lSUg,9458
4
- quollio_core/redshift.py,sha256=1d-mHnalB1jtiGPgzsGd3lRwLHCxaBJlUMEV2dh4f60,9882
5
- quollio_core/snowflake.py,sha256=G3tImWbZgMlycYuw1b5WnNBp3zWo3hyrbOX5ARLIs7A,10585
1
+ quollio_core/__init__.py,sha256=AUePs5X9J3XSNhx1MlWVacGiCUUUbcMKTZG3Rs0jrNY,84
2
+ quollio_core/bigquery.py,sha256=6Oq4DVGpa3X21Es_nbrsb8pK3vaxwb9Egnvq3huo95k,5894
3
+ quollio_core/bricks.py,sha256=4M0fzxwtFCwAv2Lat9XYdLtoGp27fy-w6a3ty1dExSc,9999
4
+ quollio_core/redshift.py,sha256=x86Fu3QJoJNGKPYbOcqUgQzzj1qNR6I3dd0R9oQClUE,10720
5
+ quollio_core/snowflake.py,sha256=ars0S8sbEcDR74RLrsJX9VWh8fbBGgk2H7G81paCPlk,11426
6
6
  quollio_core/dbt_projects/databricks/.gitignore,sha256=1jJAyXSzJ3YUm0nx3i7wUSE4RjQMX3ad6F8O88UbtzI,29
7
7
  quollio_core/dbt_projects/databricks/README.md,sha256=ZpRQyhFAODAiS8dc1Kb_ndkul4cu4o4udN_EMa49CU4,440
8
8
  quollio_core/dbt_projects/databricks/dbt_project.yml,sha256=3sH98RNk7TnphvI3yEdXDstb92kW5BNxr-cT0tXhwzk,480
@@ -65,22 +65,23 @@ quollio_core/helper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
65
65
  quollio_core/helper/core.py,sha256=wbu4FWI7YiFEttXGSuj3tMyAhtPAFlHOjDpWJGNXOHA,1202
66
66
  quollio_core/helper/env_default.py,sha256=H6gbSGUPrEDZr4YDrL49hbOpw6RntI4U82kX1q6vUnI,2148
67
67
  quollio_core/helper/log.py,sha256=flxyZZ44G79l1TaUp3OT58uCHcnE5z_pCduwoeI6IUs,645
68
+ quollio_core/helper/log_utils.py,sha256=w1El5yafNcKgzpiMmspsAjUm3R32ACm5QNj5lNb3xsk,1392
68
69
  quollio_core/profilers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
69
- quollio_core/profilers/bigquery.py,sha256=e1Y8cZR-LxI9mSsYb0DurQyy0eCjM_kAKLfvl4IuQLE,3262
70
- quollio_core/profilers/databricks.py,sha256=n1JCSkOGVjUwXn6wjMybN-3sn3WDKGt0Dm7ydkMSNFY,7808
70
+ quollio_core/profilers/bigquery.py,sha256=LQzDPo-fyTHPc4C-LC59Aby5cJ7m-m4THzl9HUurXm0,5641
71
+ quollio_core/profilers/databricks.py,sha256=ik4RiR_GOeU3S7s6C6Y9SGe1D_Y_f98BDWJVlEJXL4U,7868
71
72
  quollio_core/profilers/lineage.py,sha256=4FyxIuPBrUFihqZryqTQBcfB0Z7634lKl_WwkD82vzE,6865
72
- quollio_core/profilers/redshift.py,sha256=6_4amsBL4QW0ZajWhS-TW3f_cjKKa6TpXClMgBC-fZo,6440
73
- quollio_core/profilers/snowflake.py,sha256=nitlP5pmDm2RhLGO4f_WTzkw41EmOTY2uWN1HZkCHbI,8465
73
+ quollio_core/profilers/redshift.py,sha256=p6ONDCkhndZAOcKAwEyQ5fsi-jsQrlwHHb7LTI_m1uk,6473
74
+ quollio_core/profilers/snowflake.py,sha256=YdrV82pjJ1BilWQvPES1pz3EmQoBOJEPc6mVlI4FDRg,8311
74
75
  quollio_core/profilers/sqllineage.py,sha256=XkF7hwDWIGNtyEP5cv2wETBgMfdQxeHolv7qPIkntSQ,5066
75
- quollio_core/profilers/stats.py,sha256=PG1NbbUSpc1JuEYvBzD66rd24tp0C13_Y5Y7vRjYG1c,4720
76
+ quollio_core/profilers/stats.py,sha256=OLQrdrh0y64jo9rmzvGlDdxy_c7gMz_GnlXPJzWkBjM,7343
76
77
  quollio_core/repository/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
- quollio_core/repository/bigquery.py,sha256=KMJTeF4OUxtaJt0ymoJ4tkrMKq8yLyMYaMxNvU5yd_Y,2271
78
+ quollio_core/repository/bigquery.py,sha256=3AyGcJNYGnUyMweyc6lGm4quwrOzd-ZBS2zNnFwafII,3990
78
79
  quollio_core/repository/databricks.py,sha256=9Cgdv8qBnVaHqu3RA-IUBieAqb69moQ-KAAMVSf5Ds4,1877
79
80
  quollio_core/repository/dbt.py,sha256=cnLwJPywLi8VowVW7zfIBa9jxVwDWO7xzzNRn1vWiuw,659
80
- quollio_core/repository/qdc.py,sha256=qEpMF6rKdic23dPJoDYmbIcyCKDuSFqbDF2_jqmqoZw,4369
81
+ quollio_core/repository/qdc.py,sha256=hw7L7RdX5srv_MUSxAObq3l9b3IYjzN5lopp6CgPXyY,4572
81
82
  quollio_core/repository/redshift.py,sha256=p2ouEuYcDCjx1oBhc6H1ekQsvEqHGd3bFu3PW0ngYBc,2880
82
83
  quollio_core/repository/snowflake.py,sha256=J9rHshfWdOSnjQWxwGEYPpAU2lY7Tu5UFB_BNakkAX0,1892
83
- quollio_core-0.4.12.dist-info/LICENSE,sha256=V8j_M8nAz8PvAOZQocyRDX7keai8UJ9skgmnwqETmdY,34520
84
- quollio_core-0.4.12.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
85
- quollio_core-0.4.12.dist-info/METADATA,sha256=Zy6X_ZypiIkyysb50Ic4UwCAuSByyd3y__ED_IefbIQ,6804
86
- quollio_core-0.4.12.dist-info/RECORD,,
84
+ quollio_core-0.4.13.dist-info/LICENSE,sha256=V8j_M8nAz8PvAOZQocyRDX7keai8UJ9skgmnwqETmdY,34520
85
+ quollio_core-0.4.13.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
86
+ quollio_core-0.4.13.dist-info/METADATA,sha256=fyVJbVrl739taWh8w9ndVhKI2KWgsVLLZbRmzVF9Yj8,6836
87
+ quollio_core-0.4.13.dist-info/RECORD,,