quollio-core 0.4.19__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
quollio_core/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  """Quollio Core"""
2
2
 
3
- __version__ = "0.4.19"
3
+ __version__ = "0.5.3"
4
4
  __author__ = "Quollio Technologies, Inc"
@@ -95,6 +95,23 @@ UNION
95
95
  {{ source('account_usage', 'TABLES') }}
96
96
  WHERE
97
97
  DELETED IS NULL
98
+ AND (
99
+ {% if var('target_databases_method') == 'ALLOWLIST' %}
100
+ {% if var('target_databases') %}
101
+ TABLE_CATALOG LIKE ANY ({{ var('target_databases')|join(",") }})
102
+ {% else %}
103
+ 1=0 -- If no databases specified in allowlist, deny all
104
+ {% endif %}
105
+ {% elif var('target_databases_method') == 'DENYLIST' %}
106
+ {% if var('target_databases') %}
107
+ NOT (TABLE_CATALOG LIKE ANY ({{ var('target_databases')|join(",") }}))
108
+ {% else %}
109
+ 1=1 -- If no databases specified in denylist, include all
110
+ {% endif %}
111
+ {% else %}
112
+ 1=1 -- Default case: allow all databases
113
+ {% endif %}
114
+ )
98
115
  ), exists_upstream_column_lineage AS (
99
116
  SELECT
100
117
  downstream_table_name
@@ -49,6 +49,23 @@ WITH table_lineage_history AS (
49
49
  {{ source('account_usage', 'TABLES') }}
50
50
  WHERE
51
51
  DELETED IS NULL
52
+ AND (
53
+ {% if var('target_databases_method') == 'ALLOWLIST' %}
54
+ {% if var('target_databases') %}
55
+ TABLE_CATALOG LIKE ANY ({{ var('target_databases')|join(",") }})
56
+ {% else %}
57
+ 1=0 -- If no databases specified in allowlist, deny all
58
+ {% endif %}
59
+ {% elif var('target_databases_method') == 'DENYLIST' %}
60
+ {% if var('target_databases') %}
61
+ NOT (TABLE_CATALOG LIKE ANY ({{ var('target_databases')|join(",") }}))
62
+ {% else %}
63
+ 1=1 -- If no databases specified in denylist, include all
64
+ {% endif %}
65
+ {% else %}
66
+ 1=1 -- Default case: allow all databases
67
+ {% endif %}
68
+ )
52
69
  ), upstream_exists_table AS (
53
70
  SELECT
54
71
  downstream_table_name AS "DOWNSTREAM_TABLE_NAME"
@@ -48,3 +48,21 @@ on
48
48
  lst.query_id = qt.query_id
49
49
  where
50
50
  qt.query_id is not null
51
+ AND (
52
+ {% if var('target_databases_method') == 'ALLOWLIST' %}
53
+ {% if var('target_databases') %}
54
+ database_name LIKE ANY ({{ var('target_databases')|join(",") }})
55
+ {% else %}
56
+ 1=0 -- If no databases specified in allowlist, deny all
57
+ {% endif %}
58
+ {% elif var('target_databases_method') == 'DENYLIST' %}
59
+ {% if var('target_databases') %}
60
+ NOT (database_name LIKE ANY ({{ var('target_databases')|join(",") }}))
61
+ {% else %}
62
+ 1=1 -- If no databases specified in denylist, include all
63
+ {% endif %}
64
+ {% else %}
65
+ 1=1 -- Default case: allow all databases
66
+ {% endif %}
67
+ )
68
+
@@ -28,7 +28,7 @@ WITH columns AS (
28
28
  FROM
29
29
  {{ source('account_usage', 'GRANTS_TO_ROLES') }}
30
30
  WHERE
31
- granted_on in ('TABLE', 'MATERIALIZED VIEW')
31
+ granted_on in ('TABLE', 'VIEW', 'MATERIALIZED VIEW')
32
32
  AND grantee_name = '{{ var("query_role") }}'
33
33
  AND privilege in ('SELECT', 'OWNERSHIP')
34
34
  AND deleted_on IS NULL
@@ -87,10 +87,26 @@ WITH columns AS (
87
87
  , data_type
88
88
  , case when data_type in('NUMBER','DECIMAL', 'DEC', 'NUMERIC',
89
89
  'INT', 'INTEGER', 'BIGINT', 'SMALLINT',
90
- 'TINYINT', 'BYTEINT')
90
+ 'TINYINT', 'BYTEINT', 'FLOAT')
91
91
  THEN true
92
92
  else false END AS is_calculable
93
93
  FROM
94
94
  implicit_columns_removed
95
- )
95
+ WHERE
96
+ {% if var('target_databases_method') == 'ALLOWLIST' %}
97
+ {% if var('target_databases') %}
98
+ TABLE_CATALOG LIKE ANY ({{ var('target_databases')|join(",") }})
99
+ {% else %}
100
+ 1=0 -- If no databases specified in allowlist, deny all
101
+ {% endif %}
102
+ {% elif var('target_databases_method') == 'DENYLIST' %}
103
+ {% if var('target_databases') %}
104
+ NOT (TABLE_CATALOG LIKE ANY ({{ var('target_databases')|join(",") }}))
105
+ {% else %}
106
+ 1=1 -- If no databases specified in denylist, include all
107
+ {% endif %}
108
+ {% else %}
109
+ 1=1 -- Default case: allow all databases
110
+ {% endif %}
111
+ )
96
112
  select * from final
@@ -35,3 +35,10 @@ def setup_dbt_profile(connections_json: Dict[str, str], template_path: str, temp
35
35
 
36
36
def trim_prefix(s: str, prefix: str) -> str:
    """Return ``s`` with a single leading ``prefix`` removed.

    ``str.lstrip`` treats its argument as a *set of characters* and keeps
    stripping while the leading character is in that set, so it can eat into
    the text that follows the prefix (``"dbt_table".lstrip("dbt_")`` gives
    ``"able"``). This implementation removes the exact prefix once.

    Args:
        s: The string to trim.
        prefix: The exact leading substring to remove.

    Returns:
        ``s`` without the leading ``prefix``, or ``s`` unchanged when it does
        not start with ``prefix`` (or when ``prefix`` is empty).
    """
    if prefix and s.startswith(prefix):
        return s[len(prefix):]
    return s
38
+
39
+
40
def is_valid_domain(domain: str, domain_type: str) -> bool:
    """Check that ``domain`` ends with the suffix required for ``domain_type``.

    Args:
        domain: The API URL to validate.
        domain_type: ``"VPC_ENDPOINT"`` requires a trailing ``/api``; any
            other value requires a trailing ``.com``.

    Returns:
        True when ``domain`` ends with the required suffix.
    """
    required_suffix = "/api" if domain_type == "VPC_ENDPOINT" else ".com"
    return domain.endswith(required_suffix)
@@ -67,15 +67,19 @@ class SQLLineage:
67
67
  dest_schema = dest_schema.upper() if dest_schema is not None else None
68
68
 
69
69
  # MEMO: Complement sql with dialect, source database and source schema info.
70
- optimized_stmt: sqlglot.Expression = optimizer.qualify.qualify(
71
- statement,
72
- dialect=dialect,
73
- catalog=src_db,
74
- db=src_schema,
75
- qualify_columns=False,
76
- validate_qualify_columns=False,
77
- identify=False,
78
- )
70
+ # MEMO: Skipping qualify because it normalizes the table names.
71
+ if dialect == "teradata":
72
+ optimized_stmt = statement
73
+ else:
74
+ optimized_stmt: sqlglot.Expression = optimizer.qualify.qualify(
75
+ statement,
76
+ dialect=dialect,
77
+ catalog=src_db,
78
+ db=src_schema,
79
+ qualify_columns=False,
80
+ validate_qualify_columns=False,
81
+ identify=False,
82
+ )
79
83
 
80
84
  orig_dest_table = Table(table="")
81
85
  dest_table = Table(table="")
@@ -0,0 +1,176 @@
1
+ import os
2
+ from collections import OrderedDict
3
+ from typing import Dict, List, Set, Tuple, Union
4
+
5
+ from sqlglot import ParseError
6
+
7
+ from quollio_core.helper.log_utils import error_handling_decorator, logger
8
+ from quollio_core.profilers.sqllineage import SQLLineage, Table
9
+ from quollio_core.repository import qdc
10
+ from quollio_core.repository import teradata as teradata_repo
11
+
12
+
13
+ @error_handling_decorator
14
def load_lineage(
    conn_config: teradata_repo.TeradataConfig,
    endpoint: str = None,
    tenant_id: str = None,
    qdc_client: qdc.QDCExternalAPIClient = None,
    page_size: int = None,
    system_database: str = None,
) -> None:
    """Extract table lineage from the Teradata query log and push it to QDC.

    The query log is read page by page, split SQL fragments are concatenated,
    each statement is parsed for its source/destination tables, and one
    lineage update per destination table is sent through ``qdc_client``.

    Args:
        conn_config: Connection settings for Teradata.
        endpoint: Passed through to ``SQLLineage.gen_lineage_input``.
        tenant_id: Passed through to ``SQLLineage.gen_lineage_input``.
        qdc_client: Client used to call ``update_lineage_by_id``.
        page_size: Rows per page; falls back to the ``TERADATA_PAGE_SIZE``
            environment variable and then to 1000.
        system_database: Database holding the query-log views; falls back to
            ``conn_config.system_database``.
    """
    page_size = page_size or int(os.environ.get("TERADATA_PAGE_SIZE", 1000))
    offset = 0
    # Keyed by destination table so that the same destination seen on several
    # pages is merged into one entry (and one update request) instead of being
    # reported and sent once per page.
    merged_lineage = OrderedDict()

    # Use system_database from config if not provided
    system_database = system_database or conn_config.system_database

    with teradata_repo.new_teradata_client(conn_config) as conn:
        while True:
            # NOTE(review): rows are paged independently, so fragments of one
            # QueryID that straddle a page boundary are concatenated separately.
            # The LIKE filters also apply per fragment row, so continuation
            # fragments presumably only match when they start with one of the
            # keywords -- confirm against the query-log view semantics.
            query = f"""
                SELECT
                    a.QueryID,
                    TRIM(a.SqlTextInfo) AS SqlTextInfo,
                    a.SqlRowNo,
                    TRIM(d.DatabaseName) AS DefaultDatabase
                FROM {system_database}.QryLogSQLV a
                JOIN {system_database}.QryLogV b
                ON a.QueryID = b.QueryID
                JOIN {system_database}.DatabasesV d
                ON b.DefaultDatabase = d.DatabaseName
                WHERE
                    UPPER(TRIM(SqlTextInfo)) LIKE 'CREATE TABLE%'
                    OR UPPER(TRIM(SqlTextInfo)) LIKE 'CREATE VIEW%'
                    OR UPPER(TRIM(SqlTextInfo)) LIKE 'INSERT%'
                    OR UPPER(TRIM(SqlTextInfo)) LIKE 'MERGE%'
                    OR UPPER(TRIM(SqlTextInfo)) LIKE 'UPDATE%'
                QUALIFY ROW_NUMBER() OVER (ORDER BY a.QueryID, a.SqlRowNo) > {offset}
                AND ROW_NUMBER() OVER (ORDER BY a.QueryID, a.SqlRowNo) <= {offset + page_size}
            """

            rows = teradata_repo.execute_query(query, conn)
            if not rows:
                break

            logger.info(f"Concatenating split queries for page {offset // page_size + 1}...")
            concatenated_queries = concatenate_split_queries(rows)

            logger.info("Processing SQL statements and extracting lineage...")
            for src_tables, dest_table in process_sql_statements(concatenated_queries):
                merged_lineage.setdefault(dest_table, set()).update(src_tables)

            # A short page means the log has been exhausted.
            if len(rows) < page_size:
                break

            offset += page_size

    all_lineage_results = [(src_tables, dest_table) for dest_table, src_tables in merged_lineage.items()]

    logger.info(f"Lineage extraction complete. Found {len(all_lineage_results)} unique entries.")
    for src_tables, dest_table in all_lineage_results:
        logger.debug(f"Destination table: {dest_table}")
        # Sources are sets, so iterate them directly rather than requiring a list.
        logger.debug("Source tables:")
        for src_table in src_tables:
            logger.debug(f" - {src_table}")
        logger.debug("---")

    sql_lineage = SQLLineage()
    update_table_lineage_inputs = [
        sql_lineage.gen_lineage_input(
            tenant_id=tenant_id, endpoint=endpoint, src_tables=src_tables, dest_table=dest_table
        )
        for src_tables, dest_table in all_lineage_results
    ]

    table_req_count = 0
    logger.info(f"Starting to update lineage information for {len(update_table_lineage_inputs)} tables.")
    for update_table_lineage_input in update_table_lineage_inputs:
        logger.info(
            f"Generating table lineage. downstream: {update_table_lineage_input.downstream_database_name}"
            f" -> {update_table_lineage_input.downstream_table_name}"
        )
        try:
            status_code = qdc_client.update_lineage_by_id(
                global_id=update_table_lineage_input.downstream_global_id,
                payload=update_table_lineage_input.upstreams.as_dict(),
            )
            if status_code == 200:
                table_req_count += 1
            else:
                logger.error(
                    f"Failed to update lineage for {update_table_lineage_input.downstream_table_name}. "
                    f"Status code: {status_code}"
                )
        except Exception as e:
            # Best effort: one failing table must not stop the remaining updates.
            logger.error(
                f"Exception occurred while updating lineage for {update_table_lineage_input.downstream_table_name}: {e}"
            )
    logger.info(f"Generating table lineage is finished. {table_req_count} lineages are ingested.")
116
+
117
+
118
+ @error_handling_decorator
119
def extract_lineage(sql_statement: str, default_database: str = None) -> Tuple[Set[Table], Table]:
    """Parse one Teradata SQL statement into (source tables, destination table).

    The schema reported by the parser is used as the database name, falling
    back to ``default_database`` when it is empty; the resulting tables carry
    an empty ``db_schema``.

    Args:
        sql_statement: The SQL text to analyse.
        default_database: Database to use for tables without a qualifier.

    Returns:
        A ``(sources, destination)`` pair. On any error the failure is logged
        and ``(set(), Table(db="", table=""))`` is returned.
    """

    def _as_teradata_table(parsed: Table) -> Table:
        # Map the parser's schema slot onto the database slot.
        return Table(db=parsed.db_schema or default_database, db_schema="", table=parsed.table)

    try:
        logger.debug(f"Parsing SQL: {sql_statement}")
        parsed_sources, parsed_dest = SQLLineage().get_table_level_lineage_source(
            sql=sql_statement, dialect="teradata"
        )
        normalized_sources = {_as_teradata_table(src) for src in parsed_sources}
        return normalized_sources, _as_teradata_table(parsed_dest)
    except Exception as e:
        # Pick the message by error category, most specific first.
        if isinstance(e, ParseError):
            logger.error(f"Error parsing SQL: {e}")
        elif isinstance(e, AttributeError):
            logger.error(f"Attribute error while extracting lineage: {e}")
        else:
            logger.error(f"Unexpected error while extracting lineage: {e}")
        logger.debug(f"Problematic SQL: {sql_statement}")
    return set(), Table(db="", table="")
139
+
140
+
141
+ @error_handling_decorator
142
def process_sql_statements(queries: List[Union[str, Dict[str, Union[str, int]]]]) -> List[Tuple[Set[Table], Table]]:
    """Extract lineage from each query and merge entries per destination table.

    Args:
        queries: Either raw SQL strings, or dicts with a ``"SqlTextInfo"`` key
            and an optional ``"DefaultDatabase"`` key.

    Returns:
        ``(source_tables, destination_table)`` pairs in first-seen order of
        the destination. Statements without a destination table name or
        without sources are dropped.
    """
    merged = OrderedDict()
    for item in queries:
        if isinstance(item, str):
            sql_text, fallback_db = item, None
        else:
            sql_text, fallback_db = item["SqlTextInfo"], item.get("DefaultDatabase")

        sources, destination = extract_lineage(sql_text, fallback_db)
        if not (destination.table and sources):
            continue

        if destination in merged:
            logger.info(f"Merging duplicate entry for {destination}")
            # Merge source tables
            merged[destination] = merged[destination].union(sources)
        else:
            merged[destination] = sources

    return [(sources, destination) for destination, sources in merged.items()]
161
+
162
+
163
def concatenate_split_queries(rows: List[Dict[str, Union[str, int]]]) -> List[Dict[str, Union[str, int]]]:
    """Rebuild full SQL statements from query-log rows that hold text fragments.

    Rows sharing a ``QueryID`` are joined into one statement. Fragments are
    ordered by ``SqlRowNo`` when the row carries it, so the result does not
    depend on the order in which the database returned the rows; rows without
    ``SqlRowNo`` keep their arrival order.

    Args:
        rows: Dicts with ``"QueryID"``, ``"SqlTextInfo"`` and
            ``"DefaultDatabase"`` keys, and optionally ``"SqlRowNo"``.

    Returns:
        One dict per query, in first-seen order, with the joined
        ``"SqlTextInfo"`` and the ``"DefaultDatabase"`` of the query's first row.
    """
    queries = {}
    for position, row in enumerate(rows):
        query_id = row["QueryID"]
        if query_id not in queries:
            queries[query_id] = {"fragments": [], "DefaultDatabase": row["DefaultDatabase"]}
        row_no = row.get("SqlRowNo")
        # Fall back to arrival position when the fragment number is missing;
        # position also breaks ties so the ordering is deterministic.
        sort_key = (position if row_no is None else row_no, position)
        queries[query_id]["fragments"].append((sort_key, row["SqlTextInfo"]))

    return [
        {
            "SqlTextInfo": "".join(text for _, text in sorted(query["fragments"], key=lambda frag: frag[0])),
            "DefaultDatabase": query["DefaultDatabase"],
        }
        for query in queries.values()
    ]
@@ -0,0 +1,224 @@
1
+ from typing import Any, Dict, List, Optional
2
+
3
+ from quollio_core.helper.log_utils import error_handling_decorator, logger
4
+ from quollio_core.profilers.stats import gen_table_stats_payload
5
+ from quollio_core.repository import qdc
6
+ from quollio_core.repository import teradata as teradata_repo
7
+
8
+ NUMERIC_TYPES = ["D", "F", "I1", "I2", "I8", "I", "N"]
9
+
10
+ # I, I1, I2, I8 - INT TYPES INTEGER, BYTEINT, SMALLINT, BIGINT
11
+ # F - Float
12
+ # D - Decimal
13
+ # N - Number
14
+
15
+
16
def quote_identifier(identifier: str) -> str:
    """Wrap ``identifier`` in double quotes for use as a delimited SQL name.

    Embedded double quotes are doubled so that a name containing ``"`` cannot
    terminate the quoted identifier early.

    Args:
        identifier: The raw database, table or column name.

    Returns:
        The quoted identifier.
    """
    escaped = identifier.replace('"', '""')
    return f'"{escaped}"'
18
+
19
+
20
+ @error_handling_decorator
21
def load_stats(
    conn_config: teradata_repo.TeradataConfig,
    sample_percent: Optional[float] = None,
    endpoint: Optional[str] = None,
    tenant_id: Optional[str] = None,
    qdc_client: Optional[qdc.QDCExternalAPIClient] = None,
    target_databases: Optional[List[str]] = None,
    target_databases_method: str = "DENYLIST",
    stats_items: Optional[List[str]] = None,
    system_database: Optional[str] = None,
) -> None:
    """Collect per-column statistics from Teradata and push them to QDC.

    For every table returned by ``teradata_repo.get_table_list`` and every
    column of it, a statistics query is generated and executed; the parsed
    results are turned into payloads and sent with
    ``qdc_client.update_stats_by_id``.

    Args:
        conn_config: Connection settings for Teradata.
        sample_percent: Sampling percentage, only applied to numeric columns.
        endpoint: Passed through to ``gen_table_stats_payload``.
        tenant_id: Passed through to ``gen_table_stats_payload``.
        qdc_client: Client used to upload the statistics.
        target_databases: Databases to allow or deny.
        target_databases_method: ``"DENYLIST"`` or ``"ALLOWLIST"``.
        stats_items: Names of the statistics to compute.
        system_database: Database holding the catalog views; falls back to
            ``conn_config.system_database``.
    """
    stats_list = []
    numerical_columns = 0
    non_numerical_columns = 0
    logger.info(
        f"Starting statistics collection. " f"Sample percent: {sample_percent if sample_percent is not None else 'N/A'}"
    )

    # Use system_database from config if not provided
    system_database = system_database or conn_config.system_database

    with teradata_repo.new_teradata_client(conn_config) as conn:
        try:
            tables = teradata_repo.get_table_list(conn, target_databases, target_databases_method, system_database)
            for table in tables:
                logger.debug(f"Processing table: {table}")
                database_name = table["DatabaseName"]
                table_name = table["TableName"]

                logger.info(f"Processing table {database_name}.{table_name}")
                columns = teradata_repo.get_column_list(
                    conn, database_name=database_name, table_name=table_name, system_database=system_database
                )
                logger.debug(f"Columns: {columns}")

                for column in columns:
                    column_name = column["ColumnName"]
                    # The type code may be missing; normalise it to a stripped string.
                    column_type = (column["ColumnType"] or "").strip()

                    is_numerical = column_type in NUMERIC_TYPES
                    if is_numerical:
                        numerical_columns += 1
                    else:
                        non_numerical_columns += 1

                    stats_sql = generate_column_statistics_sql(
                        database_name,
                        table_name,
                        column_name,
                        column_type,
                        sample_percent if is_numerical else None,
                        stats_items,
                    )
                    logger.debug(f"Generated SQL for column {column_name}: {stats_sql}")
                    if not stats_sql:
                        # The generator returns an empty string when no statistic
                        # applies; executing it would only produce a query error.
                        continue

                    try:
                        result = teradata_repo.execute_query(stats_sql, conn)
                        logger.debug(f"Query result for column {column_name}: {result}")
                        if result:
                            column_stats = parse_column_statistics_result(
                                result[0], database_name, table_name, column_name, stats_items, is_numerical
                            )
                            stats_list.append(column_stats)
                    except Exception as e:
                        # Best effort: keep going with the remaining columns.
                        logger.error(
                            f"Failed to collect statistics for {database_name}.{table_name}.{column_name}: {e}"
                        )

        except Exception as e:
            logger.error(f"Error during statistics collection: {e}")
        else:
            # Only report success when the collection loop ran to completion.
            logger.info("Statistics collection completed successfully.")

    logger.debug(f"Stats list: {stats_list}")
    payloads = gen_table_stats_payload(stats=stats_list, tenant_id=tenant_id, endpoint=endpoint)
    logger.debug(f"Generated payloads: {payloads}")

    req_count = 0
    for payload in payloads:
        logger.info(f"Generating table stats. asset: {payload.db} -> {payload.table} -> {payload.column}")
        status_code = qdc_client.update_stats_by_id(
            global_id=payload.global_id,
            payload=payload.body.get_column_stats(),
        )
        if status_code == 200:
            req_count += 1

    logger.info(
        f"Loading statistics is finished. {req_count} statistics are ingested. "
        f"Numerical columns: {numerical_columns}, Non-numerical columns: {non_numerical_columns}"
    )
116
+
117
+
118
+ @error_handling_decorator
119
def parse_column_statistics_result(
    result: Dict[str, Any],
    database_name: str,
    table_name: str,
    column_name: str,
    stats_items: Optional[List[str]] = None,
    is_numerical: bool = False,
) -> Dict[str, Any]:
    """Translate one statistics result row into the stats dict format.

    Args:
        result: Result row keyed by the SQL aliases (``num_uniques``,
            ``min_value``, ...).
        database_name: Stored under ``"DB_NAME"``.
        table_name: Stored under ``"TABLE_NAME"``.
        column_name: Stored under ``"COLUMN_NAME"``.
        stats_items: Requested statistic names; others are ignored.
        is_numerical: When True the numeric statistics (min, max, median,
            mean, stddev, mode) are copied too, converted to ``str``.

    Returns:
        A dict with the identifying keys plus one entry per requested
        statistic that is present in ``result``. ``"SCHEMA_NAME"`` is always
        the empty string.
    """
    # requested item -> (key in the result row, key in the output dict)
    count_fields = {
        "cardinality": ("num_uniques", "CARDINALITY"),
        "number_of_null": ("num_nulls", "NULL_COUNT"),
    }
    numeric_fields = {
        "min": ("min_value", "MIN_VALUE"),
        "max": ("max_value", "MAX_VALUE"),
        "median": ("median_value", "MEDIAN_VALUE"),
        "mean": ("avg_value", "AVG_VALUE"),
        "stddev": ("stddev_value", "STDDEV_VALUE"),
        "mode": ("mode_value", "MODE_VALUE"),
    }

    parsed = {
        "DB_NAME": database_name,
        "SCHEMA_NAME": "",
        "TABLE_NAME": table_name,
        "COLUMN_NAME": column_name,
    }

    for requested in stats_items or []:
        if requested in count_fields:
            source_key, target_key = count_fields[requested]
            if source_key in result:
                # Counts are passed through unchanged.
                parsed[target_key] = result[source_key]

        if is_numerical and requested in numeric_fields:
            source_key, target_key = numeric_fields[requested]
            if source_key in result:
                # Numeric statistics are stored as strings.
                parsed[target_key] = str(result[source_key])

    return parsed
156
+
157
+
158
+ @error_handling_decorator
159
def generate_column_statistics_sql(
    database_name: str,
    table_name: str,
    column_name: str,
    column_type: str,
    sample_percent: Optional[float] = None,
    stats_items: Optional[List[str]] = None,
) -> str:
    """Build the SQL statement that computes the requested column statistics.

    Args:
        database_name: Database of the table.
        table_name: Table name; a ``schema.table`` form is split on the first
            dot and each part is quoted separately.
        column_name: Column to profile.
        column_type: Column type code; numeric aggregates are only generated
            when it is listed in ``NUMERIC_TYPES``.
        sample_percent: When in the range ``0 < p <= 99`` a ``SAMPLE p/100``
            clause is appended.
        stats_items: Names of the statistics to compute.

    Returns:
        The SQL text, or an empty string when no statistic applies.
    """

    def _sample_clause() -> str:
        # Evaluated lazily, only at the points where the clause may be emitted.
        if sample_percent is None or not 0 < sample_percent <= 99:
            return ""
        return f" SAMPLE {sample_percent / 100}"

    quoted_column = quote_identifier(column_name)
    quoted_database = quote_identifier(database_name)
    # Handle the case where table_name might include a database
    quoted_table = ".".join(quote_identifier(part) for part in table_name.split(".", 1))
    target = f"{quoted_database}.{quoted_table}"

    # (requested item, aggregate function, result alias)
    numeric_aggregates = (
        ("min", "MIN", "min_value"),
        ("max", "MAX", "max_value"),
        ("median", "MEDIAN", "median_value"),
        ("mean", "AVG", "avg_value"),
        ("stddev", "STDDEV_SAMP", "stddev_value"),
    )

    select_items = []
    cte_prefix = ""

    if stats_items:
        if "cardinality" in stats_items:
            select_items.append(f"COUNT(DISTINCT {quoted_column}) AS num_uniques")
        if "number_of_null" in stats_items:
            select_items.append(f"SUM(CASE WHEN {quoted_column} IS NULL THEN 1 ELSE 0 END) AS num_nulls")

        if column_type in NUMERIC_TYPES:
            for item, func, alias in numeric_aggregates:
                if item in stats_items:
                    select_items.append(f"{func}(CAST({quoted_column} AS FLOAT)) AS {alias}")

            if "mode" in stats_items:
                # The mode is computed in a CTE and read back via a scalar subquery.
                cte_parts = [
                    "WITH MODE_VALUE AS (",
                    f" SELECT {quoted_column}, COUNT(*) as freq ",
                    f" FROM {target} ",
                ]
                sample = _sample_clause()
                if sample:
                    cte_parts.append(f"{sample} ")
                cte_parts.append(f" GROUP BY {quoted_column} ")
                cte_parts.append(" QUALIFY ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC) = 1")
                cte_parts.append(") ")
                cte_prefix = "".join(cte_parts)
                select_items.append(f"(SELECT {quoted_column} FROM MODE_VALUE) AS mode_value")

    if not select_items:
        logger.warning(f"No statistics selected for column {column_name}. Skipping this column.")
        return ""

    statement = f"{cte_prefix}SELECT {', '.join(select_items)} FROM {target}" + _sample_clause()

    logger.debug(f"Generated SQL query for {target}.{quoted_column}: {statement}")
    return statement
@@ -25,9 +25,6 @@ class QDCExternalAPIClient:
25
25
  Tried to find a package for oauth0 client credentials flow,
26
26
  but any of them contains bugs or lacks of features to handle the token refresh when it's expired
27
27
  """
28
- is_domain_valid = is_valid_domain(domain=self.base_url)
29
- if not is_domain_valid:
30
- raise ValueError("The format of quollio API URL is invalid. The URL must end with `.com`")
31
28
 
32
29
  url = f"{self.base_url}/oauth2/token"
33
30
  creds = f"{self.client_id}:{self.client_secret}"
@@ -108,7 +105,3 @@ class QDCExternalAPIClient:
108
105
 
109
106
  def initialize_qdc_client(api_url: str, client_id: str, client_secret: str) -> QDCExternalAPIClient:
110
107
  return QDCExternalAPIClient(base_url=api_url, client_id=client_id, client_secret=client_secret)
111
-
112
-
113
- def is_valid_domain(domain: str) -> bool:
114
- return domain.endswith(".com")
@@ -0,0 +1,59 @@
1
+ import logging
2
+ import os
3
+ from typing import Tuple
4
+
5
+ import boto3
6
+ from botocore.exceptions import ClientError
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
def get_parameter_by_assume_role(key: str, region: str = "ap-northeast-1") -> Tuple[str, Exception]:
    """Read an SSM parameter through the tenant's cross-account role.

    The role ``qdc-{TENANT_ID}-cross-account-access`` in account
    ``QDC_ACCOUNT_ID`` is assumed via STS, and the temporary credentials are
    used to fetch ``key`` from SSM with decryption enabled. Both the STS and
    the SSM endpoints are built from ``QDC_REGION``.

    Errors are reported through the return value rather than raised, and this
    now includes ``ClientError`` from ``sts.assume_role`` as well, so callers
    only have to check the returned error.

    Args:
        key: Name of the SSM parameter.
        region: Region used for the boto3 session.

    Returns:
        ``(value, None)`` on success, or ``("", error)`` when a required
        environment variable is missing/invalid or an AWS call fails.
    """
    tenant_id = os.getenv("TENANT_ID")
    if not _is_str_valid(tenant_id):
        return ("", Exception("TENANT_ID is not set in get_parameter_by_assume_role."))
    qdc_account_id = os.getenv("QDC_ACCOUNT_ID")
    if not _is_valid_aws_account_id(qdc_account_id):
        return ("", Exception("QDC_ACCOUNT_ID is not set in get_parameter_by_assume_role."))
    qdc_region = os.getenv("QDC_REGION")
    if not _is_str_valid(qdc_region):
        return ("", Exception("QDC_REGION is not set in get_parameter_by_assume_role."))

    sts_assume_role_arn = "arn:aws:iam::{account_id}:role/qdc-{tenant_id}-cross-account-access".format(
        account_id=qdc_account_id, tenant_id=tenant_id
    )

    session = boto3.Session(region_name=region)
    sts = session.client("sts", endpoint_url="https://sts.{region}.amazonaws.com".format(region=qdc_region))
    try:
        assumed_role_object = sts.assume_role(
            RoleArn=sts_assume_role_arn,
            RoleSessionName="AssumeRoleSession",
        )
    except ClientError as e:
        logger.error(
            "Failed to run sts.assume_role(). "
            "Please check the cross-account role can be assumed. error: {err}".format(err=e)
        )
        return ("", e)
    credentials = assumed_role_object["Credentials"]

    try:
        ssm = session.client(
            "ssm",
            endpoint_url="https://ssm.{region}.amazonaws.com".format(region=qdc_region),
            aws_access_key_id=credentials["AccessKeyId"],
            aws_secret_access_key=credentials["SecretAccessKey"],
            aws_session_token=credentials["SessionToken"],
        )
        res = ssm.get_parameter(Name=key, WithDecryption=True)
        return (res["Parameter"]["Value"], None)
    except ClientError as e:
        logger.error(
            "Failed to run ssm.get_parameter(). "
            "Please check the value stored in parameter store is correct. error: {err}".format(err=e)
        )
        return ("", e)
52
+
53
+
54
+ def _is_valid_aws_account_id(s: str) -> bool:
55
+ return s is not None and len(s) == 12 and s.isdigit()
56
+
57
+
58
+ def _is_str_valid(s: str) -> bool:
59
+ return s is not None and s != ""
@@ -0,0 +1,117 @@
1
+ from dataclasses import dataclass, field
2
+ from typing import Any, Dict, List, Optional
3
+
4
+ import teradatasql
5
+
6
+ from quollio_core.helper.log_utils import error_handling_decorator, logger
7
+
8
+
9
@dataclass
class TeradataConfig:
    """Connection settings for a Teradata system."""

    host: str
    port: int
    username: str
    password: str
    # Database passed to the driver as the "database" connection parameter.
    database: str = "DBC"
    # Database whose catalog / query-log views are queried by the repository functions.
    system_database: str = "DBC"
    encrypt_data: bool = True
    # Extra driver parameters; these override the generated ones on key clashes.
    additional_params: Dict[str, Any] = field(default_factory=dict)

    @classmethod
    def from_dict(
        cls,
        credentials: Dict[str, str],
        host: str,
        port: str,
        additional_params: Dict[str, Any] = None,
        system_database: str = "DBC",
    ) -> "TeradataConfig":
        """Build a config from a credentials dict plus host and port.

        Args:
            credentials: Must contain ``"username"`` and ``"password"``.
            host: Teradata host name.
            port: Port number; converted with ``int``.
            additional_params: Extra connection parameters, if any.
            system_database: Database holding the system views.
        """
        extra_params = additional_params or {}
        port_number = int(port)
        return cls(
            host=host,
            port=port_number,
            username=credentials["username"],
            password=credentials["password"],
            system_database=system_database,
            additional_params=extra_params,
        )

    def get_connection_params(self) -> Dict[str, Any]:
        """Return the keyword arguments to pass to ``teradatasql.connect``."""
        generated = {
            "host": self.host,
            "user": self.username,
            "password": self.password,
            "database": self.database,
            "dbs_port": self.port,
            # Sent as the lowercase string form of the flag.
            "encryptdata": str(self.encrypt_data).lower(),
        }
        return {**generated, **self.additional_params}
49
+
50
+
51
+ @error_handling_decorator
52
def new_teradata_client(config: TeradataConfig) -> teradatasql.connect:
    """Open a Teradata connection using the parameters derived from ``config``."""
    connection_params = config.get_connection_params()
    return teradatasql.connect(**connection_params)
55
+
56
+
57
+ @error_handling_decorator
58
def get_table_list(
    conn: teradatasql.connect,
    target_databases: Optional[List[str]] = None,
    target_databases_method: str = "DENYLIST",
    system_database: str = "DBC",
) -> List[Dict[str, str]]:
    """List the tables (``TableKind`` T, O or Q) to profile, filtered by database.

    Args:
        conn: Open Teradata connection.
        target_databases: Database names to deny or allow. ``None`` or an
            empty list means "no names given".
        target_databases_method: ``"DENYLIST"`` excludes the listed databases;
            any other value is treated as an allowlist.
        system_database: Database that holds the ``TablesV`` view.

    Returns:
        Rows with ``DatabaseName`` and ``TableName``. With no names given, a
        denylist returns every table and an allowlist returns nothing,
        instead of failing on ``None`` or generating an invalid ``IN ()``.
    """
    is_denylist = target_databases_method == "DENYLIST"

    if not target_databases and not is_denylist:
        # An empty allowlist allows nothing; no need to query.
        logger.debug("No databases specified in allowlist. No tables are retrieved.")
        return []

    database_filter = ""
    if target_databases:
        operator = "NOT" if is_denylist else ""
        # Double embedded single quotes so a name cannot break out of its literal.
        quoted_names = ",".join("'" + db.replace("'", "''") + "'" for db in target_databases)
        database_filter = f"AND DatabaseName {operator} IN ({quoted_names})"

    query_tables = f"""
        SELECT DatabaseName, TableName
        FROM {system_database}.TablesV
        WHERE TableKind IN ('T', 'O', 'Q')
        {database_filter}
    """
    logger.debug("Executing query to retrieve table names.")
    logger.debug(f"Query: {query_tables}")
    tables = execute_query(query_tables, conn)
    return tables
79
+
80
+
81
+ @error_handling_decorator
82
def get_column_list(
    conn: teradatasql.connect, database_name: str, table_name: str, system_database: str = "DBC"
) -> List[Dict[str, str]]:
    """Return ``ColumnName`` / ``ColumnType`` rows for one table.

    Args:
        conn: Open Teradata connection.
        database_name: Database of the table.
        table_name: Name of the table.
        system_database: Database that holds the ``ColumnsV`` view.

    Returns:
        One dict per column with the keys ``ColumnName`` and ``ColumnType``.
    """
    # Double embedded single quotes so names containing "'" stay inside the
    # string literal instead of breaking (or altering) the statement.
    database_literal = database_name.replace("'", "''")
    table_literal = table_name.replace("'", "''")

    query_columns = f"""
        SELECT ColumnName, ColumnType
        FROM {system_database}.ColumnsV
        WHERE DatabaseName = '{database_literal}'
        AND TableName = '{table_literal}'
    """
    logger.debug(f"Executing query to retrieve columns for {database_name}.{table_name}.")
    logger.debug(f"Query: {query_columns}")
    columns = execute_query(query_columns, conn)
    logger.debug(f"Retrieved columns: {columns}")
    return columns
96
+
97
+
98
+ @error_handling_decorator
99
def execute_query(query: str, con: teradatasql.connect) -> List[Dict[str, Any]]:
    """Run ``query`` and return every row as a dict keyed by column name.

    Args:
        query: SQL text to execute.
        con: Open Teradata connection.

    Returns:
        A list with one ``{column name: value}`` dict per fetched row.

    Raises:
        teradatasql.OperationalError, teradatasql.ProgrammingError, Exception:
            Any failure is logged and then re-raised unchanged.
    """
    try:
        with con.cursor() as cursor:
            logger.debug(f"Executing SQL query: {query}")
            cursor.execute(query)
            logger.debug(f"Column descriptions: {cursor.description}")
            # The first element of each description entry is the column name.
            column_names = [meta[0] for meta in cursor.description]
            records = []
            for raw_row in cursor.fetchall():
                records.append(dict(zip(column_names, raw_row)))
            logger.debug(f"Fetched {len(records)} rows from Teradata.")
            return records
    except teradatasql.OperationalError as e:
        logger.error(f"Teradata Operational Error: {e}")
        raise
    except teradatasql.ProgrammingError as e:
        logger.error(f"Teradata Programming Error: {e}")
        raise
    except Exception as e:
        logger.error(f"Unexpected error fetching data from Teradata: {e}")
        raise
quollio_core/snowflake.py CHANGED
@@ -3,7 +3,7 @@ import logging
3
3
  import os
4
4
  import shutil
5
5
 
6
- from quollio_core.helper.core import setup_dbt_profile
6
+ from quollio_core.helper.core import is_valid_domain, setup_dbt_profile
7
7
  from quollio_core.helper.env_default import env_default
8
8
  from quollio_core.helper.log import set_log_level
9
9
  from quollio_core.profilers.snowflake import (
@@ -13,7 +13,7 @@ from quollio_core.profilers.snowflake import (
13
13
  snowflake_table_to_table_lineage,
14
14
  )
15
15
  from quollio_core.profilers.stats import get_column_stats_items
16
- from quollio_core.repository import dbt, qdc, snowflake
16
+ from quollio_core.repository import dbt, qdc, snowflake, ssm
17
17
 
18
18
  logger = logging.getLogger(__name__)
19
19
 
@@ -24,6 +24,8 @@ def build_view(
24
24
  target_tables: str = "",
25
25
  log_level: str = "info",
26
26
  dbt_macro_source: str = "hub",
27
+ target_databases_method: str = "DENYLIST",
28
+ target_databases: list[str] = [],
27
29
  ) -> None:
28
30
  logger.info("Build profiler views using dbt")
29
31
  # set parameters
@@ -32,10 +34,16 @@ def build_view(
32
34
  project_path = f"{current_dir}/dbt_projects/snowflake"
33
35
  template_path = f"{current_dir}/dbt_projects/snowflake/profiles"
34
36
  template_name = "profiles_template.yml"
35
- options = '{{"query_role": {query_role}, "sample_method": {sample_method}}}'.format(
37
+
38
+ options = '{{"query_role": "{query_role}", "sample_method": "{sample_method}",\
39
+ "target_databases_method": "{target_databases_method}",\
40
+ "target_databases": {target_databases}}}'.format(
36
41
  query_role=conn.account_query_role,
37
42
  sample_method=stats_sample_method,
43
+ target_databases_method=target_databases_method,
44
+ target_databases=target_databases,
38
45
  )
46
+
39
47
  new_package_file = f"{project_path}/packages.yml"
40
48
  if dbt_macro_source == "local":
41
49
  shutil.copyfile(f"{project_path}/packages_local.yml", new_package_file)
@@ -237,6 +245,25 @@ if __name__ == "__main__":
237
245
  Please specify table name with blank delimiter like tableA tableB \
238
246
  if you want to create two or more tables.",
239
247
  )
248
+ parser.add_argument(
249
+ "--target_databases_method",
250
+ type=str,
251
+ choices=["ALLOWLIST", "DENYLIST"],
252
+ action=env_default("SNOWFLAKE_TARGET_DATABASE_METHOD"),
253
+ required=False,
254
+ help="Method to filter databases. 'ALLOWLIST' to only include listed databases,\
255
+ 'DENNYLIST' to exclude listed databases",
256
+ )
257
+ parser.add_argument(
258
+ "--target_databases",
259
+ type=str,
260
+ nargs="*",
261
+ action=env_default("SNOWFLAKE_TARGET_DATABASES"),
262
+ required=False,
263
+ help='List of databases to allow or deny based on target_database_method\
264
+ please specify database names with blank space as delimiter\
265
+ wildcards (%) are supported "DATABASE%" ',
266
+ )
240
267
  parser.add_argument(
241
268
  "--sample_method",
242
269
  type=str,
@@ -298,6 +325,16 @@ if __name__ == "__main__":
298
325
  required=False,
299
326
  help="Whether to ingest column lineage into QDIC or not. Default value is False",
300
327
  )
328
+ parser.add_argument(
329
+ "--external_api_access",
330
+ type=str,
331
+ choices=["PUBLIC", "VPC_ENDPOINT"],
332
+ action=env_default("EXTERNAL_API_ACCESS"),
333
+ default="PUBLIC",
334
+ required=False,
335
+ help="Access method to Quollio API. Default 'PUBLIC'. Choose 'VPC_ENDPOINT'\
336
+ if you use API Gateway VPC Endpoint, DefaultValue is set to PUBLIC.",
337
+ )
301
338
 
302
339
  stats_items = get_column_stats_items()
303
340
  parser.add_argument(
@@ -329,16 +366,35 @@ if __name__ == "__main__":
329
366
  raise ValueError("No command is provided")
330
367
 
331
368
  if "build_view" in args.commands:
369
+
370
+ if args.target_databases:
371
+ target_databases = ["'" + db + "'" for db in args.target_databases[0].split(",")]
372
+ else:
373
+ target_databases = []
374
+
332
375
  build_view(
333
376
  conn=conn,
334
377
  stats_sample_method=args.sample_method,
335
378
  target_tables=args.target_tables,
336
379
  log_level=args.log_level,
337
380
  dbt_macro_source=args.dbt_macro_source,
381
+ target_databases_method=args.target_databases_method,
382
+ target_databases=target_databases,
338
383
  )
384
+ api_url = args.api_url
385
+ if args.external_api_access == "VPC_ENDPOINT":
386
+ api_url, err = ssm.get_parameter_by_assume_role(args.api_url)
387
+ if err is not None:
388
+ logger.error("Fail to ssm.get_parameter_by_assume_role. {err}".format(err=err))
389
+ raise Exception("Fail to ssm.get_parameter_by_assume_role. {err}".format(err=err))
390
+
391
+ is_domain_valid = is_valid_domain(domain=api_url, domain_type=args.external_api_access)
392
+ if not is_domain_valid:
393
+ raise ValueError("The format of quollio API URL is invalid. The URL must end with `.com`")
394
+
339
395
  if "load_lineage" in args.commands:
340
396
  qdc_client = qdc.QDCExternalAPIClient(
341
- base_url=args.api_url,
397
+ base_url=api_url,
342
398
  client_id=args.client_id,
343
399
  client_secret=args.client_secret,
344
400
  )
@@ -350,7 +406,7 @@ if __name__ == "__main__":
350
406
  )
351
407
  if "load_stats" in args.commands:
352
408
  qdc_client = qdc.QDCExternalAPIClient(
353
- base_url=args.api_url,
409
+ base_url=api_url,
354
410
  client_id=args.client_id,
355
411
  client_secret=args.client_secret,
356
412
  )
@@ -362,7 +418,7 @@ if __name__ == "__main__":
362
418
  )
363
419
  if "load_sqllineage" in args.commands:
364
420
  qdc_client = qdc.QDCExternalAPIClient(
365
- base_url=args.api_url,
421
+ base_url=api_url,
366
422
  client_id=args.client_id,
367
423
  client_secret=args.client_secret,
368
424
  )
@@ -0,0 +1,268 @@
1
+ import argparse
2
+ import json
3
+
4
+ from quollio_core.helper.env_default import env_default
5
+ from quollio_core.helper.log_utils import configure_logging, error_handling_decorator, logger
6
+ from quollio_core.profilers.stats import get_column_stats_items
7
+ from quollio_core.profilers.teradata.lineage import load_lineage
8
+ from quollio_core.profilers.teradata.stats import load_stats
9
+ from quollio_core.repository import qdc
10
+ from quollio_core.repository import teradata as teradata_repo
11
+
12
+ DEFAULT_SYSTEM_DATABASES = [
13
+ "DBC",
14
+ "GLOBAL_FUNCTIONS",
15
+ "gs_tables_db",
16
+ "modelops",
17
+ "system",
18
+ "tapidb",
19
+ "TDaaS_BAR",
20
+ "TDaaS_DB",
21
+ "TDaaS_Maint",
22
+ "TDaaS_Monitor",
23
+ "TDaaS_Support",
24
+ "TDaaS_TDBCMgmt1",
25
+ "TDaaS_TDBCMgmt2",
26
+ "TDBCMgmt",
27
+ "Crashdumps",
28
+ "dbcmngr",
29
+ "DemoNow_Monitor",
30
+ "External_AP",
31
+ "LockLogShredder",
32
+ "mldb",
33
+ "SQLJ",
34
+ "SysAdmin",
35
+ "SYSBAR",
36
+ "SYSJDBC",
37
+ "SYSLIB",
38
+ "SYSSPATIAL",
39
+ "SystemFe",
40
+ "SYSUDTLIB",
41
+ "SYSUIF",
42
+ "Sys_Calendar",
43
+ "TDMaps",
44
+ "TDPUSER",
45
+ "TDQCD",
46
+ "TDStats",
47
+ "tdwm",
48
+ "TD_ANALYTICS_DB",
49
+ "TD_SERVER_DB",
50
+ "TD_SYSFNLIB",
51
+ "TD_SYSGPL",
52
+ "TD_SYSXML",
53
+ "val",
54
+ ]
55
+
56
+
57
+ @error_handling_decorator
58
+ def main() -> None:
59
+ parser = argparse.ArgumentParser(
60
+ prog="Quollio Intelligence Agent for Teradata",
61
+ description="Load lineage and stats to Quollio from Teradata",
62
+ epilog="Copyright (c) 2024 Quollio Technologies, Inc.",
63
+ )
64
+ parser.add_argument(
65
+ "commands",
66
+ choices=["load_lineage", "load_stats"],
67
+ type=str,
68
+ nargs="+",
69
+ help="""
70
+ The command to execute.
71
+ 'load_lineage': Load lineage data from Teradata to Quollio,
72
+ 'load_stats': Load stats from Teradata to Quollio
73
+ """,
74
+ )
75
+ parser.add_argument(
76
+ "--log_level",
77
+ type=str,
78
+ choices=["debug", "info", "warn", "error", "none"],
79
+ action=env_default("LOG_LEVEL"),
80
+ default="info",
81
+ required=False,
82
+ help="The log level for commands. Default value is info",
83
+ )
84
+ parser.add_argument(
85
+ "--tenant_id",
86
+ type=str,
87
+ action=env_default("TENANT_ID"),
88
+ required=False,
89
+ help="The tenant id (company id) where the lineage and stats are loaded",
90
+ )
91
+ parser.add_argument(
92
+ "--teradata_host",
93
+ type=str,
94
+ action=env_default("TERADATA_HOST"),
95
+ required=True,
96
+ help="Teradata host",
97
+ )
98
+ parser.add_argument(
99
+ "--teradata_port",
100
+ type=str,
101
+ action=env_default("TERADATA_PORT"),
102
+ required=True,
103
+ help="Teradata port",
104
+ )
105
+ parser.add_argument(
106
+ "--teradata_user",
107
+ type=str,
108
+ action=env_default("TERADATA_USER_NAME"),
109
+ required=True,
110
+ help="Teradata username",
111
+ )
112
+ parser.add_argument(
113
+ "--teradata_password",
114
+ type=str,
115
+ action=env_default("TERADATA_PASSWORD"),
116
+ required=True,
117
+ help="Teradata password",
118
+ )
119
+ parser.add_argument(
120
+ "--teradata_connection_parameters",
121
+ type=str,
122
+ action=env_default("TERADATA_CONNECTION_PARAMETERS"),
123
+ default="{}",
124
+ help="Additional Teradata connection parameters as a JSON string",
125
+ )
126
+ parser.add_argument(
127
+ "--api_url",
128
+ type=str,
129
+ action=env_default("QDC_API_URL"),
130
+ required=False,
131
+ help="The base URL of Quollio External API",
132
+ )
133
+ parser.add_argument(
134
+ "--client_id",
135
+ type=str,
136
+ action=env_default("QDC_CLIENT_ID"),
137
+ required=False,
138
+ help="The client id that is created on Quollio console to let clients access Quollio External API",
139
+ )
140
+ parser.add_argument(
141
+ "--client_secret",
142
+ type=str,
143
+ action=env_default("QDC_CLIENT_SECRET"),
144
+ required=False,
145
+ help="The client secret that is created on Quollio console to let clients access Quollio External API",
146
+ )
147
+ parser.add_argument(
148
+ "--sample_percent",
149
+ type=float,
150
+ action=env_default("SAMPLE_PERCENT"),
151
+ default=1,
152
+ required=False,
153
+ help="Percentage of data to sample when collecting statistics (e.g., 10 for 10%). Default is 1%.",
154
+ )
155
+ parser.add_argument(
156
+ "--teradata_target_databases",
157
+ type=str,
158
+ action=env_default("TERADATA_TARGET_DATABASES"),
159
+ required=False,
160
+ default=None,
161
+ help="Comma-separated list of Teradata target databases. If not provided,\
162
+ DEFAULT_SYSTEM_DATABASES will be used.",
163
+ )
164
+ parser.add_argument(
165
+ "--teradata_target_databases_method",
166
+ type=str,
167
+ choices=["ALLOWLIST", "DENYLIST"],
168
+ action=env_default("TERADATA_TARGET_DATABASE_METHOD"),
169
+ default="DENYLIST",
170
+ help="Method to use for teradata_target_databases (allowlist or denylist)",
171
+ )
172
+ parser.add_argument(
173
+ "--teradata_page_size",
174
+ type=int,
175
+ action=env_default("TERADATA_PAGE_SIZE"),
176
+ default=1000,
177
+ required=False,
178
+ help="Page size for Teradata queries. Default is 1000.",
179
+ )
180
+ parser.add_argument(
181
+ "--target_stats_items",
182
+ type=str,
183
+ nargs="*",
184
+ choices=get_column_stats_items(),
185
+ default=get_column_stats_items(),
186
+ action=env_default("TERADATA_STATS_ITEMS"),
187
+ required=False,
188
+ help="The items for statistic values.\
189
+ You can choose the items to be aggregated for stats.\
190
+ Default is full stats.",
191
+ )
192
+ parser.add_argument(
193
+ "--teradata_system_database",
194
+ type=str,
195
+ action=env_default("TERADATA_SYSTEM_DATABASE"),
196
+ default="DBC",
197
+ help="Name of the Teradata system database.\
198
+ Default is DBC",
199
+ )
200
+
201
+ args = parser.parse_args()
202
+
203
+ configure_logging(args.log_level)
204
+
205
+ logger.info("Starting Quollio Intelligence Agent for Teradata")
206
+
207
+ credentials = {
208
+ "username": args.teradata_user,
209
+ "password": args.teradata_password,
210
+ }
211
+
212
+ # Parse additional connection parameters
213
+ try:
214
+ additional_params = json.loads(args.teradata_connection_parameters)
215
+ except json.JSONDecodeError:
216
+ logger.warning("Invalid JSON in TERADATA_CONNECTION_PARAMETERS. Using empty dict.")
217
+ additional_params = {}
218
+
219
+ logger.info("Initializing QDC client")
220
+ qdc_client = qdc.initialize_qdc_client(args.api_url, args.client_id, args.client_secret)
221
+
222
+ logger.info("Initializing Teradata client")
223
+ config = teradata_repo.TeradataConfig.from_dict(
224
+ credentials=credentials,
225
+ host=args.teradata_host,
226
+ port=args.teradata_port,
227
+ additional_params=additional_params,
228
+ system_database=args.teradata_system_database,
229
+ )
230
+
231
+ if "load_lineage" in args.commands:
232
+ logger.info("Starting lineage loading process")
233
+ load_lineage(
234
+ conn_config=config,
235
+ tenant_id=args.tenant_id,
236
+ endpoint=args.teradata_host,
237
+ qdc_client=qdc_client,
238
+ page_size=args.teradata_page_size,
239
+ system_database=args.teradata_system_database,
240
+ )
241
+ logger.info("Lineage loading process completed")
242
+
243
+ if "load_stats" in args.commands:
244
+ logger.info("Starting statistics loading process")
245
+ logger.info(f"Selected stats items: {args.target_stats_items}")
246
+ target_databases = (
247
+ DEFAULT_SYSTEM_DATABASES
248
+ if args.teradata_target_databases is None
249
+ else args.teradata_target_databases.split(",")
250
+ )
251
+ load_stats(
252
+ conn_config=config,
253
+ sample_percent=args.sample_percent,
254
+ tenant_id=args.tenant_id,
255
+ endpoint=args.teradata_host,
256
+ qdc_client=qdc_client,
257
+ target_databases=target_databases,
258
+ target_databases_method=args.teradata_target_databases_method.upper(),
259
+ stats_items=args.target_stats_items,
260
+ system_database=args.teradata_system_database,
261
+ )
262
+ logger.info("Statistics loading process completed")
263
+
264
+ logger.info("Quollio Intelligence Agent for Teradata completed successfully")
265
+
266
+
267
+ if __name__ == "__main__":
268
+ main()
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: quollio-core
3
- Version: 0.4.19
3
+ Version: 0.5.3
4
4
  Summary: Quollio Core
5
5
  Author-email: quollio-dev <qt.dev@quollio.com>
6
6
  Maintainer-email: RyoAriyama <ryo.arym@gmail.com>, tharuta <35373297+TakumiHaruta@users.noreply.github.com>
@@ -37,6 +37,7 @@ Requires-Dist: google-cloud-bigquery==3.22.0
37
37
  Requires-Dist: google-cloud-datacatalog==3.19.0
38
38
  Requires-Dist: google-cloud-datacatalog-lineage==0.3.6
39
39
  Requires-Dist: google-api-python-client==2.131.0
40
+ Requires-Dist: teradatasql==20.0.0.15
40
41
  Requires-Dist: black>=22.3.0 ; extra == "test"
41
42
  Requires-Dist: coverage>=7.3.2 ; extra == "test"
42
43
  Requires-Dist: isort>=5.10.1 ; extra == "test"
@@ -1,8 +1,9 @@
1
- quollio_core/__init__.py,sha256=y0SJ_NkmDeB1AzUwy_oG9Ivh80sUSTMebFbwH83kHe8,84
1
+ quollio_core/__init__.py,sha256=YrefYO__k4ewIsYGUmshPK7ySvzQxHKIdRv7kTYGaVk,83
2
2
  quollio_core/bigquery.py,sha256=6Oq4DVGpa3X21Es_nbrsb8pK3vaxwb9Egnvq3huo95k,5894
3
3
  quollio_core/bricks.py,sha256=8h3kbI2b6lGH2s-56jE_Q5-R5-nIsQYMfvtRrkFOzoU,10784
4
4
  quollio_core/redshift.py,sha256=KcdljY95xYf9JYrsaMOBoP_XxQQ8wFVE5ue_XEMVSFc,11504
5
- quollio_core/snowflake.py,sha256=D-d26OwCUIpXIqfxZnkv4Ei1GJ1mdw9z8YA8K0G-bSE,12216
5
+ quollio_core/snowflake.py,sha256=wKBfyoqdoLN-kYsceT0ctqWeay_Sn8OpKMSYedlBjD4,14558
6
+ quollio_core/teradata.py,sha256=H2VUcJvr8W-M2wvm3710Gf1ENb-BSscrDRKNm8gdHJE,8227
6
7
  quollio_core/dbt_projects/databricks/.gitignore,sha256=1jJAyXSzJ3YUm0nx3i7wUSE4RjQMX3ad6F8O88UbtzI,29
7
8
  quollio_core/dbt_projects/databricks/README.md,sha256=ZpRQyhFAODAiS8dc1Kb_ndkul4cu4o4udN_EMa49CU4,440
8
9
  quollio_core/dbt_projects/databricks/dbt_project.yml,sha256=3sH98RNk7TnphvI3yEdXDstb92kW5BNxr-cT0tXhwzk,480
@@ -47,22 +48,22 @@ quollio_core/dbt_projects/snowflake/packages_local.yml,sha256=ryyJSXv83gYFu48xmz
47
48
  quollio_core/dbt_projects/snowflake/analyses/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
49
  quollio_core/dbt_projects/snowflake/macros/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
50
  quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql,sha256=6CM9L_SkTkfUVWOYj0APHqmS_uBOdsL8RdN4fwUXz1I,3822
50
- quollio_core/dbt_projects/snowflake/models/quollio_lineage_column_level.sql,sha256=Cxt2U2aXNG_LUm63jwTyxUkapkrB7_uHmesx1PTcMJM,4721
51
+ quollio_core/dbt_projects/snowflake/models/quollio_lineage_column_level.sql,sha256=Zhj0EXF1K8S-OkFxz3IBHe2olXktYrvly0LwZBOAUXw,5333
51
52
  quollio_core/dbt_projects/snowflake/models/quollio_lineage_column_level.yml,sha256=a2uNIAh-xw51eu-GmHVuAnGnTbwK7h8-DjDeQtK3KaQ,711
52
- quollio_core/dbt_projects/snowflake/models/quollio_lineage_table_level.sql,sha256=Q_7vY1N1Hi1LFv5CxkkdR3gQw8fTDnoKECTLSK4gd3o,5112
53
+ quollio_core/dbt_projects/snowflake/models/quollio_lineage_table_level.sql,sha256=lZ28A4E6s37-oBx8JbtT3ItXK6musdqr25eyaGn7kDk,5916
53
54
  quollio_core/dbt_projects/snowflake/models/quollio_lineage_table_level.yml,sha256=QXlMBIkHo1Y-ANveKVx1FwyoYTMRXKgE2Z-PNouhQTw,325
54
- quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.sql,sha256=gd6JhQO13xBIvOoeXcce1I7amNGytwE8pwUApXehwqM,1520
55
+ quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.sql,sha256=pSmx3DJDx39AzAIzil9ophcgnIVxK_o1b7HSgajMUPc,2257
55
56
  quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.yml,sha256=qgazupx3ca4P8R0loY5F9hyCz2fmAcWqZ6iOySo_NoY,377
56
57
  quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.sql,sha256=BzvP9gKMFItmwqEQ4bDgtS-Invxhhe6L73Qe1ucxfHo,284
57
58
  quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.yml,sha256=V_BESPk6IqE52ExT26-78As9l9AlWW86-Geb5PIhThU,67
58
- quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.sql,sha256=IDvYtl84vcQaE3ImOKhDfgO_8aYWTudmGggCiMF9yi0,2251
59
+ quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.sql,sha256=ubMEzZNHq55zCncg7HbzdMKMSdqHnwbJmVKYpet8Otc,2968
59
60
  quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.yml,sha256=W39VAmFnnX6RBoW7B_4CConC1lm0Jm9o50Jsz9bYZzY,538
60
61
  quollio_core/dbt_projects/snowflake/models/sources.yml,sha256=vGSV33cNj4UUyPUcYS-JFgc3r8KvSLfiA7qhbDCUU9s,10975
61
62
  quollio_core/dbt_projects/snowflake/profiles/profiles_template.yml,sha256=gcZsgdGP461QuUM9jLbBKdadT8cHTXgNarq_azOOMhk,379
62
63
  quollio_core/dbt_projects/snowflake/seeds/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
63
64
  quollio_core/dbt_projects/snowflake/snapshots/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
64
65
  quollio_core/helper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
65
- quollio_core/helper/core.py,sha256=wbu4FWI7YiFEttXGSuj3tMyAhtPAFlHOjDpWJGNXOHA,1202
66
+ quollio_core/helper/core.py,sha256=ddV5VLa49Je11eHUjlRN5m3nhtqPMMUSeG4135HuZC8,1390
66
67
  quollio_core/helper/env_default.py,sha256=H6gbSGUPrEDZr4YDrL49hbOpw6RntI4U82kX1q6vUnI,2148
67
68
  quollio_core/helper/log.py,sha256=flxyZZ44G79l1TaUp3OT58uCHcnE5z_pCduwoeI6IUs,645
68
69
  quollio_core/helper/log_utils.py,sha256=QontLKETHjSAbQniJ7YqS0RY2AYvFHSjrlPiGr317LE,1391
@@ -72,16 +73,20 @@ quollio_core/profilers/databricks.py,sha256=ik4RiR_GOeU3S7s6C6Y9SGe1D_Y_f98BDWJV
72
73
  quollio_core/profilers/lineage.py,sha256=4FyxIuPBrUFihqZryqTQBcfB0Z7634lKl_WwkD82vzE,6865
73
74
  quollio_core/profilers/redshift.py,sha256=p6ONDCkhndZAOcKAwEyQ5fsi-jsQrlwHHb7LTI_m1uk,6473
74
75
  quollio_core/profilers/snowflake.py,sha256=m9Ivv2LRwnrmgKS36a039AhrO27sR1EaOOdqNF26PhI,11156
75
- quollio_core/profilers/sqllineage.py,sha256=XkF7hwDWIGNtyEP5cv2wETBgMfdQxeHolv7qPIkntSQ,5066
76
+ quollio_core/profilers/sqllineage.py,sha256=h0FT6CYb0A20zSc68GELZ7Q8bDbaHLQnZQHsXBEXBug,5261
76
77
  quollio_core/profilers/stats.py,sha256=OLQrdrh0y64jo9rmzvGlDdxy_c7gMz_GnlXPJzWkBjM,7343
78
+ quollio_core/profilers/teradata/lineage.py,sha256=2wNksBQD8vC6UTQwCglPsF53YMEVIkAb2CWTmpiTHDU,7368
79
+ quollio_core/profilers/teradata/stats.py,sha256=OagvkTRFiWVbiLABwZwR3wQ7y36edwOViDetHsYiyxI,9277
77
80
  quollio_core/repository/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
78
81
  quollio_core/repository/bigquery.py,sha256=3AyGcJNYGnUyMweyc6lGm4quwrOzd-ZBS2zNnFwafII,3990
79
82
  quollio_core/repository/databricks.py,sha256=9Cgdv8qBnVaHqu3RA-IUBieAqb69moQ-KAAMVSf5Ds4,1877
80
83
  quollio_core/repository/dbt.py,sha256=cnLwJPywLi8VowVW7zfIBa9jxVwDWO7xzzNRn1vWiuw,659
81
- quollio_core/repository/qdc.py,sha256=_5ygUD6h-zs02f4rzj6evxXqD1JjSgtWc-oHPEofaig,4902
84
+ quollio_core/repository/qdc.py,sha256=IPGiYafnJYkuD7_kLapVM98-9ZhEwq5S-dGY2bO8fVk,4624
82
85
  quollio_core/repository/redshift.py,sha256=p2ouEuYcDCjx1oBhc6H1ekQsvEqHGd3bFu3PW0ngYBc,2880
83
86
  quollio_core/repository/snowflake.py,sha256=zL9-xi98AIftdW9MuKI-M3pZ1kQuuH-UiZH8HcJvmk4,1769
84
- quollio_core-0.4.19.dist-info/LICENSE,sha256=V8j_M8nAz8PvAOZQocyRDX7keai8UJ9skgmnwqETmdY,34520
85
- quollio_core-0.4.19.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
86
- quollio_core-0.4.19.dist-info/METADATA,sha256=cGPdKKcHhb6b3D9rz46w4FMygML4fUZ-ZQgtfI2DqLY,6887
87
- quollio_core-0.4.19.dist-info/RECORD,,
87
+ quollio_core/repository/ssm.py,sha256=xpm1FzbBnIsBptuYPUNnPgkKU2AH3XxI-ZL0bEetvW0,2182
88
+ quollio_core/repository/teradata.py,sha256=1AExxRBTswpSyF4OVyAUkoiZ0yVRfqt4T99FdllkTEI,3763
89
+ quollio_core-0.5.3.dist-info/LICENSE,sha256=V8j_M8nAz8PvAOZQocyRDX7keai8UJ9skgmnwqETmdY,34520
90
+ quollio_core-0.5.3.dist-info/WHEEL,sha256=CpUCUxeHQbRN5UGRQHYRJorO5Af-Qy_fHMctcQ8DSGI,82
91
+ quollio_core-0.5.3.dist-info/METADATA,sha256=aQkusHnQ6mw4E6KUM5Yo4LrzFFFHqDbIgIyzjHZFJc4,6924
92
+ quollio_core-0.5.3.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: flit 3.9.0
2
+ Generator: flit 3.10.1
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any