quollio-core 0.4.19__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quollio_core/__init__.py +1 -1
- quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.sql +1 -1
- quollio_core/helper/core.py +7 -0
- quollio_core/profilers/sqllineage.py +13 -9
- quollio_core/profilers/teradata/lineage.py +172 -0
- quollio_core/profilers/teradata/stats.py +218 -0
- quollio_core/repository/qdc.py +0 -7
- quollio_core/repository/ssm.py +59 -0
- quollio_core/repository/teradata.py +103 -0
- quollio_core/snowflake.py +26 -5
- quollio_core/teradata.py +254 -0
- {quollio_core-0.4.19.dist-info → quollio_core-0.5.0.dist-info}/METADATA +2 -1
- {quollio_core-0.4.19.dist-info → quollio_core-0.5.0.dist-info}/RECORD +15 -10
- {quollio_core-0.4.19.dist-info → quollio_core-0.5.0.dist-info}/LICENSE +0 -0
- {quollio_core-0.4.19.dist-info → quollio_core-0.5.0.dist-info}/WHEEL +0 -0
quollio_core/__init__.py
CHANGED
quollio_core/helper/core.py
CHANGED
@@ -35,3 +35,10 @@ def setup_dbt_profile(connections_json: Dict[str, str], template_path: str, temp
|
|
35
35
|
|
36
36
|
def trim_prefix(s: str, prefix: str) -> str:
    """Return ``s`` with a single leading occurrence of ``prefix`` removed.

    Args:
        s: The string to trim.
        prefix: The exact leading substring to drop.

    Returns:
        ``s`` without ``prefix`` when ``s`` starts with it, otherwise ``s``
        unchanged. An empty ``prefix`` never changes ``s``.
    """
    # str.lstrip() interprets its argument as a *set of characters*, so
    # "schema_sales".lstrip("schema_") would yield "les". Slice instead.
    if prefix and s.startswith(prefix):
        return s[len(prefix):]
    return s
|
38
|
+
|
39
|
+
|
40
|
+
def is_valid_domain(domain: str, domain_type: str) -> bool:
    """Check that ``domain`` ends with the suffix required for ``domain_type``.

    Args:
        domain: The API URL to validate.
        domain_type: ``"VPC_ENDPOINT"`` requires a trailing ``/api``; any
            other value requires a trailing ``.com``.

    Returns:
        True when ``domain`` ends with the required suffix.
    """
    required_suffix = "/api" if domain_type == "VPC_ENDPOINT" else ".com"
    return domain.endswith(required_suffix)
|
@@ -67,15 +67,19 @@ class SQLLineage:
|
|
67
67
|
dest_schema = dest_schema.upper() if dest_schema is not None else None
|
68
68
|
|
69
69
|
# MEMO: Complement sql with dialect, source database and source schema info.
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
70
|
+
# MEMO: Skipping qualify because it normalizes the table names.
|
71
|
+
if dialect == "teradata":
|
72
|
+
optimized_stmt = statement
|
73
|
+
else:
|
74
|
+
optimized_stmt: sqlglot.Expression = optimizer.qualify.qualify(
|
75
|
+
statement,
|
76
|
+
dialect=dialect,
|
77
|
+
catalog=src_db,
|
78
|
+
db=src_schema,
|
79
|
+
qualify_columns=False,
|
80
|
+
validate_qualify_columns=False,
|
81
|
+
identify=False,
|
82
|
+
)
|
79
83
|
|
80
84
|
orig_dest_table = Table(table="")
|
81
85
|
dest_table = Table(table="")
|
@@ -0,0 +1,172 @@
|
|
1
|
+
import os
|
2
|
+
from collections import OrderedDict
|
3
|
+
from typing import Dict, List, Set, Tuple, Union
|
4
|
+
|
5
|
+
from sqlglot import ParseError
|
6
|
+
|
7
|
+
from quollio_core.helper.log_utils import error_handling_decorator, logger
|
8
|
+
from quollio_core.profilers.sqllineage import SQLLineage, Table
|
9
|
+
from quollio_core.repository import qdc
|
10
|
+
from quollio_core.repository import teradata as teradata_repo
|
11
|
+
|
12
|
+
|
13
|
+
@error_handling_decorator
def load_lineage(
    conn_config: teradata_repo.TeradataConfig,
    endpoint: str = None,
    tenant_id: str = None,
    qdc_client: qdc.QDCExternalAPIClient = None,
    page_size: int = None,
) -> None:
    """Extract table lineage from the Teradata query log and send it to QDC.

    The query log views are read page by page, each page is turned into
    (source tables, destination table) pairs, and the pairs are merged per
    destination across *all* pages before one update per destination is sent
    through ``qdc_client.update_lineage_by_id``.

    Args:
        conn_config: Connection settings for the Teradata client.
        endpoint: Passed through to ``SQLLineage.gen_lineage_input``.
        tenant_id: Passed through to ``SQLLineage.gen_lineage_input``.
        qdc_client: Client used to send the lineage updates.
        page_size: Rows fetched per query. Falls back to the
            ``TERADATA_PAGE_SIZE`` environment variable, then to 1000.
    """
    page_size = page_size or int(os.environ.get("TERADATA_PAGE_SIZE", 1000))
    offset = 0
    # Destination table -> union of every source table seen for it. Merging
    # here (not only inside a page) keeps a destination that shows up on
    # several pages from being sent as several separate updates.
    merged_lineage: Dict[Table, Set[Table]] = {}

    with teradata_repo.new_teradata_client(conn_config) as conn:
        while True:
            # NOTE(review): the LIKE filter is evaluated per text row, so
            # continuation rows of a split statement that do not start with
            # one of these keywords are not selected; the statement has no
            # final ORDER BY either. Confirm both are intended.
            query = f"""
                SELECT
                    a.QueryID,
                    TRIM(a.SqlTextInfo) AS SqlTextInfo,
                    a.SqlRowNo,
                    TRIM(d.DatabaseName) AS DefaultDatabase
                FROM DBC.QryLogSQLV a
                JOIN DBC.QryLogV b
                    ON a.QueryID = b.QueryID
                JOIN DBC.DatabasesV d
                    ON b.DefaultDatabase = d.DatabaseName
                WHERE
                    UPPER(TRIM(SqlTextInfo)) LIKE 'CREATE TABLE%'
                    OR UPPER(TRIM(SqlTextInfo)) LIKE 'CREATE VIEW%'
                    OR UPPER(TRIM(SqlTextInfo)) LIKE 'INSERT%'
                    OR UPPER(TRIM(SqlTextInfo)) LIKE 'MERGE%'
                    OR UPPER(TRIM(SqlTextInfo)) LIKE 'UPDATE%'
                QUALIFY ROW_NUMBER() OVER (ORDER BY a.QueryID, a.SqlRowNo) > {offset}
                    AND ROW_NUMBER() OVER (ORDER BY a.QueryID, a.SqlRowNo) <= {offset + page_size}
            """

            rows = teradata_repo.execute_query(query, conn)
            if not rows:
                break

            logger.info(f"Concatenating split queries for page {offset // page_size + 1}...")
            concatenated_queries = concatenate_split_queries(rows)

            logger.info("Processing SQL statements and extracting lineage...")
            for src_tables, dest_table in process_sql_statements(concatenated_queries):
                merged_lineage.setdefault(dest_table, set()).update(src_tables)

            # A short page means the log has been read to the end.
            if len(rows) < page_size:
                break

            offset += page_size

    all_lineage_results = [(src_tables, dest_table) for dest_table, src_tables in merged_lineage.items()]

    logger.info(f"Lineage extraction complete. Found {len(all_lineage_results)} unique entries.")
    for src_tables, dest_table in all_lineage_results:
        logger.debug(f"Destination table: {dest_table}")
        # Sources are sets, so iterate them directly rather than testing for list.
        logger.debug("Source tables:")
        for src_table in src_tables:
            logger.debug(f" - {src_table}")
        logger.debug("---")

    sql_lineage = SQLLineage()
    update_table_lineage_inputs = [
        sql_lineage.gen_lineage_input(
            tenant_id=tenant_id, endpoint=endpoint, src_tables=src_tables, dest_table=dest_table
        )
        for src_tables, dest_table in all_lineage_results
    ]

    table_req_count = 0
    logger.info(f"Starting to update lineage information for {len(update_table_lineage_inputs)} tables.")
    for update_table_lineage_input in update_table_lineage_inputs:
        logger.info(
            f"Generating table lineage. downstream: {update_table_lineage_input.downstream_database_name}"
            f" -> {update_table_lineage_input.downstream_table_name}"
        )
        # Best effort: one failing destination must not stop the remaining ones.
        try:
            status_code = qdc_client.update_lineage_by_id(
                global_id=update_table_lineage_input.downstream_global_id,
                payload=update_table_lineage_input.upstreams.as_dict(),
            )
            if status_code == 200:
                table_req_count += 1
            else:
                # Adjacent f-strings instead of a backslash continuation, which
                # would embed the source indentation in the message.
                logger.error(
                    f"Failed to update lineage for {update_table_lineage_input.downstream_table_name}. "
                    f"Status code: {status_code}"
                )
        except Exception as e:
            logger.error(
                f"Exception occurred while updating lineage for {update_table_lineage_input.downstream_table_name}: {e}"
            )
    logger.info(f"Generating table lineage is finished. {table_req_count} lineages are ingested.")
|
112
|
+
|
113
|
+
|
114
|
+
@error_handling_decorator
def extract_lineage(sql_statement: str, default_database: str = None) -> Tuple[Set[Table], Table]:
    """Parse one Teradata SQL statement into (source tables, destination table).

    The schema reported by the parser is used as the database of each returned
    table, falling back to ``default_database``; the returned tables carry an
    empty ``db_schema``.

    Args:
        sql_statement: The SQL text to analyse.
        default_database: Database used for tables the parser left unqualified.

    Returns:
        The source tables and the destination table. When parsing or
        conversion fails, the error is logged and an empty set together with
        an empty ``Table`` is returned instead.
    """
    try:
        logger.debug(f"Parsing SQL: {sql_statement}")
        parser = SQLLineage()
        parsed_sources, parsed_dest = parser.get_table_level_lineage_source(sql=sql_statement, dialect="teradata")

        def relocate(tbl: Table) -> Table:
            # Move the parsed schema into the db slot; db_schema stays empty.
            return Table(db=tbl.db_schema or default_database, db_schema="", table=tbl.table)

        relocated_sources = set(map(relocate, parsed_sources))
        relocated_dest = relocate(parsed_dest)
        return relocated_sources, relocated_dest
    except ParseError as e:
        logger.error(f"Error parsing SQL: {e}")
        logger.debug(f"Problematic SQL: {sql_statement}")
    except AttributeError as e:
        logger.error(f"Attribute error while extracting lineage: {e}")
        logger.debug(f"Problematic SQL: {sql_statement}")
    except Exception as e:
        logger.error(f"Unexpected error while extracting lineage: {e}")
        logger.debug(f"Problematic SQL: {sql_statement}")
    # Reached only after one of the handlers above has logged a failure.
    return set(), Table(db="", table="")
|
135
|
+
|
136
|
+
|
137
|
+
@error_handling_decorator
def process_sql_statements(queries: List[Union[str, Dict[str, Union[str, int]]]]) -> List[Tuple[Set[Table], Table]]:
    """Extract lineage for each query and merge the results per destination.

    Args:
        queries: Either raw SQL strings, or dicts with a ``"SqlTextInfo"`` key
            and an optional ``"DefaultDatabase"`` key.

    Returns:
        One ``(source tables, destination table)`` tuple per distinct
        destination, in order of first appearance. Statements without a
        destination table name or without any source table are dropped.
    """
    merged = OrderedDict()
    for entry in queries:
        if isinstance(entry, str):
            sql_text, fallback_db = entry, None
        else:
            sql_text, fallback_db = entry["SqlTextInfo"], entry.get("DefaultDatabase")

        sources, destination = extract_lineage(sql_text, fallback_db)
        if not (destination.table and sources):
            continue

        if destination not in merged:
            merged[destination] = sources
            continue

        logger.info(f"Merging duplicate entry for {destination}")
        # Same destination seen again: keep the union of all its sources.
        merged[destination] = merged[destination].union(sources)

    return [(sources, destination) for destination, sources in merged.items()]
|
157
|
+
|
158
|
+
|
159
|
+
def concatenate_split_queries(rows: List[Dict[str, Union[str, int]]]) -> List[Dict[str, Union[str, int]]]:
    """Reassemble SQL statements that the query log stores as several rows.

    Rows are grouped by ``"QueryID"`` and their ``"SqlTextInfo"`` fragments
    are joined in ascending ``"SqlRowNo"`` order, so the result does not
    depend on the order in which the rows arrive.

    Args:
        rows: Dicts with ``"QueryID"``, ``"SqlTextInfo"`` and
            ``"DefaultDatabase"`` keys. ``"SqlRowNo"`` is optional; rows
            without it keep their input order.

    Returns:
        One dict per query id, in order of first appearance, holding the
        joined ``"SqlTextInfo"`` and the ``"DefaultDatabase"`` of the first
        row seen for that query. An empty input yields an empty list.
    """
    grouped = {}
    for row in rows:
        entry = grouped.setdefault(
            row["QueryID"], {"fragments": [], "DefaultDatabase": row["DefaultDatabase"]}
        )
        entry["fragments"].append((row.get("SqlRowNo") or 0, row["SqlTextInfo"]))

    return [
        {
            # sorted() is stable, so fragments with equal row numbers keep input order.
            "SqlTextInfo": "".join(
                text for _, text in sorted(entry["fragments"], key=lambda fragment: fragment[0])
            ),
            "DefaultDatabase": entry["DefaultDatabase"],
        }
        for entry in grouped.values()
    ]
|
@@ -0,0 +1,218 @@
|
|
1
|
+
from typing import Any, Dict, List, Optional
|
2
|
+
|
3
|
+
from quollio_core.helper.log_utils import error_handling_decorator, logger
|
4
|
+
from quollio_core.profilers.stats import gen_table_stats_payload
|
5
|
+
from quollio_core.repository import qdc
|
6
|
+
from quollio_core.repository import teradata as teradata_repo
|
7
|
+
|
8
|
+
# Column type codes treated as numeric. In this module the whitespace-stripped
# "ColumnType" value of each column is tested for membership in this list to
# decide whether min/max/median/mean/stddev/mode statistics are generated.
NUMERIC_TYPES = ["D", "F", "I1", "I2", "I8", "I", "N"]

# I, I1, I2, I8 - INT TYPES INTEGER, BYTEINT, SMALLINT, BIGINT
# F - Float
# D - Decimal
# N - Number
|
14
|
+
|
15
|
+
|
16
|
+
def quote_identifier(identifier: str) -> str:
    """Wrap ``identifier`` in double quotes for use as a quoted SQL identifier.

    Args:
        identifier: The raw database, table or column name.

    Returns:
        The name enclosed in double quotes, with every embedded double quote
        doubled so it cannot terminate the quoted identifier early.
    """
    escaped = identifier.replace('"', '""')
    return f'"{escaped}"'
|
18
|
+
|
19
|
+
|
20
|
+
@error_handling_decorator
def load_stats(
    conn_config: teradata_repo.TeradataConfig,
    sample_percent: Optional[float] = None,
    endpoint: Optional[str] = None,
    tenant_id: Optional[str] = None,
    qdc_client: Optional[qdc.QDCExternalAPIClient] = None,
    target_databases: Optional[List[str]] = None,
    target_databases_method: str = "DENYLIST",
    stats_items: Optional[List[str]] = None,
) -> None:
    """Collect per-column statistics from Teradata and send them to QDC.

    For every table returned by ``teradata_repo.get_table_list`` and every
    column returned by ``teradata_repo.get_column_list``, a statistics query
    is generated and executed; the parsed results are converted to payloads
    with ``gen_table_stats_payload`` and sent through
    ``qdc_client.update_stats_by_id``.

    Args:
        conn_config: Connection settings for the Teradata client.
        sample_percent: Sampling percentage, forwarded for numeric columns only.
        endpoint: Passed through to ``gen_table_stats_payload``.
        tenant_id: Passed through to ``gen_table_stats_payload``.
        qdc_client: Client used to send the statistics.
        target_databases: Database names to filter on.
        target_databases_method: ``"DENYLIST"`` excludes ``target_databases``;
            any other value restricts the scan to them.
        stats_items: Names of the statistics to collect.
    """
    stats_list = []
    numerical_columns = 0
    non_numerical_columns = 0
    logger.info(
        f"Starting statistics collection. " f"Sample percent: {sample_percent if sample_percent is not None else 'N/A'}"
    )

    with teradata_repo.new_teradata_client(conn_config) as conn:
        try:
            tables = teradata_repo.get_table_list(conn, target_databases, target_databases_method)
            for table in tables:
                logger.debug(f"Processing table: {table}")
                database_name = table["DataBaseName"]
                table_name = table["TableName"]

                logger.info(f"Processing table {database_name}.{table_name}")
                columns = teradata_repo.get_column_list(conn, database_name=database_name, table_name=table_name)
                logger.debug(f"Columns: {columns}")

                for column in columns:
                    column_name = column["ColumnName"]
                    # ColumnType may be NULL; normalise to a stripped string.
                    column_type = (column["ColumnType"] or "").strip()

                    is_numerical = column_type in NUMERIC_TYPES
                    if is_numerical:
                        numerical_columns += 1
                    else:
                        non_numerical_columns += 1

                    stats_sql = generate_column_statistics_sql(
                        database_name,
                        table_name,
                        column_name,
                        column_type,
                        sample_percent if is_numerical else None,
                        stats_items,
                    )
                    logger.debug(f"Generated SQL for column {column_name}: {stats_sql}")

                    # No SQL means no statistic applies to this column; running
                    # an empty statement would only be reported as a failure.
                    if not stats_sql:
                        continue

                    # Best effort: a failing column must not stop the others.
                    try:
                        result = teradata_repo.execute_query(stats_sql, conn)
                        logger.debug(f"Query result for column {column_name}: {result}")
                        if result:
                            column_stats = parse_column_statistics_result(
                                result[0], database_name, table_name, column_name, stats_items, is_numerical
                            )
                            stats_list.append(column_stats)
                    except Exception as e:
                        logger.error(
                            f"Failed to collect statistics for {database_name}.{table_name}.{column_name}: {e}"
                        )

        except Exception as e:
            logger.error(f"Error during statistics collection: {e}")
        else:
            # Only claim success when the collection loop really finished.
            logger.info("Statistics collection completed successfully.")

    logger.debug(f"Stats list: {stats_list}")
    payloads = gen_table_stats_payload(stats=stats_list, tenant_id=tenant_id, endpoint=endpoint)
    logger.debug(f"Generated payloads: {payloads}")

    req_count = 0
    for payload in payloads:
        logger.info(f"Generating table stats. asset: {payload.db} -> {payload.table} -> {payload.column}")
        status_code = qdc_client.update_stats_by_id(
            global_id=payload.global_id,
            payload=payload.body.get_column_stats(),
        )
        if status_code == 200:
            req_count += 1

    logger.info(
        f"Loading statistics is finished. {req_count} statistics are ingested. "
        f"Numerical columns: {numerical_columns}, Non-numerical columns: {non_numerical_columns}"
    )
|
110
|
+
|
111
|
+
|
112
|
+
@error_handling_decorator
def parse_column_statistics_result(
    result: Dict[str, Any],
    database_name: str,
    table_name: str,
    column_name: str,
    stats_items: Optional[List[str]] = None,
    is_numerical: bool = False,
) -> Dict[str, Any]:
    """Translate one statistics result row into the stats dict for a column.

    Args:
        result: A single result row keyed by the SQL column aliases
            (``num_uniques``, ``num_nulls``, ``min_value`` and so on).
        database_name: Stored under ``"DB_NAME"``.
        table_name: Stored under ``"TABLE_NAME"``.
        column_name: Stored under ``"COLUMN_NAME"``.
        stats_items: Requested statistic names; only these are copied.
        is_numerical: When true, min/max/median/mean/stddev/mode are copied
            as well, each converted with ``str``.

    Returns:
        The identifying keys (``"SCHEMA_NAME"`` is always empty) plus one
        entry per requested statistic that is present in ``result``.
    """
    # requested item -> (alias in the result row, key in the output dict)
    untyped_fields = {
        "cardinality": ("num_uniques", "CARDINALITY"),
        "number_of_null": ("num_nulls", "NULL_COUNT"),
    }
    numeric_fields = {
        "min": ("min_value", "MIN_VALUE"),
        "max": ("max_value", "MAX_VALUE"),
        "median": ("median_value", "MEDIAN_VALUE"),
        "mean": ("avg_value", "AVG_VALUE"),
        "stddev": ("stddev_value", "STDDEV_VALUE"),
        "mode": ("mode_value", "MODE_VALUE"),
    }

    parsed = {
        "DB_NAME": database_name,
        "SCHEMA_NAME": "",
        "TABLE_NAME": table_name,
        "COLUMN_NAME": column_name,
    }

    for requested in stats_items or ():
        if requested in untyped_fields:
            alias, target = untyped_fields[requested]
            if alias in result:
                # Counts are stored as returned, without string conversion.
                parsed[target] = result[alias]
        elif is_numerical and requested in numeric_fields:
            alias, target = numeric_fields[requested]
            if alias in result:
                parsed[target] = str(result[alias])

    return parsed
|
150
|
+
|
151
|
+
|
152
|
+
@error_handling_decorator
def generate_column_statistics_sql(
    database_name: str,
    table_name: str,
    column_name: str,
    column_type: str,
    sample_percent: Optional[float] = None,
    stats_items: Optional[List[str]] = None,
) -> str:
    """Build the SQL statement that computes the requested column statistics.

    Args:
        database_name: Database that owns the table.
        table_name: Table name; a ``schema.table`` form is split on the first
            dot and each part is quoted separately.
        column_name: Column to profile.
        column_type: Column type code; numeric-only statistics are emitted
            when it is listed in ``NUMERIC_TYPES``.
        sample_percent: When greater than 0 and at most 99, a ``SAMPLE``
            clause with ``sample_percent / 100`` is appended.
        stats_items: Names of the statistics to include.

    Returns:
        The SQL text, or an empty string (after logging a warning) when no
        statistic applies to the column.
    """
    col = quote_identifier(column_name)
    db = quote_identifier(database_name)
    # "schema.table" becomes '"schema"."table"'; a plain name is quoted whole.
    tbl = ".".join(quote_identifier(part) for part in table_name.split(".", 1))

    requested = stats_items if stats_items else []
    use_sample = sample_percent is not None and 0 < sample_percent <= 99

    # (requested item, select-list expression) for statistics valid on any type.
    any_type = (
        ("cardinality", f"COUNT(DISTINCT {col}) AS num_uniques"),
        ("number_of_null", f"SUM(CASE WHEN {col} IS NULL THEN 1 ELSE 0 END) AS num_nulls"),
    )
    # (requested item, aggregate function, result alias) for numeric columns.
    numeric_only = (
        ("min", "MIN", "min_value"),
        ("max", "MAX", "max_value"),
        ("median", "MEDIAN", "median_value"),
        ("mean", "AVG", "avg_value"),
        ("stddev", "STDDEV_SAMP", "stddev_value"),
    )

    select_list = [expression for item, expression in any_type if item in requested]
    cte = ""

    if requested and column_type in NUMERIC_TYPES:
        select_list.extend(
            f"{func}(CAST({col} AS FLOAT)) AS {alias}" for item, func, alias in numeric_only if item in requested
        )
        if "mode" in requested:
            # The most frequent value comes from a CTE read via a scalar subquery.
            cte_parts = [
                "WITH MODE_VALUE AS (",
                f" SELECT {col}, COUNT(*) as freq ",
                f" FROM {db}.{tbl} ",
            ]
            if use_sample:
                cte_parts.append(f" SAMPLE {sample_percent / 100} ")
            cte_parts.extend(
                [
                    f" GROUP BY {col} ",
                    " QUALIFY ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC) = 1",
                    ") ",
                ]
            )
            cte = "".join(cte_parts)
            select_list.append(f"(SELECT {col} FROM MODE_VALUE) AS mode_value")

    if not select_list:
        logger.warning(f"No statistics selected for column {column_name}. Skipping this column.")
        return ""

    sql = f"{cte}SELECT {', '.join(select_list)} FROM {db}.{tbl}"
    if use_sample:
        sql += f" SAMPLE {sample_percent / 100}"

    logger.debug(f"Generated SQL query for {db}.{tbl}.{col}: {sql}")
    return sql
|
quollio_core/repository/qdc.py
CHANGED
@@ -25,9 +25,6 @@ class QDCExternalAPIClient:
|
|
25
25
|
Tried to find a package for oauth0 client credentials flow,
|
26
26
|
but any of them contains bugs or lacks of features to handle the token refresh when it's expired
|
27
27
|
"""
|
28
|
-
is_domain_valid = is_valid_domain(domain=self.base_url)
|
29
|
-
if not is_domain_valid:
|
30
|
-
raise ValueError("The format of quollio API URL is invalid. The URL must end with `.com`")
|
31
28
|
|
32
29
|
url = f"{self.base_url}/oauth2/token"
|
33
30
|
creds = f"{self.client_id}:{self.client_secret}"
|
@@ -108,7 +105,3 @@ class QDCExternalAPIClient:
|
|
108
105
|
|
109
106
|
def initialize_qdc_client(api_url: str, client_id: str, client_secret: str) -> QDCExternalAPIClient:
|
110
107
|
return QDCExternalAPIClient(base_url=api_url, client_id=client_id, client_secret=client_secret)
|
111
|
-
|
112
|
-
|
113
|
-
def is_valid_domain(domain: str) -> bool:
|
114
|
-
return domain.endswith(".com")
|
@@ -0,0 +1,59 @@
|
|
1
|
+
import logging
|
2
|
+
import os
|
3
|
+
from typing import Tuple
|
4
|
+
|
5
|
+
import boto3
|
6
|
+
from botocore.exceptions import ClientError
|
7
|
+
|
8
|
+
logger = logging.getLogger(__name__)
|
9
|
+
|
10
|
+
|
11
|
+
def get_parameter_by_assume_role(key: str, region: str = "ap-northeast-1") -> Tuple[str, Exception]:
    """Read an SSM parameter through the tenant's cross-account role.

    The role ``qdc-<TENANT_ID>-cross-account-access`` in account
    ``QDC_ACCOUNT_ID`` is assumed via STS, and the temporary credentials are
    used to call ``ssm.get_parameter`` with decryption enabled. Both clients
    use endpoints in ``QDC_REGION``.

    Args:
        key: Name of the SSM parameter to read.
        region: Region used for the boto3 session.

    Returns:
        ``(value, None)`` on success, or ``("", error)`` when a required
        environment variable is missing or invalid, or when the STS or SSM
        call raises ``ClientError``. The second element is None on success
        even though the annotation does not say so.
    """
    tenant_id = os.getenv("TENANT_ID")
    if not _is_str_valid(tenant_id):
        return ("", Exception("TENANT_ID is not set in get_parameter_by_assume_role."))
    qdc_account_id = os.getenv("QDC_ACCOUNT_ID")
    if not _is_valid_aws_account_id(qdc_account_id):
        return ("", Exception("QDC_ACCOUNT_ID is not set in get_parameter_by_assume_role."))
    qdc_region = os.getenv("QDC_REGION")
    if not _is_str_valid(qdc_region):
        return ("", Exception("QDC_REGION is not set in get_parameter_by_assume_role."))

    sts_assume_role_arn = "arn:aws:iam::{account_id}:role/qdc-{tenant_id}-cross-account-access".format(
        account_id=qdc_account_id, tenant_id=tenant_id
    )

    session = boto3.Session(region_name=region)

    # Assuming the role can fail like the SSM call (missing role, denied
    # trust policy), so it follows the same (value, error) contract.
    try:
        sts = session.client("sts", endpoint_url="https://sts.{region}.amazonaws.com".format(region=qdc_region))
        assumed_role_object = sts.assume_role(
            RoleArn=sts_assume_role_arn,
            RoleSessionName="AssumeRoleSession",
        )
        credentials = assumed_role_object["Credentials"]
    except ClientError as e:
        logger.error("Failed to run sts.assume_role(). error: {err}".format(err=e))
        return ("", e)

    try:
        ssm = session.client(
            "ssm",
            endpoint_url="https://ssm.{region}.amazonaws.com".format(region=qdc_region),
            aws_access_key_id=credentials["AccessKeyId"],
            aws_secret_access_key=credentials["SecretAccessKey"],
            aws_session_token=credentials["SessionToken"],
        )
        res = ssm.get_parameter(Name=key, WithDecryption=True)
        return (res["Parameter"]["Value"], None)
    except ClientError as e:
        # Adjacent literals instead of a backslash continuation, which would
        # embed the source indentation in the message.
        logger.error(
            "Failed to run ssm.get_parameter(). "
            "Please check the value stored in parameter store is correct. error: {err}".format(err=e)
        )
        return ("", e)
|
52
|
+
|
53
|
+
|
54
|
+
def _is_valid_aws_account_id(s: str) -> bool:
|
55
|
+
return s is not None and len(s) == 12 and s.isdigit()
|
56
|
+
|
57
|
+
|
58
|
+
def _is_str_valid(s: str) -> bool:
|
59
|
+
return s is not None and s != ""
|
@@ -0,0 +1,103 @@
|
|
1
|
+
from dataclasses import dataclass, field
|
2
|
+
from typing import Any, Dict, List, Optional
|
3
|
+
|
4
|
+
import teradatasql
|
5
|
+
|
6
|
+
from quollio_core.helper.log_utils import error_handling_decorator, logger
|
7
|
+
|
8
|
+
|
9
|
+
@dataclass
class TeradataConfig:
    """Connection settings for a Teradata system.

    ``password`` is excluded from the generated ``repr`` so that logging or
    printing a config object does not disclose the credential.
    """

    host: str
    port: int
    username: str
    password: str = field(repr=False)
    database: str = "DBC"
    encrypt_data: bool = True
    # Extra driver parameters; they override the computed ones on key clashes.
    additional_params: Dict[str, Any] = field(default_factory=dict)

    @classmethod
    def from_dict(
        cls, credentials: Dict[str, str], host: str, port: str, additional_params: Optional[Dict[str, Any]] = None
    ) -> "TeradataConfig":
        """Build a config from a credentials mapping.

        Args:
            credentials: Mapping with ``"username"`` and ``"password"`` keys.
            host: Teradata host name.
            port: Port number; converted with ``int``.
            additional_params: Optional extra driver parameters.

        Returns:
            A new ``TeradataConfig`` with the default database and encryption.

        Raises:
            KeyError: If ``credentials`` lacks ``"username"`` or ``"password"``.
            ValueError: If ``port`` cannot be converted to an integer.
        """
        return cls(
            host=host,
            port=int(port),
            username=credentials["username"],
            password=credentials["password"],
            additional_params=additional_params or {},
        )

    def get_connection_params(self) -> Dict[str, Any]:
        """Return the keyword arguments passed to the Teradata driver.

        ``encrypt_data`` is rendered as the lowercase string ``"true"`` or
        ``"false"``; ``additional_params`` is applied last.
        """
        params = {
            "host": self.host,
            "user": self.username,
            "password": self.password,
            "database": self.database,
            "dbs_port": self.port,
            "encryptdata": str(self.encrypt_data).lower(),
        }
        params.update(self.additional_params)
        return params
|
42
|
+
|
43
|
+
|
44
|
+
@error_handling_decorator
def new_teradata_client(config: TeradataConfig) -> teradatasql.connect:
    """Open a Teradata connection using the parameters derived from ``config``."""
    connection_kwargs = config.get_connection_params()
    return teradatasql.connect(**connection_kwargs)
|
48
|
+
|
49
|
+
|
50
|
+
@error_handling_decorator
def get_table_list(
    conn: teradatasql.connect, target_databases: Optional[List[str]] = None, target_databases_method: str = "DENYLIST"
) -> List[Dict[str, str]]:
    """List the tables in ``DBC.TablesV`` whose ``TableKind`` is T, O or Q.

    Args:
        conn: Open Teradata connection.
        target_databases: Database names to filter on.
        target_databases_method: ``"DENYLIST"`` excludes ``target_databases``;
            any other value restricts the result to them.

    Returns:
        One dict per table as produced by ``execute_query``. With a deny list
        and no ``target_databases``, nothing is excluded; with an allow list
        and no ``target_databases``, an empty list is returned without
        querying.
    """
    is_denylist = target_databases_method == "DENYLIST"

    query_tables = """
        SELECT DatabaseName, TableName
        FROM DBC.TablesV
        WHERE TableKind IN ('T', 'O', 'Q')
    """

    if target_databases:
        operator = "NOT" if is_denylist else ""
        # Double embedded single quotes so a name cannot end the SQL literal.
        quoted_names = ",".join("'" + db.replace("'", "''") + "'" for db in target_databases)
        query_tables += f" AND DatabaseName {operator} IN ({quoted_names})\n"
    elif not is_denylist:
        # An allow list with no entries allows nothing; "IN ()" is not valid SQL.
        logger.warning("No target databases given for an allow list. No tables are retrieved.")
        return []

    logger.debug("Executing query to retrieve table names.")
    tables = execute_query(query_tables, conn)
    return tables
|
68
|
+
|
69
|
+
|
70
|
+
@error_handling_decorator
def get_column_list(conn: teradatasql.connect, database_name: str, table_name: str) -> List[Dict[str, str]]:
    """Fetch ``ColumnName`` and ``ColumnType`` for one table from ``DBC.ColumnsV``.

    Args:
        conn: Open Teradata connection.
        database_name: Database that owns the table.
        table_name: Table whose columns are listed.

    Returns:
        One dict per column as produced by ``execute_query``.
    """
    # The names are embedded as SQL string literals, so embedded single quotes
    # are doubled to keep them from terminating the literal.
    database_literal = database_name.replace("'", "''")
    table_literal = table_name.replace("'", "''")
    query_columns = f"""
        SELECT ColumnName, ColumnType
        FROM DBC.ColumnsV
        WHERE DatabaseName = '{database_literal}'
        AND TableName = '{table_literal}'
    """
    logger.debug(f"Executing query to retrieve columns for {database_name}.{table_name}.")
    columns = execute_query(query_columns, conn)
    logger.debug(f"Retrieved columns: {columns}")
    return columns
|
82
|
+
|
83
|
+
|
84
|
+
@error_handling_decorator
def execute_query(query: str, con: teradatasql.connect) -> List[Dict[str, Any]]:
    """Run ``query`` and return every row as a dict keyed by column name.

    Args:
        query: SQL text to execute.
        con: Open Teradata connection.

    Returns:
        All fetched rows, or an empty list when the statement produced no
        result set.

    Raises:
        teradatasql.OperationalError: Logged and re-raised.
        teradatasql.ProgrammingError: Logged and re-raised.
        Exception: Any other error is logged and re-raised.
    """
    try:
        with con.cursor() as cur:
            logger.debug(f"Executing SQL query: {query}")
            cur.execute(query)
            logger.debug(f"Column descriptions: {cur.description}")
            # DB-API cursors report description=None when the statement has no
            # result set; iterating it would raise a misleading TypeError.
            if cur.description is None:
                logger.debug("Statement returned no result set.")
                return []
            columns = [desc[0] for desc in cur.description]
            rows = [dict(zip(columns, row)) for row in cur.fetchall()]
            logger.debug(f"Fetched {len(rows)} rows from Teradata.")
            return rows
    except teradatasql.OperationalError as e:
        logger.error(f"Teradata Operational Error: {e}")
        raise
    except teradatasql.ProgrammingError as e:
        logger.error(f"Teradata Programming Error: {e}")
        raise
    except Exception as e:
        logger.error(f"Unexpected error fetching data from Teradata: {e}")
        raise
|
quollio_core/snowflake.py
CHANGED
@@ -3,7 +3,7 @@ import logging
|
|
3
3
|
import os
|
4
4
|
import shutil
|
5
5
|
|
6
|
-
from quollio_core.helper.core import setup_dbt_profile
|
6
|
+
from quollio_core.helper.core import is_valid_domain, setup_dbt_profile
|
7
7
|
from quollio_core.helper.env_default import env_default
|
8
8
|
from quollio_core.helper.log import set_log_level
|
9
9
|
from quollio_core.profilers.snowflake import (
|
@@ -13,7 +13,7 @@ from quollio_core.profilers.snowflake import (
|
|
13
13
|
snowflake_table_to_table_lineage,
|
14
14
|
)
|
15
15
|
from quollio_core.profilers.stats import get_column_stats_items
|
16
|
-
from quollio_core.repository import dbt, qdc, snowflake
|
16
|
+
from quollio_core.repository import dbt, qdc, snowflake, ssm
|
17
17
|
|
18
18
|
logger = logging.getLogger(__name__)
|
19
19
|
|
@@ -298,6 +298,16 @@ if __name__ == "__main__":
|
|
298
298
|
required=False,
|
299
299
|
help="Whether to ingest column lineage into QDIC or not. Default value is False",
|
300
300
|
)
|
301
|
+
parser.add_argument(
|
302
|
+
"--external_api_access",
|
303
|
+
type=str,
|
304
|
+
choices=["PUBLIC", "VPC_ENDPOINT"],
|
305
|
+
action=env_default("EXTERNAL_API_ACCESS"),
|
306
|
+
default="PUBLIC",
|
307
|
+
required=False,
|
308
|
+
help="Access method to Quollio API. Default 'PUBLIC'. Choose 'VPC_ENDPOINT'\
|
309
|
+
if you use API Gateway VPC Endpoint, DefaultValue is set to PUBLIC.",
|
310
|
+
)
|
301
311
|
|
302
312
|
stats_items = get_column_stats_items()
|
303
313
|
parser.add_argument(
|
@@ -336,9 +346,20 @@ if __name__ == "__main__":
|
|
336
346
|
log_level=args.log_level,
|
337
347
|
dbt_macro_source=args.dbt_macro_source,
|
338
348
|
)
|
349
|
+
api_url = args.api_url
|
350
|
+
if args.external_api_access == "VPC_ENDPOINT":
|
351
|
+
api_url, err = ssm.get_parameter_by_assume_role(args.api_url)
|
352
|
+
if err is not None:
|
353
|
+
logger.error("Fail to ssm.get_parameter_by_assume_role. {err}".format(err=err))
|
354
|
+
raise Exception("Fail to ssm.get_parameter_by_assume_role. {err}".format(err=err))
|
355
|
+
|
356
|
+
is_domain_valid = is_valid_domain(domain=api_url, domain_type=args.external_api_access)
|
357
|
+
if not is_domain_valid:
|
358
|
+
raise ValueError("The format of quollio API URL is invalid. The URL must end with `.com`")
|
359
|
+
|
339
360
|
if "load_lineage" in args.commands:
|
340
361
|
qdc_client = qdc.QDCExternalAPIClient(
|
341
|
-
base_url=
|
362
|
+
base_url=api_url,
|
342
363
|
client_id=args.client_id,
|
343
364
|
client_secret=args.client_secret,
|
344
365
|
)
|
@@ -350,7 +371,7 @@ if __name__ == "__main__":
|
|
350
371
|
)
|
351
372
|
if "load_stats" in args.commands:
|
352
373
|
qdc_client = qdc.QDCExternalAPIClient(
|
353
|
-
base_url=
|
374
|
+
base_url=api_url,
|
354
375
|
client_id=args.client_id,
|
355
376
|
client_secret=args.client_secret,
|
356
377
|
)
|
@@ -362,7 +383,7 @@ if __name__ == "__main__":
|
|
362
383
|
)
|
363
384
|
if "load_sqllineage" in args.commands:
|
364
385
|
qdc_client = qdc.QDCExternalAPIClient(
|
365
|
-
base_url=
|
386
|
+
base_url=api_url,
|
366
387
|
client_id=args.client_id,
|
367
388
|
client_secret=args.client_secret,
|
368
389
|
)
|
quollio_core/teradata.py
ADDED
@@ -0,0 +1,254 @@
|
|
1
|
+
import argparse
|
2
|
+
import json
|
3
|
+
|
4
|
+
from quollio_core.helper.env_default import env_default
|
5
|
+
from quollio_core.helper.log_utils import configure_logging, error_handling_decorator, logger
|
6
|
+
from quollio_core.profilers.stats import get_column_stats_items
|
7
|
+
from quollio_core.profilers.teradata.lineage import load_lineage
|
8
|
+
from quollio_core.profilers.teradata.stats import load_stats
|
9
|
+
from quollio_core.repository import qdc
|
10
|
+
from quollio_core.repository import teradata as teradata_repo
|
11
|
+
|
12
|
+
# Names of Teradata databases regarded as system databases by default.
# NOTE(review): presumably the default exclusion list when the caller supplies
# no target databases - the usage is outside this view, confirm in main().
DEFAULT_SYSTEM_DATABASES = [
    "DBC",
    "GLOBAL_FUNCTIONS",
    "gs_tables_db",
    "modelops",
    "system",
    "tapidb",
    "TDaaS_BAR",
    "TDaaS_DB",
    "TDaaS_Maint",
    "TDaaS_Monitor",
    "TDaaS_Support",
    "TDaaS_TDBCMgmt1",
    "TDaaS_TDBCMgmt2",
    "TDBCMgmt",
    "Crashdumps",
    "dbcmngr",
    "DemoNow_Monitor",
    "External_AP",
    "LockLogShredder",
    "mldb",
    "SQLJ",
    "SysAdmin",
    "SYSBAR",
    "SYSJDBC",
    "SYSLIB",
    "SYSSPATIAL",
    "SystemFe",
    "SYSUDTLIB",
    "SYSUIF",
    "Sys_Calendar",
    "TDMaps",
    "TDPUSER",
    "TDQCD",
    "TDStats",
    "tdwm",
    "TD_ANALYTICS_DB",
    "TD_SERVER_DB",
    "TD_SYSFNLIB",
    "TD_SYSGPL",
    "TD_SYSXML",
    "val",
]
|
55
|
+
|
56
|
+
|
57
|
+
@error_handling_decorator
def main() -> None:
    """Command-line entry point of the Quollio Intelligence Agent for Teradata.

    Parses the command-line arguments (each option is also wired to an
    environment variable through ``env_default``), configures logging,
    initializes the QDC client and the Teradata connection config, and then
    runs the requested commands:

    * ``load_lineage`` -- calls ``load_lineage`` with the connection config.
    * ``load_stats`` -- calls ``load_stats`` with the sampling percentage,
      the target database list/method and the selected stats items.

    Both commands may be given in a single invocation; lineage runs first.

    Returns:
        None.
    """
    parser = argparse.ArgumentParser(
        prog="Quollio Intelligence Agent for Teradata",
        description="Load lineage and stats to Quollio from Teradata",
        epilog="Copyright (c) 2024 Quollio Technologies, Inc.",
    )
    parser.add_argument(
        "commands",
        choices=["load_lineage", "load_stats"],
        type=str,
        nargs="+",
        help="""
        The command to execute.
        'load_lineage': Load lineage data from Teradata to Quollio,
        'load_stats': Load stats from Teradata to Quollio
        """,
    )
    parser.add_argument(
        "--log_level",
        type=str,
        choices=["debug", "info", "warn", "error", "none"],
        action=env_default("LOG_LEVEL"),
        default="info",
        required=False,
        help="The log level for commands. Default value is info",
    )
    parser.add_argument(
        "--tenant_id",
        type=str,
        action=env_default("TENANT_ID"),
        required=False,
        help="The tenant id (company id) where the lineage and stats are loaded",
    )
    parser.add_argument(
        "--teradata_host",
        type=str,
        action=env_default("TERADATA_HOST"),
        required=True,
        help="Teradata host",
    )
    parser.add_argument(
        "--teradata_port",
        type=str,
        action=env_default("TERADATA_PORT"),
        required=True,
        help="Teradata port",
    )
    parser.add_argument(
        "--teradata_user",
        type=str,
        action=env_default("TERADATA_USER_NAME"),
        required=True,
        help="Teradata username",
    )
    parser.add_argument(
        "--teradata_password",
        type=str,
        action=env_default("TERADATA_PASSWORD"),
        required=True,
        help="Teradata password",
    )
    parser.add_argument(
        "--teradata_connection_parameters",
        type=str,
        action=env_default("TERADATA_CONNECTION_PARAMETERS"),
        default="{}",
        help="Additional Teradata connection parameters as a JSON string",
    )
    parser.add_argument(
        "--api_url",
        type=str,
        action=env_default("QDC_API_URL"),
        required=False,
        help="The base URL of Quollio External API",
    )
    parser.add_argument(
        "--client_id",
        type=str,
        action=env_default("QDC_CLIENT_ID"),
        required=False,
        help="The client id that is created on Quollio console to let clients access Quollio External API",
    )
    parser.add_argument(
        "--client_secret",
        type=str,
        action=env_default("QDC_CLIENT_SECRET"),
        required=False,
        help="The client secret that is created on Quollio console to let clients access Quollio External API",
    )
    parser.add_argument(
        "--sample_percent",
        type=float,
        action=env_default("SAMPLE_PERCENT"),
        default=1,
        required=False,
        # argparse %-formats help strings, so a literal percent sign must be
        # written as "%%"; a bare "%" makes --help raise ValueError.
        help="Percentage of data to sample when collecting statistics (e.g., 10 for 10%%). Default is 1%%.",
    )
    parser.add_argument(
        "--teradata_target_databases",
        type=str,
        action=env_default("TERADATA_TARGET_DATABASES"),
        required=False,
        default=None,
        help="Comma-separated list of Teradata target databases. If not provided,\
            DEFAULT_SYSTEM_DATABASES will be used.",
    )
    parser.add_argument(
        "--teradata_target_databases_method",
        type=str,
        choices=["ALLOWLIST", "DENYLIST"],
        action=env_default("TERADATA_TARGET_DATABASE_METHOD"),
        default="DENYLIST",
        help="Method to use for teradata_target_databases (allowlist or denylist)",
    )
    parser.add_argument(
        "--teradata_page_size",
        type=int,
        action=env_default("TERADATA_PAGE_SIZE"),
        default=1000,
        required=False,
        help="Page size for Teradata queries. Default is 1000.",
    )
    parser.add_argument(
        "--target_stats_items",
        type=str,
        nargs="*",
        choices=get_column_stats_items(),
        default=get_column_stats_items(),
        action=env_default("TERADATA_STATS_ITEMS"),
        required=False,
        help="The items for statistic values.\
            You can choose the items to be aggregated for stats.\
            Default is full stats.",
    )

    args = parser.parse_args()

    configure_logging(args.log_level)

    logger.info("Starting Quollio Intelligence Agent for Teradata")

    credentials = {
        "username": args.teradata_user,
        "password": args.teradata_password,
    }

    # Parse additional connection parameters. This is best-effort: a bad
    # value is logged and replaced by an empty dict rather than aborting.
    try:
        additional_params = json.loads(args.teradata_connection_parameters)
    except json.JSONDecodeError:
        logger.warning("Invalid JSON in TERADATA_CONNECTION_PARAMETERS. Using empty dict.")
        additional_params = {}
    # Valid JSON is not necessarily an object ("[]", "null", "1" all parse);
    # only a mapping makes sense as extra connection parameters.
    if not isinstance(additional_params, dict):
        logger.warning("TERADATA_CONNECTION_PARAMETERS is not a JSON object. Using empty dict.")
        additional_params = {}

    logger.info("Initializing QDC client")
    qdc_client = qdc.initialize_qdc_client(args.api_url, args.client_id, args.client_secret)

    logger.info("Initializing Teradata client")
    config = teradata_repo.TeradataConfig.from_dict(
        credentials, args.teradata_host, args.teradata_port, additional_params
    )

    if "load_lineage" in args.commands:
        logger.info("Starting lineage loading process")
        load_lineage(
            conn_config=config,
            tenant_id=args.tenant_id,
            endpoint=args.teradata_host,
            qdc_client=qdc_client,
            page_size=args.teradata_page_size,
        )
        logger.info("Lineage loading process completed")

    if "load_stats" in args.commands:
        logger.info("Starting statistics loading process")
        logger.info(f"Selected stats items: {args.target_stats_items}")
        if args.teradata_target_databases is None:
            target_databases = DEFAULT_SYSTEM_DATABASES
        else:
            # Trim each name so that "db1, db2" does not yield " db2", which
            # would never match a real database name.
            target_databases = [name.strip() for name in args.teradata_target_databases.split(",")]
        load_stats(
            conn_config=config,
            sample_percent=args.sample_percent,
            tenant_id=args.tenant_id,
            endpoint=args.teradata_host,
            qdc_client=qdc_client,
            target_databases=target_databases,
            target_databases_method=args.teradata_target_databases_method.upper(),
            stats_items=args.target_stats_items,
        )
        logger.info("Statistics loading process completed")

    logger.info("Quollio Intelligence Agent for Teradata completed successfully")


if __name__ == "__main__":
    main()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: quollio-core
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.5.0
|
4
4
|
Summary: Quollio Core
|
5
5
|
Author-email: quollio-dev <qt.dev@quollio.com>
|
6
6
|
Maintainer-email: RyoAriyama <ryo.arym@gmail.com>, tharuta <35373297+TakumiHaruta@users.noreply.github.com>
|
@@ -37,6 +37,7 @@ Requires-Dist: google-cloud-bigquery==3.22.0
|
|
37
37
|
Requires-Dist: google-cloud-datacatalog==3.19.0
|
38
38
|
Requires-Dist: google-cloud-datacatalog-lineage==0.3.6
|
39
39
|
Requires-Dist: google-api-python-client==2.131.0
|
40
|
+
Requires-Dist: teradatasql==20.0.0.15
|
40
41
|
Requires-Dist: black>=22.3.0 ; extra == "test"
|
41
42
|
Requires-Dist: coverage>=7.3.2 ; extra == "test"
|
42
43
|
Requires-Dist: isort>=5.10.1 ; extra == "test"
|
@@ -1,8 +1,9 @@
|
|
1
|
-
quollio_core/__init__.py,sha256=
|
1
|
+
quollio_core/__init__.py,sha256=JDD0AXVIwawcOMm_hOs62bzrTO4xouhl7kPLgO7yn_c,83
|
2
2
|
quollio_core/bigquery.py,sha256=6Oq4DVGpa3X21Es_nbrsb8pK3vaxwb9Egnvq3huo95k,5894
|
3
3
|
quollio_core/bricks.py,sha256=8h3kbI2b6lGH2s-56jE_Q5-R5-nIsQYMfvtRrkFOzoU,10784
|
4
4
|
quollio_core/redshift.py,sha256=KcdljY95xYf9JYrsaMOBoP_XxQQ8wFVE5ue_XEMVSFc,11504
|
5
|
-
quollio_core/snowflake.py,sha256=
|
5
|
+
quollio_core/snowflake.py,sha256=3dHVys5c94s2pkEXkb2cWBpkxwkLQ23QENCoU-pfhuo,13202
|
6
|
+
quollio_core/teradata.py,sha256=muOCg40SPMs9Ro6f1h8AjzLhGPNXfX1n8IvTtqnQDg0,7739
|
6
7
|
quollio_core/dbt_projects/databricks/.gitignore,sha256=1jJAyXSzJ3YUm0nx3i7wUSE4RjQMX3ad6F8O88UbtzI,29
|
7
8
|
quollio_core/dbt_projects/databricks/README.md,sha256=ZpRQyhFAODAiS8dc1Kb_ndkul4cu4o4udN_EMa49CU4,440
|
8
9
|
quollio_core/dbt_projects/databricks/dbt_project.yml,sha256=3sH98RNk7TnphvI3yEdXDstb92kW5BNxr-cT0tXhwzk,480
|
@@ -55,14 +56,14 @@ quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.sql,sha256
|
|
55
56
|
quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.yml,sha256=qgazupx3ca4P8R0loY5F9hyCz2fmAcWqZ6iOySo_NoY,377
|
56
57
|
quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.sql,sha256=BzvP9gKMFItmwqEQ4bDgtS-Invxhhe6L73Qe1ucxfHo,284
|
57
58
|
quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.yml,sha256=V_BESPk6IqE52ExT26-78As9l9AlWW86-Geb5PIhThU,67
|
58
|
-
quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.sql,sha256=
|
59
|
+
quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.sql,sha256=1RFE5VP1XbLMBlQAShJSeqMNDg5GFmwLXab-q2mnGcQ,2260
|
59
60
|
quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.yml,sha256=W39VAmFnnX6RBoW7B_4CConC1lm0Jm9o50Jsz9bYZzY,538
|
60
61
|
quollio_core/dbt_projects/snowflake/models/sources.yml,sha256=vGSV33cNj4UUyPUcYS-JFgc3r8KvSLfiA7qhbDCUU9s,10975
|
61
62
|
quollio_core/dbt_projects/snowflake/profiles/profiles_template.yml,sha256=gcZsgdGP461QuUM9jLbBKdadT8cHTXgNarq_azOOMhk,379
|
62
63
|
quollio_core/dbt_projects/snowflake/seeds/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
63
64
|
quollio_core/dbt_projects/snowflake/snapshots/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
64
65
|
quollio_core/helper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
65
|
-
quollio_core/helper/core.py,sha256=
|
66
|
+
quollio_core/helper/core.py,sha256=ddV5VLa49Je11eHUjlRN5m3nhtqPMMUSeG4135HuZC8,1390
|
66
67
|
quollio_core/helper/env_default.py,sha256=H6gbSGUPrEDZr4YDrL49hbOpw6RntI4U82kX1q6vUnI,2148
|
67
68
|
quollio_core/helper/log.py,sha256=flxyZZ44G79l1TaUp3OT58uCHcnE5z_pCduwoeI6IUs,645
|
68
69
|
quollio_core/helper/log_utils.py,sha256=QontLKETHjSAbQniJ7YqS0RY2AYvFHSjrlPiGr317LE,1391
|
@@ -72,16 +73,20 @@ quollio_core/profilers/databricks.py,sha256=ik4RiR_GOeU3S7s6C6Y9SGe1D_Y_f98BDWJV
|
|
72
73
|
quollio_core/profilers/lineage.py,sha256=4FyxIuPBrUFihqZryqTQBcfB0Z7634lKl_WwkD82vzE,6865
|
73
74
|
quollio_core/profilers/redshift.py,sha256=p6ONDCkhndZAOcKAwEyQ5fsi-jsQrlwHHb7LTI_m1uk,6473
|
74
75
|
quollio_core/profilers/snowflake.py,sha256=m9Ivv2LRwnrmgKS36a039AhrO27sR1EaOOdqNF26PhI,11156
|
75
|
-
quollio_core/profilers/sqllineage.py,sha256=
|
76
|
+
quollio_core/profilers/sqllineage.py,sha256=h0FT6CYb0A20zSc68GELZ7Q8bDbaHLQnZQHsXBEXBug,5261
|
76
77
|
quollio_core/profilers/stats.py,sha256=OLQrdrh0y64jo9rmzvGlDdxy_c7gMz_GnlXPJzWkBjM,7343
|
78
|
+
quollio_core/profilers/teradata/lineage.py,sha256=M1FOrFLZrgJwIp-qEeARugoP3W6YxuXkRHoDTPcj-wA,7169
|
79
|
+
quollio_core/profilers/teradata/stats.py,sha256=oIC0pp0vJeAx5VWzgVYPrI7sCbCka86ctF2ksczCOuU,9022
|
77
80
|
quollio_core/repository/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
78
81
|
quollio_core/repository/bigquery.py,sha256=3AyGcJNYGnUyMweyc6lGm4quwrOzd-ZBS2zNnFwafII,3990
|
79
82
|
quollio_core/repository/databricks.py,sha256=9Cgdv8qBnVaHqu3RA-IUBieAqb69moQ-KAAMVSf5Ds4,1877
|
80
83
|
quollio_core/repository/dbt.py,sha256=cnLwJPywLi8VowVW7zfIBa9jxVwDWO7xzzNRn1vWiuw,659
|
81
|
-
quollio_core/repository/qdc.py,sha256=
|
84
|
+
quollio_core/repository/qdc.py,sha256=IPGiYafnJYkuD7_kLapVM98-9ZhEwq5S-dGY2bO8fVk,4624
|
82
85
|
quollio_core/repository/redshift.py,sha256=p2ouEuYcDCjx1oBhc6H1ekQsvEqHGd3bFu3PW0ngYBc,2880
|
83
86
|
quollio_core/repository/snowflake.py,sha256=zL9-xi98AIftdW9MuKI-M3pZ1kQuuH-UiZH8HcJvmk4,1769
|
84
|
-
quollio_core
|
85
|
-
quollio_core
|
86
|
-
quollio_core-0.
|
87
|
-
quollio_core-0.
|
87
|
+
quollio_core/repository/ssm.py,sha256=xpm1FzbBnIsBptuYPUNnPgkKU2AH3XxI-ZL0bEetvW0,2182
|
88
|
+
quollio_core/repository/teradata.py,sha256=DkoutYaRspPdwCvDGKxpWX0dU0d12S2sUrzvZJpY-3Q,3420
|
89
|
+
quollio_core-0.5.0.dist-info/LICENSE,sha256=V8j_M8nAz8PvAOZQocyRDX7keai8UJ9skgmnwqETmdY,34520
|
90
|
+
quollio_core-0.5.0.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
91
|
+
quollio_core-0.5.0.dist-info/METADATA,sha256=erQi_pK7TboXRC-qBE59sqsrqAINKOlFEGDx0SF1JjA,6924
|
92
|
+
quollio_core-0.5.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|