quollio-core 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quollio_core/__init__.py +1 -1
- quollio_core/dbt_projects/snowflake/models/quollio_lineage_column_level.sql +17 -0
- quollio_core/dbt_projects/snowflake/models/quollio_lineage_table_level.sql +17 -0
- quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.sql +18 -0
- quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.sql +18 -2
- quollio_core/dbt_projects/snowflake/profiles/profiles_template.yml +6 -1
- quollio_core/helper/core.py +1 -0
- quollio_core/models/avroasset.py +23 -0
- quollio_core/models/qdc.py +36 -0
- quollio_core/profilers/lineage.py +96 -0
- quollio_core/profilers/qdc.py +27 -0
- quollio_core/profilers/snowflake.py +113 -60
- quollio_core/profilers/stats.py +36 -0
- quollio_core/profilers/teradata/lineage.py +7 -3
- quollio_core/profilers/teradata/stats.py +9 -3
- quollio_core/repository/qdc.py +91 -0
- quollio_core/repository/snowflake.py +62 -13
- quollio_core/repository/teradata.py +19 -5
- quollio_core/snowflake.py +100 -18
- quollio_core/teradata.py +15 -1
- {quollio_core-0.5.0.dist-info → quollio_core-0.6.0.dist-info}/METADATA +5 -2
- {quollio_core-0.5.0.dist-info → quollio_core-0.6.0.dist-info}/RECORD +24 -21
- {quollio_core-0.5.0.dist-info → quollio_core-0.6.0.dist-info}/WHEEL +1 -1
- {quollio_core-0.5.0.dist-info → quollio_core-0.6.0.dist-info/licenses}/LICENSE +0 -0
quollio_core/profilers/teradata/stats.py
CHANGED
```diff
@@ -27,6 +27,7 @@ def load_stats(
     target_databases: Optional[List[str]] = None,
     target_databases_method: str = "DENYLIST",
     stats_items: Optional[List[str]] = None,
+    system_database: Optional[str] = None,
 ) -> None:
     stats_list = []
     numerical_columns = 0
@@ -35,16 +36,21 @@ def load_stats(
         f"Starting statistics collection. " f"Sample percent: {sample_percent if sample_percent is not None else 'N/A'}"
     )
 
+    # Use system_database from config if not provided
+    system_database = system_database or conn_config.system_database
+
     with teradata_repo.new_teradata_client(conn_config) as conn:
         try:
-            tables = teradata_repo.get_table_list(conn, target_databases, target_databases_method)
+            tables = teradata_repo.get_table_list(conn, target_databases, target_databases_method, system_database)
             for table in tables:
                 logger.debug(f"Processing table: {table}")
-                database_name = table["
+                database_name = table["DatabaseName"]
                 table_name = table["TableName"]
 
                 logger.info(f"Processing table {database_name}.{table_name}")
-                columns = teradata_repo.get_column_list(
+                columns = teradata_repo.get_column_list(
+                    conn, database_name=database_name, table_name=table_name, system_database=system_database
+                )
                 logger.debug(f"Columns: {columns}")
 
                 for column in columns:
```
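This plumbing exists so that deployments whose Data Dictionary views live under a renamed system database (rather than the default DBC) can still be profiled. A minimal sketch of setting the new knob through `TeradataConfig.from_dict`; host, port, and credentials are placeholders:

```python
# Sketch only: connection values below are illustrative, not real.
from quollio_core.repository import teradata as teradata_repo

conn_config = teradata_repo.TeradataConfig.from_dict(
    credentials={"username": "dbadmin", "password": "secret"},
    host="teradata.example.com",
    port="1025",
    system_database="SYSDBC",  # falls back to "DBC" when omitted
)
# load_stats() called without system_database now resolves it from
# conn_config.system_database via the override added above.
```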
quollio_core/repository/qdc.py
CHANGED
```diff
@@ -8,6 +8,8 @@ import jwt
 import requests  # type: ignore
 from requests.exceptions import ConnectionError, HTTPError, RequestException, Timeout
 
+from quollio_core.models.qdc import DataSourceMetadataResponseBody, GetImportURLRequest, GetImportURLResponse
+
 logger = logging.getLogger(__name__)
 
 
@@ -64,6 +66,95 @@ class QDCExternalAPIClient:
         session.mount("https://", requests.adapters.HTTPAdapter(max_retries=retry))
         return session
 
+    def get_export_url(self, datasource_id: str) -> GetImportURLResponse:
+        self._refresh_token_if_expired()
+        headers = {"content-type": "application/json", "authorization": f"Bearer {self.auth_token}"}
+        endpoint = "{base_url}/v2/sources/{dsrc_id}/export-avro".format(base_url=self.base_url, dsrc_id=datasource_id)
+        try:
+            res = self.session.post(endpoint, headers=headers, data={})
+            res.raise_for_status()
+        except ConnectionError as ce:
+            logger.error(f"Connection Error: {ce} global_id: {datasource_id}.")
+        except HTTPError as he:
+            logger.error(f"HTTP Error: {he} global_id: {datasource_id}.")
+        except Timeout as te:
+            logger.error(f"Timeout Error: {te} global_id: {datasource_id}.")
+        except RequestException as re:
+            logger.error(f"RequestException Error: {re} global_id: {datasource_id}.")
+        else:
+            res = json.loads(res.text)
+            location = res.get("data").get("location")
+            return location
+
+    def download_file(self, url: str) -> requests.Response:
+        self._refresh_token_if_expired()
+
+        try:
+            res = self.session.get(url)
+            res.raise_for_status()
+        except ConnectionError as ce:
+            logger.error(f"Connection Error: {ce}.")
+        except HTTPError as he:
+            logger.error(f"HTTP Error: {he}.")
+        except Timeout as te:
+            logger.error(f"Timeout Error: {te}")
+        except RequestException as re:
+            logger.error(f"RequestException Error: {re}")
+        else:
+            return res
+
+    def get_import_url(self, datasource_id: str, payload: GetImportURLRequest) -> GetImportURLResponse:
+        self._refresh_token_if_expired()
+        headers = {"content-type": "application/json", "authorization": f"Bearer {self.auth_token}"}
+        endpoint = "{base_url}/v2/sources/{dsrc_id}/import".format(base_url=self.base_url, dsrc_id=datasource_id)
+        try:
+            payload_dict = payload.as_dict()
+            res = self.session.post(endpoint, headers=headers, json=payload_dict)
+            logger.debug(f"Got the result of import_url request: {res.text}")
+            res.raise_for_status()
+        except ConnectionError as ce:
+            logger.error(f"Connection Error: {ce} global_id: {datasource_id}.")
+        except HTTPError as he:
+            logger.error(f"HTTP Error: {he} global_id: {datasource_id}.")
+        except Timeout as te:
+            logger.error(f"Timeout Error: {te} global_id: {datasource_id}.")
+        except RequestException as re:
+            logger.error(f"RequestException Error: {re} global_id: {datasource_id}.")
+        else:
+            res = json.loads(res.text)
+            datasource_metadata_response = DataSourceMetadataResponseBody(**res.get("data").get("metadata"))
+            location = res.get("data").get("location")
+            response = GetImportURLResponse(
+                location=location, datasource_metadata_response_body=datasource_metadata_response
+            )
+            return response
+
+    def upload_file(self, url: str, metadata: DataSourceMetadataResponseBody, buffer: bytes):
+        self._refresh_token_if_expired()
+        headers = {
+            "Content-Type": "application/octet-stream",
+            "x-amz-meta-user_id": metadata.user_id,
+            "x-amz-meta-job_key": metadata.job_key,
+            "x-amz-meta-service_name": metadata.service_name,
+            "x-amz-meta-source_name": metadata.source_name,
+            "x-amz-meta-source_type": metadata.source_type,
+            "x-amz-meta-override_logical_name": metadata.override_logical_name,
+            "Content-Length": str(len(buffer)),
+        }
+        try:
+            res = self.session.put(url, headers=headers, data=buffer)
+            res.raise_for_status()
+        except ConnectionError as ce:
+            logger.error(f"Connection Error: {ce}.")
+        except HTTPError as he:
+            logger.error(f"HTTP Error: {he}.")
+        except Timeout as te:
+            logger.error(f"Timeout Error: {te}")
+        except RequestException as re:
+            logger.error(f"RequestException Error: {re}")
+        else:
+            return res.status_code
+
     def update_stats_by_id(self, global_id: str, payload: Dict[str, List[str]]) -> int:
         self._refresh_token_if_expired()
         headers = {"content-type": "application/json", "authorization": f"Bearer {self.auth_token}"}
```
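Together, the four new methods form an Avro round trip against the QDC API: fetch a presigned export URL, download the current asset file, request an import URL plus upload metadata, and PUT the new buffer. A hedged sketch of the flow, assuming `client` is an already-constructed `QDCExternalAPIClient`, `datasource_id` is a valid source ID, and `payload` is a prepared `GetImportURLRequest`:

```python
# Sketch only. Each method logs and falls through on request errors
# (the try/except/else pattern above), so failures surface as None.
export_url = client.get_export_url(datasource_id)
if export_url:
    avro_res = client.download_file(export_url)  # requests.Response with Avro bytes

import_res = client.get_import_url(datasource_id, payload)
if import_res:
    buffer = avro_res.content  # or a freshly generated Avro payload
    status_code = client.upload_file(
        import_res.location,
        import_res.datasource_metadata_response_body,
        buffer,
    )
```

Note that `get_export_url` is annotated as returning `GetImportURLResponse` but actually returns the presigned location string from the response body.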
quollio_core/repository/snowflake.py
CHANGED
```diff
@@ -1,7 +1,9 @@
 import logging
-from dataclasses import
+from dataclasses import dataclass
 from typing import Dict, List, Tuple
 
+from cryptography.hazmat.backends import default_backend
+from cryptography.hazmat.primitives import serialization
 from snowflake.connector import DictCursor, connect, errors
 from snowflake.connector.connection import SnowflakeConnection
 
@@ -12,16 +14,67 @@ logger = logging.getLogger(__name__)
 class SnowflakeConnectionConfig:
     account_id: str
     account_user: str
-    account_password: str
     account_build_role: str
     account_query_role: str
     account_warehouse: str
     account_database: str
     account_schema: str
+    account_password: str = None
+    private_key: str = None
     threads: int = 3
 
     def as_dict(self) -> Dict[str, str]:
-
+        """Convert config to dictionary, handling both auth methods for DBT."""
+        base_params = {
+            "account_id": self.account_id,
+            "account_user": self.account_user,
+            "account_build_role": self.account_build_role,
+            "account_query_role": self.account_query_role,
+            "account_warehouse": self.account_warehouse,
+            "account_database": self.account_database,
+            "account_schema": self.account_schema,
+            "threads": self.threads,
+        }
+
+        # Add auth parameters based on method
+        if self.private_key:
+            # Keep private key as is, template will handle formatting
+            base_params["private_key"] = self.private_key
+        elif self.account_password:
+            base_params["account_password"] = self.account_password
+
+        return {k: v for k, v in base_params.items() if v is not None}
+
+    def get_connection_params(self) -> Dict[str, str]:
+        """Get the appropriate connection parameters based on authentication method."""
+        params = {
+            "user": self.account_user,
+            "account": self.account_id,
+            "warehouse": self.account_warehouse,
+            "database": self.account_database,
+            "schema": self.account_schema,
+            "role": self.account_query_role,
+        }
+
+        # Add authentication parameters based on method
+        if self.private_key:
+            try:
+                # Parse private key content into RSA key object
+                pkey = serialization.load_pem_private_key(
+                    self.private_key.encode("utf-8"),
+                    password=None,
+                    backend=default_backend(),
+                )
+                params["private_key"] = pkey
+            except Exception as e:
+                logger.error(f"Failed to parse private key: {str(e)}")
+                raise
+        elif self.account_password:
+            params["password"] = self.account_password
+        else:
+            raise ValueError("Either password or private key authentication must be configured")
+
+        return params
 
 
 class SnowflakeQueryExecutor:
@@ -35,16 +88,12 @@ class SnowflakeQueryExecutor:
         self.conn.close()
 
     def __initialize(self, config: SnowflakeConnectionConfig) -> SnowflakeConnection:
-
-
-
-
-
-
-            database=config.account_database,
-            schema=config.account_schema,
-        )
-        return conn
+        try:
+            conn: SnowflakeConnection = connect(**config.get_connection_params())
+            return conn
+        except Exception as e:
+            logger.error(f"Failed to initialize Snowflake connection: {str(e)}")
+            raise
 
     def get_query_results(self, query: str) -> Tuple[List[Dict[str, str]], Exception]:
         with self.conn.cursor(DictCursor) as cur:
```
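The key change here is that `get_connection_params` deserializes the PEM text into an RSA key object with the `cryptography` package before handing it to `snowflake.connector.connect`. A minimal configuration sketch with placeholder identifiers (a real PEM body is required for the parse to succeed):

```python
from quollio_core.repository.snowflake import SnowflakeConnectionConfig

config = SnowflakeConnectionConfig(
    account_id="xy12345.ap-northeast-1",  # placeholder account locator
    account_user="QUOLLIO_USER",
    account_build_role="BUILD_ROLE",
    account_query_role="QUERY_ROLE",
    account_warehouse="QUOLLIO_WH",
    account_database="QUOLLIO_DB",
    account_schema="PUBLIC",
    private_key="-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n",
)

params = config.get_connection_params()
# params["private_key"] holds the parsed key object, not the PEM string.
# Configuring neither private_key nor account_password raises ValueError.
```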
quollio_core/repository/teradata.py
CHANGED
```diff
@@ -13,18 +13,25 @@ class TeradataConfig:
     username: str
     password: str
     database: str = "DBC"
+    system_database: str = "DBC"
     encrypt_data: bool = True
     additional_params: Dict[str, Any] = field(default_factory=dict)
 
     @classmethod
     def from_dict(
-        cls,
+        cls,
+        credentials: Dict[str, str],
+        host: str,
+        port: str,
+        additional_params: Dict[str, Any] = None,
+        system_database: str = "DBC",
     ) -> "TeradataConfig":
         return cls(
             host=host,
             port=int(port),
             username=credentials["username"],
             password=credentials["password"],
+            system_database=system_database,
             additional_params=additional_params or {},
         )
 
@@ -49,7 +56,10 @@ def new_teradata_client(config: TeradataConfig) -> teradatasql.connect:
 
 @error_handling_decorator
 def get_table_list(
-    conn: teradatasql.connect,
+    conn: teradatasql.connect,
+    target_databases: Optional[List[str]] = None,
+    target_databases_method: str = "DENYLIST",
+    system_database: str = "DBC",
 ) -> List[Dict[str, str]]:
     if target_databases_method == "DENYLIST":
         operator = "NOT"
@@ -58,24 +68,28 @@ def get_table_list(
 
     query_tables = f"""
     SELECT DatabaseName, TableName
-    FROM
+    FROM {system_database}.TablesV
    WHERE TableKind IN ('T', 'O', 'Q')
    AND DatabaseName {operator} IN ({','.join("'" + db + "'" for db in target_databases)})
    """
    logger.debug("Executing query to retrieve table names.")
+   logger.debug(f"Query: {query_tables}")
    tables = execute_query(query_tables, conn)
    return tables
 
 
 @error_handling_decorator
-def get_column_list(
+def get_column_list(
+    conn: teradatasql.connect, database_name: str, table_name: str, system_database: str = "DBC"
+) -> List[Dict[str, str]]:
    query_columns = f"""
    SELECT ColumnName, ColumnType
-   FROM
+   FROM {system_database}.ColumnsV
    WHERE DatabaseName = '{database_name}'
    AND TableName = '{table_name}'
    """
    logger.debug(f"Executing query to retrieve columns for {database_name}.{table_name}.")
+   logger.debug(f"Query: {query_columns}")
    columns = execute_query(query_columns, conn)
    logger.debug(f"Retrieved columns: {columns}")
    return columns
```
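Both catalog helpers now interpolate the configurable system database straight into their Data Dictionary queries (`{system_database}.TablesV` and `{system_database}.ColumnsV`) and log the rendered SQL at debug level. A hedged usage sketch, assuming an open `teradatasql` connection `conn`:

```python
# Illustrative values; the row keys match the SELECT lists above.
tables = get_table_list(
    conn,
    target_databases=["SALES", "MARKETING"],
    target_databases_method="ALLOWLIST",  # default is "DENYLIST"
    system_database="SYSDBC",             # renders as FROM SYSDBC.TablesV
)
for table in tables:
    columns = get_column_list(
        conn,
        database_name=table["DatabaseName"],
        table_name=table["TableName"],
        system_database="SYSDBC",         # renders as FROM SYSDBC.ColumnsV
    )
```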
quollio_core/snowflake.py
CHANGED
```diff
@@ -6,6 +6,7 @@ import shutil
 from quollio_core.helper.core import is_valid_domain, setup_dbt_profile
 from quollio_core.helper.env_default import env_default
 from quollio_core.helper.log import set_log_level
+from quollio_core.profilers.qdc import gen_existing_global_id_dict, get_avro_file_content
 from quollio_core.profilers.snowflake import (
     snowflake_column_to_column_lineage,
     snowflake_table_level_sqllineage,
@@ -24,6 +25,8 @@ def build_view(
     target_tables: str = "",
     log_level: str = "info",
     dbt_macro_source: str = "hub",
+    target_databases_method: str = "DENYLIST",
+    target_databases: list[str] = [],
 ) -> None:
     logger.info("Build profiler views using dbt")
     # set parameters
@@ -32,10 +35,16 @@ def build_view(
     project_path = f"{current_dir}/dbt_projects/snowflake"
     template_path = f"{current_dir}/dbt_projects/snowflake/profiles"
     template_name = "profiles_template.yml"
-
+
+    options = '{{"query_role": "{query_role}", "sample_method": "{sample_method}",\
+        "target_databases_method": "{target_databases_method}",\
+        "target_databases": {target_databases}}}'.format(
         query_role=conn.account_query_role,
         sample_method=stats_sample_method,
+        target_databases_method=target_databases_method,
+        target_databases=target_databases,
     )
+
     new_package_file = f"{project_path}/packages.yml"
     if dbt_macro_source == "local":
         shutil.copyfile(f"{project_path}/packages_local.yml", new_package_file)
```
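`build_view` now folds the database filter into the dbt vars JSON alongside the query role and sample method. A rough standalone rendering of the resulting string with illustrative values (not taken from the package); the `{{`/`}}` escapes become literal braces:

```python
# Placeholder rendering of the options template above; values are illustrative.
options = '{{"query_role": "{query_role}", "sample_method": "{sample_method}",\
    "target_databases_method": "{target_databases_method}",\
    "target_databases": {target_databases}}}'.format(
    query_role="QUERY_ROLE",
    sample_method="SAMPLE",
    target_databases_method="ALLOWLIST",
    target_databases=["'ANALYTICS%'", "'RAW'"],
)
# -> {"query_role": "QUERY_ROLE", "sample_method": "SAMPLE",
#        "target_databases_method": "ALLOWLIST",
#        "target_databases": ["'ANALYTICS%'", "'RAW'"]}
```

The database names arrive pre-quoted (see the `__main__` handling further down), so `str(list)` yields a JSON array of already-quoted identifiers for dbt.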
```diff
@@ -85,10 +94,15 @@ def load_lineage(
 ) -> None:
     logger.info("Generate Snowflake table to table lineage.")
 
-
-        conn=conn,
-        qdc_client=qdc_client,
+    file_content = get_avro_file_content(
         tenant_id=tenant_id,
+        account_id=conn.account_id,
+        qdc_client=qdc_client,
+    )
+    existing_global_ids = gen_existing_global_id_dict(avro_content=file_content)
+
+    snowflake_table_to_table_lineage(
+        conn=conn, qdc_client=qdc_client, tenant_id=tenant_id, existing_global_ids=existing_global_ids
     )
 
     if enable_column_lineage:
@@ -96,9 +110,7 @@ def load_lineage(
             f"enable_column_lineage is set to {enable_column_lineage}.Generate Snowflake column to column lineage."
         )
         snowflake_column_to_column_lineage(
-            conn=conn,
-            qdc_client=qdc_client,
-            tenant_id=tenant_id,
+            conn=conn, qdc_client=qdc_client, tenant_id=tenant_id, existing_global_ids=existing_global_ids
         )
     else:
         logger.info("Skip column lineage ingestion. Set enable_column_lineage to True if you ingest column lineage.")
@@ -116,6 +128,13 @@ def load_stats(
 ) -> None:
     logger.info("Generate Snowflake stats.")
 
+    file_content = get_avro_file_content(
+        tenant_id=tenant_id,
+        account_id=conn.account_id,
+        qdc_client=qdc_client,
+    )
+    existing_global_ids = gen_existing_global_id_dict(avro_content=file_content)
+
     if stats_items is None:
         raise ValueError("No stats items are not selected. Please specify any value to `stats_items` param.")
 
@@ -125,6 +144,7 @@ def load_stats(
         qdc_client=qdc_client,
         tenant_id=tenant_id,
         stats_items=stats_items,
+        existing_global_ids=existing_global_ids,
     )
 
     logger.info("Stats data is successfully finished.")
```
```diff
@@ -229,7 +249,11 @@ if __name__ == "__main__":
         "--target_tables",
         type=str,
         nargs="*",
-        choices=[
+        choices=[
+            "quollio_lineage_column_level",
+            "quollio_lineage_table_level",
+            "quollio_stats_columns",
+        ],
         action=env_default("SNOWFLAKE_TARGET_TABLES"),
         required=False,
         help="Target table name if you want to create only specific tables. \
@@ -237,6 +261,25 @@ if __name__ == "__main__":
             Please specify table name with blank delimiter like tableA tableB \
             if you want to create two or more tables.",
     )
+    parser.add_argument(
+        "--target_databases_method",
+        type=str,
+        choices=["ALLOWLIST", "DENYLIST"],
+        action=env_default("SNOWFLAKE_TARGET_DATABASE_METHOD"),
+        required=False,
+        help="Method to filter databases. 'ALLOWLIST' to only include listed databases,\
+            'DENNYLIST' to exclude listed databases",
+    )
+    parser.add_argument(
+        "--target_databases",
+        type=str,
+        nargs="*",
+        action=env_default("SNOWFLAKE_TARGET_DATABASES"),
+        required=False,
+        help='List of databases to allow or deny based on target_database_method\
+            please specify database names with blank space as delimiter\
+            wildcards (%) are supported "DATABASE%" ',
+    )
     parser.add_argument(
         "--sample_method",
         type=str,
@@ -308,6 +351,22 @@
         help="Access method to Quollio API. Default 'PUBLIC'. Choose 'VPC_ENDPOINT'\
             if you use API Gateway VPC Endpoint, DefaultValue is set to PUBLIC.",
     )
+    parser.add_argument(
+        "--auth_method",
+        type=str,
+        choices=["PASSWORD", "KEYPAIR"],
+        action=env_default("SNOWFLAKE_AUTH_METHOD"),
+        default="PASSWORD",
+        required=False,
+        help="Authentication method to use (PASSWORD or KEYPAIR)",
+    )
+    parser.add_argument(
+        "--private_key",
+        type=str,
+        action=env_default("SNOWFLAKE_PRIVATE_KEY"),
+        required=False,
+        help="Private key content for keypair authentication",
+    )
 
     stats_items = get_column_stats_items()
     parser.add_argument(
@@ -324,27 +383,50 @@ if __name__ == "__main__":
     args = parser.parse_args()
     set_log_level(level=args.log_level)
 
-
-
-
-
-        account_build_role
-        account_query_role
-        account_warehouse
-        account_database
-        account_schema
-
+    # Update authentication handling
+    auth_params = {
+        "account_id": args.account_id,
+        "account_user": args.user,
+        "account_build_role": args.build_role,
+        "account_query_role": args.query_role,
+        "account_warehouse": args.warehouse,
+        "account_database": args.database,
+        "account_schema": args.schema,
+    }
+
+    # Add authentication specific parameters based on method
+    if args.auth_method == "KEYPAIR":
+        if not args.private_key:
+            raise ValueError("private_key is required when using keypair authentication")
+        auth_params["private_key"] = args.private_key
+        logger.info("Using keypair authentication")
+    else:
+        if not args.password:
+            raise ValueError("password is required when using password authentication")
+        auth_params["account_password"] = args.password
+        logger.info("Using password authentication")
+        logger.warning("Password authentication is being deprecated. Please consider using keypair authentication.")
+
+    conn = snowflake.SnowflakeConnectionConfig(**auth_params)
 
     if len(args.commands) == 0:
         raise ValueError("No command is provided")
 
     if "build_view" in args.commands:
+
+        if args.target_databases:
+            target_databases = ["'" + db + "'" for db in args.target_databases[0].split(",")]
+        else:
+            target_databases = []
+
         build_view(
             conn=conn,
             stats_sample_method=args.sample_method,
             target_tables=args.target_tables,
             log_level=args.log_level,
             dbt_macro_source=args.dbt_macro_source,
+            target_databases_method=args.target_databases_method,
+            target_databases=target_databases,
         )
     api_url = args.api_url
     if args.external_api_access == "VPC_ENDPOINT":
```
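One detail worth isolating: although `--target_databases` is declared with `nargs="*"`, the `build_view` branch splits the first token on commas and wraps each name in single quotes before passing the list on. A standalone sketch of that transformation:

```python
# Mimics argparse's nargs="*" result for: --target_databases "ANALYTICS%,RAW"
args_target_databases = ["ANALYTICS%,RAW"]

if args_target_databases:
    target_databases = ["'" + db + "'" for db in args_target_databases[0].split(",")]
else:
    target_databases = []

print(target_databases)  # ["'ANALYTICS%'", "'RAW'"]
```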
quollio_core/teradata.py
CHANGED
```diff
@@ -189,6 +189,14 @@ def main() -> None:
         You can choose the items to be aggregated for stats.\
         Default is full stats.",
     )
+    parser.add_argument(
+        "--teradata_system_database",
+        type=str,
+        action=env_default("TERADATA_SYSTEM_DATABASE"),
+        default="DBC",
+        help="Name of the Teradata system database.\
+            Default is DBC",
+    )
 
     args = parser.parse_args()
 
@@ -213,7 +221,11 @@ def main() -> None:
 
     logger.info("Initializing Teradata client")
     config = teradata_repo.TeradataConfig.from_dict(
-        credentials,
+        credentials=credentials,
+        host=args.teradata_host,
+        port=args.teradata_port,
+        additional_params=additional_params,
+        system_database=args.teradata_system_database,
     )
 
     if "load_lineage" in args.commands:
@@ -224,6 +236,7 @@ def main() -> None:
         endpoint=args.teradata_host,
         qdc_client=qdc_client,
         page_size=args.teradata_page_size,
+        system_database=args.teradata_system_database,
     )
     logger.info("Lineage loading process completed")
 
@@ -244,6 +257,7 @@ def main() -> None:
         target_databases=target_databases,
         target_databases_method=args.teradata_target_databases_method.upper(),
         stats_items=args.target_stats_items,
+        system_database=args.teradata_system_database,
     )
     logger.info("Statistics loading process completed")
 
```
{quollio_core-0.5.0.dist-info → quollio_core-0.6.0.dist-info}/METADATA
CHANGED
```diff
@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: quollio-core
-Version: 0.5.0
+Version: 0.6.0
 Summary: Quollio Core
 Author-email: quollio-dev <qt.dev@quollio.com>
 Maintainer-email: RyoAriyama <ryo.arym@gmail.com>, tharuta <35373297+TakumiHaruta@users.noreply.github.com>
@@ -17,6 +17,7 @@ Classifier: Operating System :: OS Independent
 Classifier: Development Status :: 3 - Alpha
 Classifier: Intended Audience :: Developers
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
+License-File: LICENSE
 Requires-Dist: blake3==0.3.3
 Requires-Dist: dbt-core==1.7.10
 Requires-Dist: dbt-snowflake==1.7.0
@@ -38,6 +39,8 @@ Requires-Dist: google-cloud-datacatalog==3.19.0
 Requires-Dist: google-cloud-datacatalog-lineage==0.3.6
 Requires-Dist: google-api-python-client==2.131.0
 Requires-Dist: teradatasql==20.0.0.15
+Requires-Dist: dataclasses_avroschema==0.63.9
+Requires-Dist: fastavro==1.9.7
 Requires-Dist: black>=22.3.0 ; extra == "test"
 Requires-Dist: coverage>=7.3.2 ; extra == "test"
 Requires-Dist: isort>=5.10.1 ; extra == "test"
```