dcs-sdk 1.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_diff/__init__.py +221 -0
- data_diff/__main__.py +517 -0
- data_diff/abcs/__init__.py +13 -0
- data_diff/abcs/compiler.py +27 -0
- data_diff/abcs/database_types.py +402 -0
- data_diff/config.py +141 -0
- data_diff/databases/__init__.py +38 -0
- data_diff/databases/_connect.py +323 -0
- data_diff/databases/base.py +1417 -0
- data_diff/databases/bigquery.py +376 -0
- data_diff/databases/clickhouse.py +217 -0
- data_diff/databases/databricks.py +262 -0
- data_diff/databases/duckdb.py +207 -0
- data_diff/databases/mssql.py +343 -0
- data_diff/databases/mysql.py +189 -0
- data_diff/databases/oracle.py +238 -0
- data_diff/databases/postgresql.py +293 -0
- data_diff/databases/presto.py +222 -0
- data_diff/databases/redis.py +93 -0
- data_diff/databases/redshift.py +233 -0
- data_diff/databases/snowflake.py +222 -0
- data_diff/databases/sybase.py +720 -0
- data_diff/databases/trino.py +73 -0
- data_diff/databases/vertica.py +174 -0
- data_diff/diff_tables.py +489 -0
- data_diff/errors.py +17 -0
- data_diff/format.py +369 -0
- data_diff/hashdiff_tables.py +1026 -0
- data_diff/info_tree.py +76 -0
- data_diff/joindiff_tables.py +434 -0
- data_diff/lexicographic_space.py +253 -0
- data_diff/parse_time.py +88 -0
- data_diff/py.typed +0 -0
- data_diff/queries/__init__.py +13 -0
- data_diff/queries/api.py +213 -0
- data_diff/queries/ast_classes.py +811 -0
- data_diff/queries/base.py +38 -0
- data_diff/queries/extras.py +43 -0
- data_diff/query_utils.py +70 -0
- data_diff/schema.py +67 -0
- data_diff/table_segment.py +583 -0
- data_diff/thread_utils.py +112 -0
- data_diff/utils.py +1022 -0
- data_diff/version.py +15 -0
- dcs_core/__init__.py +13 -0
- dcs_core/__main__.py +17 -0
- dcs_core/__version__.py +15 -0
- dcs_core/cli/__init__.py +13 -0
- dcs_core/cli/cli.py +165 -0
- dcs_core/core/__init__.py +19 -0
- dcs_core/core/common/__init__.py +13 -0
- dcs_core/core/common/errors.py +50 -0
- dcs_core/core/common/models/__init__.py +13 -0
- dcs_core/core/common/models/configuration.py +284 -0
- dcs_core/core/common/models/dashboard.py +24 -0
- dcs_core/core/common/models/data_source_resource.py +75 -0
- dcs_core/core/common/models/metric.py +160 -0
- dcs_core/core/common/models/profile.py +75 -0
- dcs_core/core/common/models/validation.py +216 -0
- dcs_core/core/common/models/widget.py +44 -0
- dcs_core/core/configuration/__init__.py +13 -0
- dcs_core/core/configuration/config_loader.py +139 -0
- dcs_core/core/configuration/configuration_parser.py +262 -0
- dcs_core/core/configuration/configuration_parser_arc.py +328 -0
- dcs_core/core/datasource/__init__.py +13 -0
- dcs_core/core/datasource/base.py +62 -0
- dcs_core/core/datasource/manager.py +112 -0
- dcs_core/core/datasource/search_datasource.py +421 -0
- dcs_core/core/datasource/sql_datasource.py +1094 -0
- dcs_core/core/inspect.py +163 -0
- dcs_core/core/logger/__init__.py +13 -0
- dcs_core/core/logger/base.py +32 -0
- dcs_core/core/logger/default_logger.py +94 -0
- dcs_core/core/metric/__init__.py +13 -0
- dcs_core/core/metric/base.py +220 -0
- dcs_core/core/metric/combined_metric.py +98 -0
- dcs_core/core/metric/custom_metric.py +34 -0
- dcs_core/core/metric/manager.py +137 -0
- dcs_core/core/metric/numeric_metric.py +403 -0
- dcs_core/core/metric/reliability_metric.py +90 -0
- dcs_core/core/profiling/__init__.py +13 -0
- dcs_core/core/profiling/datasource_profiling.py +136 -0
- dcs_core/core/profiling/numeric_field_profiling.py +72 -0
- dcs_core/core/profiling/text_field_profiling.py +67 -0
- dcs_core/core/repository/__init__.py +13 -0
- dcs_core/core/repository/metric_repository.py +77 -0
- dcs_core/core/utils/__init__.py +13 -0
- dcs_core/core/utils/log.py +29 -0
- dcs_core/core/utils/tracking.py +105 -0
- dcs_core/core/utils/utils.py +44 -0
- dcs_core/core/validation/__init__.py +13 -0
- dcs_core/core/validation/base.py +230 -0
- dcs_core/core/validation/completeness_validation.py +153 -0
- dcs_core/core/validation/custom_query_validation.py +24 -0
- dcs_core/core/validation/manager.py +282 -0
- dcs_core/core/validation/numeric_validation.py +276 -0
- dcs_core/core/validation/reliability_validation.py +91 -0
- dcs_core/core/validation/uniqueness_validation.py +61 -0
- dcs_core/core/validation/validity_validation.py +738 -0
- dcs_core/integrations/__init__.py +13 -0
- dcs_core/integrations/databases/__init__.py +13 -0
- dcs_core/integrations/databases/bigquery.py +187 -0
- dcs_core/integrations/databases/databricks.py +51 -0
- dcs_core/integrations/databases/db2.py +652 -0
- dcs_core/integrations/databases/elasticsearch.py +61 -0
- dcs_core/integrations/databases/mssql.py +829 -0
- dcs_core/integrations/databases/mysql.py +409 -0
- dcs_core/integrations/databases/opensearch.py +64 -0
- dcs_core/integrations/databases/oracle.py +719 -0
- dcs_core/integrations/databases/postgres.py +482 -0
- dcs_core/integrations/databases/redshift.py +53 -0
- dcs_core/integrations/databases/snowflake.py +48 -0
- dcs_core/integrations/databases/spark_df.py +111 -0
- dcs_core/integrations/databases/sybase.py +1069 -0
- dcs_core/integrations/storage/__init__.py +13 -0
- dcs_core/integrations/storage/local_file.py +149 -0
- dcs_core/integrations/utils/__init__.py +13 -0
- dcs_core/integrations/utils/utils.py +36 -0
- dcs_core/report/__init__.py +13 -0
- dcs_core/report/dashboard.py +211 -0
- dcs_core/report/models.py +88 -0
- dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
- dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
- dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
- dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
- dcs_core/report/static/assets/images/docs.svg +6 -0
- dcs_core/report/static/assets/images/github.svg +4 -0
- dcs_core/report/static/assets/images/logo.svg +7 -0
- dcs_core/report/static/assets/images/slack.svg +13 -0
- dcs_core/report/static/index.js +2 -0
- dcs_core/report/static/index.js.LICENSE.txt +3971 -0
- dcs_sdk/__init__.py +13 -0
- dcs_sdk/__main__.py +18 -0
- dcs_sdk/__version__.py +15 -0
- dcs_sdk/cli/__init__.py +13 -0
- dcs_sdk/cli/cli.py +163 -0
- dcs_sdk/sdk/__init__.py +58 -0
- dcs_sdk/sdk/config/__init__.py +13 -0
- dcs_sdk/sdk/config/config_loader.py +491 -0
- dcs_sdk/sdk/data_diff/__init__.py +13 -0
- dcs_sdk/sdk/data_diff/data_differ.py +821 -0
- dcs_sdk/sdk/rules/__init__.py +15 -0
- dcs_sdk/sdk/rules/rules_mappping.py +31 -0
- dcs_sdk/sdk/rules/rules_repository.py +214 -0
- dcs_sdk/sdk/rules/schema_rules.py +65 -0
- dcs_sdk/sdk/utils/__init__.py +13 -0
- dcs_sdk/sdk/utils/serializer.py +25 -0
- dcs_sdk/sdk/utils/similarity_score/__init__.py +13 -0
- dcs_sdk/sdk/utils/similarity_score/base_provider.py +153 -0
- dcs_sdk/sdk/utils/similarity_score/cosine_similarity_provider.py +39 -0
- dcs_sdk/sdk/utils/similarity_score/jaccard_provider.py +24 -0
- dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py +31 -0
- dcs_sdk/sdk/utils/table.py +475 -0
- dcs_sdk/sdk/utils/themes.py +40 -0
- dcs_sdk/sdk/utils/utils.py +349 -0
- dcs_sdk-1.6.5.dist-info/METADATA +150 -0
- dcs_sdk-1.6.5.dist-info/RECORD +159 -0
- dcs_sdk-1.6.5.dist-info/WHEEL +4 -0
- dcs_sdk-1.6.5.dist-info/entry_points.txt +4 -0
|
@@ -0,0 +1,719 @@
|
|
|
1
|
+
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import secrets
|
|
16
|
+
import string
|
|
17
|
+
import time
|
|
18
|
+
from datetime import datetime
|
|
19
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
20
|
+
|
|
21
|
+
from loguru import logger
|
|
22
|
+
from sqlalchemy import create_engine, text
|
|
23
|
+
|
|
24
|
+
from dcs_core.core.common.errors import DataChecksDataSourcesConnectionError
|
|
25
|
+
from dcs_core.core.common.models.data_source_resource import RawColumnInfo
|
|
26
|
+
from dcs_core.core.datasource.sql_datasource import SQLDataSource
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class OracleDataSource(SQLDataSource):
|
|
30
|
+
    def __init__(self, data_source_name: str, data_connection: Dict):
        """
        Initialize the Oracle data source.

        :param data_source_name: logical name of this data source
        :param data_connection: connection settings (username, password, host,
            port, service_name and optional schema) consumed by :meth:`connect`
        """
        super().__init__(data_source_name, data_connection)

        # Named regex patterns used by the validity checks; keys are the
        # accepted values of ``predefined_regex_pattern`` in
        # query_string_pattern_validity. Patterns are Oracle REGEXP_LIKE
        # compatible POSIX-style expressions.
        self.regex_patterns = {
            "uuid": r"^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$",
            "usa_phone": r"^\(\d{3}\) \d{3}-\d{4}$",
            "email": r"^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$",
            "usa_zip_code": r"^[0-9]{5}(?:-[0-9]{4})?$",
            "ssn": r"^[0-6]\d{2}-(0[1-9]|[1-9]\d)-([1-9]\d{3}|\d{4})$",
            "sedol": r"^[A-Z0-9]{6}\d$",
            "lei": r"^[A-Z0-9]{18}[0-9]{2}$",
            "cusip": r"^[0-9A-Z]{8}[0-9]$",
            "figi": r"^BBG[A-Z0-9]{9}$",
            "isin": r"^[A-Z]{2}[A-Z0-9]{9}[0-9]$",
            "perm_id": r"^\d{4}([- ]?)\d{4}\1\d{4}\1\d{4}([- ]?)\d{3}$",
        }
|
|
46
|
+
|
|
47
|
+
def connect(self) -> Any:
|
|
48
|
+
"""
|
|
49
|
+
Connect to the data source
|
|
50
|
+
"""
|
|
51
|
+
try:
|
|
52
|
+
engine = create_engine(
|
|
53
|
+
f"oracle+oracledb://:@",
|
|
54
|
+
thick_mode=False,
|
|
55
|
+
connect_args={
|
|
56
|
+
"user": self.data_connection.get("username"),
|
|
57
|
+
"password": self.data_connection.get("password"),
|
|
58
|
+
"host": self.data_connection.get("host"),
|
|
59
|
+
"port": self.data_connection.get("port"),
|
|
60
|
+
"service_name": self.data_connection.get("service_name"),
|
|
61
|
+
},
|
|
62
|
+
)
|
|
63
|
+
self.schema_name = self.data_connection.get("schema") or self.data_connection.get("username")
|
|
64
|
+
self.connection = engine.connect()
|
|
65
|
+
return self.connection
|
|
66
|
+
except Exception as e:
|
|
67
|
+
raise DataChecksDataSourcesConnectionError(message=f"Failed to connect to Oracle data source: [{str(e)}]")
|
|
68
|
+
|
|
69
|
+
def qualified_table_name(self, table_name: str) -> str:
|
|
70
|
+
"""
|
|
71
|
+
Get the qualified table name
|
|
72
|
+
:param table_name: name of the table
|
|
73
|
+
:return: qualified table name
|
|
74
|
+
"""
|
|
75
|
+
if self.schema_name:
|
|
76
|
+
return f'"{self.schema_name}"."{table_name}"'
|
|
77
|
+
return f'"{table_name}"'
|
|
78
|
+
|
|
79
|
+
def quote_column(self, column: str) -> str:
|
|
80
|
+
"""
|
|
81
|
+
Quote the column name
|
|
82
|
+
:param column: name of the column
|
|
83
|
+
:return: quoted column name
|
|
84
|
+
"""
|
|
85
|
+
return f'"{column}"'
|
|
86
|
+
|
|
87
|
+
def query_get_database_version(self, database_version_query: Optional[str] = None) -> str:
|
|
88
|
+
"""
|
|
89
|
+
Get the database version
|
|
90
|
+
:return: version string
|
|
91
|
+
"""
|
|
92
|
+
query = database_version_query or "SELECT BANNER FROM v$version"
|
|
93
|
+
result = self.fetchone(query)[0]
|
|
94
|
+
return result if result else None
|
|
95
|
+
|
|
96
|
+
def query_get_table_names(
|
|
97
|
+
self,
|
|
98
|
+
schema: str | None = None,
|
|
99
|
+
with_view: bool = False,
|
|
100
|
+
) -> dict:
|
|
101
|
+
"""
|
|
102
|
+
Get the list of tables in the database.
|
|
103
|
+
:param schema: optional schema name
|
|
104
|
+
:param with_view: whether to include views
|
|
105
|
+
:return: dictionary with table names and optionally view names
|
|
106
|
+
"""
|
|
107
|
+
schema = schema or self.schema_name
|
|
108
|
+
|
|
109
|
+
if with_view:
|
|
110
|
+
query = (
|
|
111
|
+
f"SELECT TABLE_NAME, 'TABLE' AS OBJECT_TYPE FROM ALL_ALL_TABLES WHERE OWNER = '{schema}' "
|
|
112
|
+
f"UNION "
|
|
113
|
+
f"SELECT VIEW_NAME AS TABLE_NAME, 'VIEW' AS OBJECT_TYPE FROM ALL_VIEWS WHERE OWNER = '{schema}'"
|
|
114
|
+
)
|
|
115
|
+
else:
|
|
116
|
+
query = f"SELECT TABLE_NAME, 'TABLE' AS OBJECT_TYPE FROM ALL_ALL_TABLES WHERE OWNER = '{schema}'"
|
|
117
|
+
|
|
118
|
+
rows = self.fetchall(query)
|
|
119
|
+
|
|
120
|
+
if with_view:
|
|
121
|
+
result = {"table": [], "view": []}
|
|
122
|
+
if rows:
|
|
123
|
+
for row in rows:
|
|
124
|
+
object_name = row[0]
|
|
125
|
+
object_type = row[1].strip() if row[1] else row[1]
|
|
126
|
+
|
|
127
|
+
if object_type == "TABLE":
|
|
128
|
+
result["table"].append(object_name)
|
|
129
|
+
elif object_type == "VIEW":
|
|
130
|
+
result["view"].append(object_name)
|
|
131
|
+
else:
|
|
132
|
+
result = {"table": []}
|
|
133
|
+
if rows:
|
|
134
|
+
result["table"] = [row[0] for row in rows]
|
|
135
|
+
|
|
136
|
+
return result
|
|
137
|
+
|
|
138
|
+
    def query_get_table_indexes(self, table: str, schema: str | None = None) -> dict[str, dict]:
        """
        Get index information for a table in Oracle DB.

        :param table: Table name
        :param schema: Optional schema name (defaults to the connection schema)
        :return: Dictionary mapping index name to
            ``{"columns": [...], "index_type": ..., "is_primary_key": bool}``
        :raises RuntimeError: if no index rows exist for the table
        """
        schema = schema or self.schema_name
        # Oracle stores unquoted identifiers in upper case in the dictionary views.
        table = table.upper()
        schema = schema.upper()

        # One row per (index, column), ordered so columns arrive in position order.
        # NOTE(review): identifiers are interpolated directly into the SQL —
        # assumes trusted table/schema values.
        query = f"""
        SELECT
            ind.index_name,
            ind.index_type,
            col.column_name,
            col.column_position AS column_order
        FROM
            ALL_INDEXES ind
        JOIN
            ALL_IND_COLUMNS col ON ind.index_name = col.index_name AND ind.table_name = col.table_name AND ind.owner = col.index_owner
        WHERE
            ind.table_name = '{table}'
            AND ind.owner = '{schema}'
        ORDER BY
            ind.index_name, col.column_position
        """
        rows = self.fetchall(query)

        if not rows:
            raise RuntimeError(f"No index information found for table '{table}' in schema '{schema}'.")

        # Primary-key column names, in constraint position order.
        pk_query = f"""
        SELECT acc.column_name
        FROM ALL_CONSTRAINTS ac
        JOIN ALL_CONS_COLUMNS acc ON ac.constraint_name = acc.constraint_name AND ac.owner = acc.owner
        WHERE ac.constraint_type = 'P'
        AND ac.table_name = '{table}'
        AND ac.owner = '{schema}'
        ORDER BY acc.position
        """
        pk_rows = self.fetchall(pk_query)
        pk_columns = [row[0].strip() for row in pk_rows] if pk_rows else []
        pk_columns_set = set(pk_columns)

        # Group the flat (index, column) rows into a per-index structure.
        indexes = {}
        for row in rows:
            index_name = row[0]
            index_type = row[1]
            column_info = {
                "column_name": self.safe_get(row, 2),
                "column_order": self.safe_get(row, 3),
            }
            if index_name not in indexes:
                indexes[index_name] = {"columns": [], "index_type": index_type}
            indexes[index_name]["columns"].append(column_info)

        # Flag an index as the primary key when its column set matches the PK
        # column set exactly (same members AND same count).
        for index_name, idx in indexes.items():
            index_columns = [col["column_name"].strip() for col in idx["columns"]]
            index_columns_set = set(index_columns)
            idx["is_primary_key"] = pk_columns_set == index_columns_set and len(index_columns) == len(pk_columns)

        return indexes
|
|
201
|
+
|
|
202
|
+
def query_get_table_columns(
|
|
203
|
+
self,
|
|
204
|
+
table: str,
|
|
205
|
+
schema: str | None = None,
|
|
206
|
+
) -> RawColumnInfo:
|
|
207
|
+
"""
|
|
208
|
+
Get the schema of a table.
|
|
209
|
+
:param table: table name
|
|
210
|
+
:return: RawColumnInfo object containing column information
|
|
211
|
+
"""
|
|
212
|
+
schema = schema or self.schema_name
|
|
213
|
+
query = (
|
|
214
|
+
f"SELECT column_name, data_type, 6 as datetime_precision, data_precision as numeric_precision, "
|
|
215
|
+
f"data_scale as numeric_scale, NULL as collation_name, char_length as character_maximum_length "
|
|
216
|
+
f"FROM ALL_TAB_COLUMNS WHERE table_name = '{table}' AND owner = '{schema}'"
|
|
217
|
+
)
|
|
218
|
+
rows = self.fetchall(query)
|
|
219
|
+
if not rows:
|
|
220
|
+
raise RuntimeError(f"{table}: Table, {schema}: Schema, does not exist, or has no columns")
|
|
221
|
+
|
|
222
|
+
column_info = {
|
|
223
|
+
r[0]: RawColumnInfo(
|
|
224
|
+
column_name=self.safe_get(r, 0),
|
|
225
|
+
data_type=self.safe_get(r, 1),
|
|
226
|
+
datetime_precision=self.safe_get(r, 2),
|
|
227
|
+
numeric_precision=self.safe_get(r, 3),
|
|
228
|
+
numeric_scale=self.safe_get(r, 4),
|
|
229
|
+
collation_name=self.safe_get(r, 5),
|
|
230
|
+
character_maximum_length=self.safe_get(r, 6),
|
|
231
|
+
)
|
|
232
|
+
for r in rows
|
|
233
|
+
}
|
|
234
|
+
return column_info
|
|
235
|
+
|
|
236
|
+
def fetch_rows(
|
|
237
|
+
self,
|
|
238
|
+
query: str,
|
|
239
|
+
limit: int = 1,
|
|
240
|
+
with_column_names: bool = False,
|
|
241
|
+
complete_query: Optional[str] = None,
|
|
242
|
+
) -> Tuple[List, Optional[List[str]]]:
|
|
243
|
+
"""
|
|
244
|
+
Fetch rows from the database.
|
|
245
|
+
|
|
246
|
+
:param query: SQL query to execute.
|
|
247
|
+
:param limit: Number of rows to fetch.
|
|
248
|
+
:param with_column_names: Whether to include column names in the result.
|
|
249
|
+
:return: Tuple of (rows, column_names or None)
|
|
250
|
+
"""
|
|
251
|
+
query = complete_query or f"SELECT * FROM ({query}) subquery ORDER BY 1 FETCH NEXT {limit} ROWS ONLY"
|
|
252
|
+
|
|
253
|
+
result = self.connection.execute(text(query))
|
|
254
|
+
rows = result.fetchmany(limit)
|
|
255
|
+
|
|
256
|
+
if with_column_names:
|
|
257
|
+
column_names = result.keys()
|
|
258
|
+
return rows, list(column_names)
|
|
259
|
+
else:
|
|
260
|
+
return rows, None
|
|
261
|
+
|
|
262
|
+
    def query_valid_invalid_values_validity(
        self,
        table: str,
        field: str,
        regex_pattern: str = None,
        filters: str = None,
        values: List[str] = None,
    ) -> Tuple[int, int]:
        """
        Get the count of valid and invalid values.

        :param table: table name
        :param field: column name
        :param values: list of valid values (takes precedence over the regex)
        :param regex_pattern: regex pattern used when no value list is given
        :param filters: filter condition
        :return: (valid_count, total_count)
        """
        filters = f"WHERE {filters}" if filters else ""
        qualified_table_name = self.qualified_table_name(table)
        if values:
            values_str = ", ".join([f"'{value}'" for value in values])
            regex_query = f"CASE WHEN {field} IN ({values_str}) THEN 1 ELSE 0 END"
        else:
            # NOTE(review): if both `values` and `regex_pattern` are None this
            # embeds the literal string 'None' into the SQL; the sibling
            # query_string_pattern_validity raises ValueError instead —
            # confirm callers always supply one of the two.
            regex_query = f"CASE WHEN REGEXP_LIKE({field}, '{regex_pattern}') THEN 1 ELSE 0 END"

        # NOTE(review): `field` is not passed through quote_column here,
        # unlike the other validity methods — confirm this is intentional.
        query = f"""
        SELECT SUM({regex_query}) AS valid_count, COUNT(*) AS total_count
        FROM {qualified_table_name}
        {filters}
        """

        result = self.fetchone(query)
        return result[0], result[1]
|
|
295
|
+
|
|
296
|
+
    def query_string_pattern_validity(
        self,
        table: str,
        field: str,
        regex_pattern: str = None,
        predefined_regex_pattern: str = None,
        filters: str = None,
    ) -> Tuple[int, int]:
        """
        Get the count of values matching a regex pattern.

        :param table: table name
        :param field: column name
        :param regex_pattern: custom regex pattern
        :param predefined_regex_pattern: key into ``self.regex_patterns``
            (e.g. "email", "uuid"); takes precedence over ``regex_pattern``
        :param filters: filter condition
        :return: (valid_count, total_count)
        :raises ValueError: if neither pattern argument is provided
        :raises KeyError: if ``predefined_regex_pattern`` is not a known key
        """
        filters = f"WHERE {filters}" if filters else ""
        qualified_table_name = self.qualified_table_name(table)
        field = self.quote_column(field)

        if not regex_pattern and not predefined_regex_pattern:
            raise ValueError("Either regex_pattern or predefined_regex_pattern should be provided")

        # Predefined patterns win over an explicit custom pattern.
        if predefined_regex_pattern:
            regex_condition = f"REGEXP_LIKE({field}, '{self.regex_patterns[predefined_regex_pattern]}')"
        else:
            regex_condition = f"REGEXP_LIKE({field}, '{regex_pattern}')"

        regex_query = f"CASE WHEN {regex_condition} THEN 1 ELSE 0 END"

        query = f"""
        SELECT SUM({regex_query}) AS valid_count, COUNT(*) AS total_count
        FROM {qualified_table_name} {filters}
        """
        result = self.fetchone(query)
        return result[0], result[1]
|
|
333
|
+
|
|
334
|
+
    def query_get_usa_state_code_validity(self, table: str, field: str, filters: str = None) -> Tuple[int, int]:
        """
        Get the count of valid USA state codes.

        :param table: table name
        :param field: column name
        :param filters: filter condition
        :return: (valid_count, total_count)
        """

        # self.valid_state_codes comes from the base class — presumably an
        # iterable of two-letter state codes; verify against SQLDataSource.
        valid_state_codes_str = ", ".join(f"'{code}'" for code in self.valid_state_codes)

        filters = f"WHERE {filters}" if filters else ""

        qualified_table_name = self.qualified_table_name(table)
        field = self.quote_column(field)

        # Valid = exactly two upper-case letters AND a member of the known set.
        regex_query = (
            f"CASE WHEN REGEXP_LIKE({field}, '^[A-Z]{{2}}$') "
            f"AND {field} IN ({valid_state_codes_str}) THEN 1 ELSE 0 END"
        )

        query = f"""
        SELECT SUM({regex_query}) AS valid_count, COUNT(*) AS total_count
        FROM {qualified_table_name} {filters}
        """

        result = self.fetchone(query)
        return result[0], result[1]
|
|
362
|
+
|
|
363
|
+
    def query_timestamp_metric(
        self,
        table: str,
        field: str,
        predefined_regex: str,
        filters: str = None,
    ) -> Union[float, int]:
        """
        Count rows whose timestamp parts form a valid calendar timestamp.

        :param table: Table name
        :param field: Column name
        :param predefined_regex: pattern name; only "timestamp_iso" is supported
        :param filters: filter condition
        :return: (valid_count, total_count), or (0, 0) on query failure.
            NOTE(review): the annotation says Union[float, int] but the code
            returns a 2-tuple — confirm and align with callers.
        :raises ValueError: for any other ``predefined_regex`` value
        """

        qualified_table_name = self.qualified_table_name(table)
        field = self.quote_column(field)

        if predefined_regex == "timestamp_iso":
            filters_clause = f"WHERE {filters}" if filters else ""

            # Split the value into date/time parts with TO_CHAR, then validate
            # each part in SQL, including month lengths and leap-year February.
            query = f"""
            WITH extracted_timestamps AS (
                SELECT
                    {field},
                    TO_CHAR({field}, 'YYYY') AS year,
                    TO_CHAR({field}, 'MM') AS month,
                    TO_CHAR({field}, 'DD') AS day,
                    TO_CHAR({field}, 'HH24') AS hour,
                    TO_CHAR({field}, 'MI') AS minute,
                    TO_CHAR({field}, 'SS') AS second
                FROM {qualified_table_name}
                {filters_clause}
            ),
            validated_timestamps AS (
                SELECT
                    {field},
                    CASE
                        WHEN
                            REGEXP_LIKE(year, '^\\d{{4}}$') AND
                            REGEXP_LIKE(month, '^(0[1-9]|1[0-2])$') AND
                            REGEXP_LIKE(day, '^([0-2][0-9]|3[01])$') AND
                            (
                                (month IN ('01', '03', '05', '07', '08', '10', '12') AND day BETWEEN '01' AND '31') OR
                                (month IN ('04', '06', '09', '11') AND day BETWEEN '01' AND '30') OR
                                (month = '02' AND day BETWEEN '01' AND
                                    CASE
                                        WHEN MOD(TO_NUMBER(year), 400) = 0 OR
                                             (MOD(TO_NUMBER(year), 4) = 0 AND MOD(TO_NUMBER(year), 100) != 0) THEN '29'
                                        ELSE '28'
                                    END
                                )
                            ) AND
                            REGEXP_LIKE(hour, '^(0[0-9]|1[0-9]|2[0-3])$') AND
                            REGEXP_LIKE(minute, '^[0-5][0-9]$') AND
                            REGEXP_LIKE(second, '^[0-5][0-9]$')
                        THEN 1
                        ELSE 0
                    END AS is_valid
                FROM extracted_timestamps
            )
            SELECT SUM(is_valid) AS valid_count, COUNT(*) AS total_count
            FROM validated_timestamps
            """
            try:
                result = self.fetchone(query)
                valid_count = result[0]
                total_count = result[1]

                return valid_count, total_count
            except Exception as e:
                # Best-effort metric: log and report zero counts on failure.
                logger.error(f"Error occurred: {e}")
                return 0, 0
        else:
            raise ValueError(f"Unknown predefined regex pattern: {predefined_regex}")
|
|
438
|
+
|
|
439
|
+
    def query_timestamp_not_in_future_metric(
        self,
        table: str,
        field: str,
        predefined_regex: str,
        filters: str = None,
    ) -> Union[float, int]:
        """
        Count valid timestamps that are not in the future.

        :param table: Table name
        :param field: Column name
        :param predefined_regex: pattern name; only "timestamp_iso" is supported
        :param filters: filter condition
        :return: (valid_count, total_count), or (0, 0) on query failure.
            NOTE(review): total_count here counts ALL rows of the table,
            ignoring ``filters`` — the sibling
            query_timestamp_date_not_in_future_metric applies the filter to
            its total; confirm which behavior is intended.
        :raises ValueError: for any other ``predefined_regex`` value
        """
        qualified_table_name = self.qualified_table_name(table)
        field = self.quote_column(field)

        timestamp_iso_regex = r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])T([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9](?:\.\d{1,3})?(Z|[+-](0[0-9]|1[0-4]):[0-5][0-9])?$"

        if predefined_regex == "timestamp_iso":
            # NOTE(review): regex_condition is built but never used below —
            # this branch effectively only validates the pattern name.
            regex_condition = f"REGEXP_LIKE({field}, '{timestamp_iso_regex}')"
        else:
            raise ValueError(f"Unknown predefined regex pattern: {predefined_regex}")

        filters_clause = f"WHERE {filters}" if filters else ""

        # NOTE(review): `field` is already double-quoted, so the alias expands
        # to e.g. formatted_"COL" — confirm Oracle accepts this form.
        query = f"""
        WITH extracted_timestamps AS (
            SELECT
                TO_CHAR({field}, 'YYYY-MM-DD HH24:MI:SS') AS formatted_{field},
                REGEXP_SUBSTR(TO_CHAR({field}, 'YYYY-MM-DD HH24:MI:SS'), '^\d{{4}}', 1, 1) AS year,
                REGEXP_SUBSTR(TO_CHAR({field}, 'YYYY-MM-DD HH24:MI:SS'), '^\d{{4}}-(\d{{2}})', 1, 1, NULL, 1) AS month,
                REGEXP_SUBSTR(TO_CHAR({field}, 'YYYY-MM-DD HH24:MI:SS'), '^\d{{4}}-\d{{2}}-(\d{{2}})', 1, 1, NULL, 1) AS day,
                REGEXP_SUBSTR(TO_CHAR({field}, 'YYYY-MM-DD HH24:MI:SS'), ' (\d{{2}})', 1, 1, NULL, 1) AS hour,
                REGEXP_SUBSTR(TO_CHAR({field}, 'YYYY-MM-DD HH24:MI:SS'), ':\d{{2}}:(\d{{2}})', 1, 1, NULL, 1) AS minute,
                REGEXP_SUBSTR(TO_CHAR({field}, 'YYYY-MM-DD HH24:MI:SS'), ':(\d{{2}})$', 1, 1, NULL, 1) AS second
            FROM {qualified_table_name}
            {filters_clause}
        ),
        validated_timestamps AS (
            SELECT
                formatted_{field},
                CASE
                    WHEN
                        REGEXP_LIKE(year, '^\d{{4}}$') AND
                        REGEXP_LIKE(month, '^(0[1-9]|1[0-2])$') AND
                        REGEXP_LIKE(day, '^([0-2][0-9]|3[01])$') AND
                        (
                            (month IN ('01', '03', '05', '07', '08', '10', '12') AND day BETWEEN '01' AND '31') OR
                            (month IN ('04', '06', '09', '11') AND day BETWEEN '01' AND '30') OR
                            (month = '02' AND day BETWEEN '01' AND
                                CASE
                                    WHEN MOD(TO_NUMBER(year), 400) = 0 OR
                                         (MOD(TO_NUMBER(year), 4) = 0 AND MOD(TO_NUMBER(year), 100) != 0) THEN '29'
                                    ELSE '28'
                                END
                            )
                        ) AND
                        REGEXP_LIKE(hour, '^(0[0-9]|1[0-9]|2[0-3])$') AND
                        REGEXP_LIKE(minute, '^[0-5][0-9]$') AND
                        REGEXP_LIKE(second, '^[0-5][0-9]$')
                    THEN 1
                    ELSE 0
                END AS is_valid
            FROM extracted_timestamps
        ),
        timestamps_not_in_future AS (
            SELECT *
            FROM validated_timestamps
            WHERE is_valid = 1 AND TO_TIMESTAMP(formatted_{field}, 'YYYY-MM-DD HH24:MI:SS') <= CURRENT_TIMESTAMP
        )
        SELECT
            (SELECT COUNT(*) FROM timestamps_not_in_future) AS valid_count,
            (SELECT COUNT(*) FROM {qualified_table_name}) AS total_count
        FROM dual
        """
        try:
            result = self.fetchone(query)
            valid_count = result[0]
            total_count = result[1]

            return valid_count, total_count
        except Exception as e:
            # Best-effort metric: log and report zero counts on failure.
            logger.error(f"Error occurred: {e}")
            return 0, 0
|
|
524
|
+
|
|
525
|
+
    def query_timestamp_date_not_in_future_metric(
        self,
        table: str,
        field: str,
        predefined_regex: str,
        filters: str = None,
    ) -> Union[float, int]:
        """
        Count valid timestamp dates that are not in the future.

        :param table: Table name
        :param field: Column name
        :param predefined_regex: The regex pattern to use (e.g., "timestamp_iso").
            NOTE(review): this parameter is never used in the body — unlike
            the sibling methods there is no validation branch; confirm.
        :param filters: Optional filter condition
        :return: (valid_count, total_count), or (0, 0) on query failure.
            NOTE(review): the annotation says Union[float, int] but the code
            returns a 2-tuple — confirm and align with callers.
        """

        qualified_table_name = self.qualified_table_name(table)
        field = self.quote_column(field)
        filters_clause = f"WHERE {filters}" if filters else ""

        # NOTE(review): `field` is already double-quoted, so the alias expands
        # to e.g. formatted_"COL" — confirm Oracle accepts this form.
        query = f"""
        WITH extracted_timestamps AS (
            SELECT
                TO_CHAR({field}, 'YYYY-MM-DD HH24:MI:SS') AS formatted_{field},
                REGEXP_SUBSTR(TO_CHAR({field}, 'YYYY-MM-DD HH24:MI:SS'), '^\d{{4}}', 1, 1) AS year,
                REGEXP_SUBSTR(TO_CHAR({field}, 'YYYY-MM-DD HH24:MI:SS'), '^\d{{4}}-(\d{{2}})', 1, 1, NULL, 1) AS month,
                REGEXP_SUBSTR(TO_CHAR({field}, 'YYYY-MM-DD HH24:MI:SS'), '^\d{{4}}-\d{{2}}-(\d{{2}})', 1, 1, NULL, 1) AS day,
                REGEXP_SUBSTR(TO_CHAR({field}, 'YYYY-MM-DD HH24:MI:SS'), ' (\d{{2}})', 1, 1, NULL, 1) AS hour,
                REGEXP_SUBSTR(TO_CHAR({field}, 'YYYY-MM-DD HH24:MI:SS'), ':\d{{2}}:(\d{{2}})', 1, 1, NULL, 1) AS minute,
                REGEXP_SUBSTR(TO_CHAR({field}, 'YYYY-MM-DD HH24:MI:SS'), ':(\d{{2}})$', 1, 1, NULL, 1) AS second
            FROM {qualified_table_name}
            {filters_clause}
        ),
        validated_timestamps AS (
            SELECT
                formatted_{field},
                CASE
                    WHEN
                        REGEXP_LIKE(year, '^\d{{4}}$') AND
                        REGEXP_LIKE(month, '^(0[1-9]|1[0-2])$') AND
                        REGEXP_LIKE(day, '^([0-2][0-9]|3[01])$') AND
                        (
                            (month IN ('01', '03', '05', '07', '08', '10', '12') AND day BETWEEN '01' AND '31') OR
                            (month IN ('04', '06', '09', '11') AND day BETWEEN '01' AND '30') OR
                            (month = '02' AND day BETWEEN '01' AND
                                CASE
                                    WHEN MOD(TO_NUMBER(year), 400) = 0 OR
                                         (MOD(TO_NUMBER(year), 4) = 0 AND MOD(TO_NUMBER(year), 100) != 0) THEN '29'
                                    ELSE '28'
                                END
                            )
                        ) AND
                        REGEXP_LIKE(hour, '^(0[0-9]|1[0-9]|2[0-3])$') AND
                        REGEXP_LIKE(minute, '^[0-5][0-9]$') AND
                        REGEXP_LIKE(second, '^[0-5][0-9]$')
                    THEN 1
                    ELSE 0
                END AS is_valid
            FROM extracted_timestamps
        ),
        validated_dates AS (
            SELECT
                formatted_{field},
                is_valid
            FROM validated_timestamps
            WHERE is_valid = 1
        ),
        dates_not_in_future AS (
            SELECT *
            FROM validated_dates
            WHERE is_valid = 1
            AND REGEXP_LIKE(formatted_{field}, '^\d{{4}}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) (\d{{2}}):([0-5][0-9]):([0-5][0-9])(\.\d{{1,3}})?$')
            AND TO_TIMESTAMP(formatted_{field}, 'YYYY-MM-DD HH24:MI:SS') <= CURRENT_TIMESTAMP
        )
        SELECT
            (SELECT COUNT(*) FROM dates_not_in_future) AS valid_count,
            (SELECT COUNT(*) FROM {qualified_table_name}) AS total_count
        FROM dual
        """

        try:
            # Only the first column of the big query is used; the embedded
            # unfiltered total_count is discarded and recomputed below WITH
            # the filter applied (unlike query_timestamp_not_in_future_metric).
            valid_count = self.fetchone(query)[0]
            total_count_query = f"SELECT COUNT(*) FROM {qualified_table_name} {filters_clause}"
            total_count = self.fetchone(total_count_query)[0]

            return valid_count, total_count
        except Exception as e:
            # Best-effort metric: log and report zero counts on failure.
            logger.error(f"Error occurred: {e}")
            return 0, 0
|
|
613
|
+
|
|
614
|
+
def query_get_time_diff(self, table: str, field: str) -> int:
    """
    Get the time difference between now (UTC) and the most recent value
    of the given timestamp column.

    :param table: name of the table
    :param field: field name of updated time column
    :return: time difference in seconds (0 when the table is empty)
    """
    qualified_table_name = self.qualified_table_name(table)
    field = self.quote_column(field)
    # Fix: the original built a first query using `LIMIT 1` and a trailing
    # semicolon (both invalid in Oracle) and then immediately overwrote it
    # with the ROWNUM query below — dead code, removed.
    # Oracle has no LIMIT clause; order inside a subquery and keep the
    # first row via ROWNUM = 1 to fetch the latest timestamp.
    query = f"""
        SELECT {field}
        FROM (
            SELECT {field}
            FROM {qualified_table_name}
            ORDER BY {field} DESC
        )
        WHERE ROWNUM = 1
    """
    result = self.fetchone(query)
    if result:
        # NOTE(review): assumes the driver returns the column as a naive
        # datetime comparable with utcnow() — mixing tz-aware values here
        # would raise; confirm against the connection's type handling.
        return int(abs(datetime.utcnow() - result[0]).total_seconds())
    return 0
|
|
639
|
+
|
|
640
|
+
def query_get_all_space_count(
    self, table: str, field: str, operation: str, filters: str | None = None
) -> Union[int, float]:
    """
    Get the count of rows where the specified column contains only spaces
    (or is empty/NULL after trimming).

    :param table: table name
    :param field: column name
    :param operation: "percent" to return the share of such rows (rounded),
        anything else to return the raw count
    :param filters: optional filter condition appended as a WHERE clause
    :return: count of rows with only spaces, or a rounded percentage
    """
    qualified_table_name = self.qualified_table_name(table)
    field = self.quote_column(field)

    # In Oracle, TRIM of an all-space (or empty) string yields NULL, so
    # the IS NULL check is what actually catches whitespace-only values;
    # the = '' comparison is kept for defensive parity.
    query = f"""
        SELECT
            COUNT(CASE WHEN TRIM({field}) IS NULL OR TRIM({field}) = '' THEN 1 END) AS space_count,
            COUNT(*) AS total_count
        FROM {qualified_table_name}
    """

    if filters:
        # The triple-quoted query above ends with a newline, so no extra
        # separator is needed before WHERE.
        query += f"WHERE {filters}"

    result = self.fetchone(query)

    # Fix: the original indexed result[0]/result[1] unguarded in the
    # "percent" branch, raising TypeError if fetchone returned nothing,
    # while the count branch already guarded with `if result`.
    if not result:
        return 0

    if operation == "percent":
        return round((result[0] / result[1]) * 100) if result[1] > 0 else 0

    return result[0]
|
|
669
|
+
|
|
670
|
+
def generate_view_name(self, view_name: str | None = None) -> str:
|
|
671
|
+
if view_name is not None:
|
|
672
|
+
return view_name.upper()
|
|
673
|
+
random_string = "".join(secrets.choice(string.ascii_letters + string.digits) for _ in range(8))
|
|
674
|
+
timestamp = int(time.time())
|
|
675
|
+
return f"dcs_view_{timestamp}_{random_string.lower()}".upper()
|
|
676
|
+
|
|
677
|
+
def get_table_foreign_key_info(self, table_name: str, schema: str | None = None):
    """
    Fetch foreign-key constraint metadata for a table from the Oracle data
    dictionary (ALL_CONSTRAINTS / ALL_CONS_COLUMNS).

    :param table_name: table whose foreign keys to look up (upper-cased to
        match Oracle's default identifier case)
    :param schema: owner schema; defaults to the connection's schema
    :return: list of dicts with constraint_name, table_name, fk_column,
        referenced_table and referenced_column; [] when the lookup fails
    """
    schema = schema or self.schema_name

    # NOTE(review): table_name/schema are interpolated directly into the
    # SQL text — callers must not pass untrusted input here; bind variables
    # would be safer if the fetchall API supports them.
    # Fix: removed the trailing ';' the original embedded in the statement.
    # Oracle Python drivers (cx_Oracle / python-oracledb) reject statements
    # ending in a semicolon (ORA-00911), so the query always failed and the
    # except branch below always returned [].
    query = f"""
        SELECT
            ac.CONSTRAINT_NAME AS constraint_name,
            ac.TABLE_NAME AS table_name,
            acc.COLUMN_NAME AS fk_column,
            r_ac.TABLE_NAME AS referenced_table,
            r_acc.COLUMN_NAME AS referenced_column
        FROM ALL_CONSTRAINTS ac
        JOIN ALL_CONS_COLUMNS acc
            ON ac.CONSTRAINT_NAME = acc.CONSTRAINT_NAME
            AND ac.OWNER = acc.OWNER
        JOIN ALL_CONSTRAINTS r_ac
            ON ac.R_CONSTRAINT_NAME = r_ac.CONSTRAINT_NAME
            AND ac.R_OWNER = r_ac.OWNER
        JOIN ALL_CONS_COLUMNS r_acc
            ON r_ac.CONSTRAINT_NAME = r_acc.CONSTRAINT_NAME
            AND r_ac.OWNER = r_acc.OWNER
            AND acc.POSITION = r_acc.POSITION
        WHERE ac.CONSTRAINT_TYPE = 'R'
        AND ac.TABLE_NAME = '{table_name.upper()}'
        AND ac.OWNER = '{schema.upper()}'
    """

    try:
        rows = self.fetchall(query)
    except Exception as e:
        # Best-effort lookup: log and return an empty result rather than
        # propagating dictionary-view access errors to the caller.
        logger.error(f"Failed to fetch fk info for dataset: {table_name} ({e})")
        return []

    data = [
        {
            "constraint_name": row[0],
            "table_name": row[1],
            "fk_column": row[2],
            "referenced_table": row[3],
            "referenced_column": row[4],
        }
        for row in rows
    ]
    return data