dcs-sdk 1.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_diff/__init__.py +221 -0
- data_diff/__main__.py +517 -0
- data_diff/abcs/__init__.py +13 -0
- data_diff/abcs/compiler.py +27 -0
- data_diff/abcs/database_types.py +402 -0
- data_diff/config.py +141 -0
- data_diff/databases/__init__.py +38 -0
- data_diff/databases/_connect.py +323 -0
- data_diff/databases/base.py +1417 -0
- data_diff/databases/bigquery.py +376 -0
- data_diff/databases/clickhouse.py +217 -0
- data_diff/databases/databricks.py +262 -0
- data_diff/databases/duckdb.py +207 -0
- data_diff/databases/mssql.py +343 -0
- data_diff/databases/mysql.py +189 -0
- data_diff/databases/oracle.py +238 -0
- data_diff/databases/postgresql.py +293 -0
- data_diff/databases/presto.py +222 -0
- data_diff/databases/redis.py +93 -0
- data_diff/databases/redshift.py +233 -0
- data_diff/databases/snowflake.py +222 -0
- data_diff/databases/sybase.py +720 -0
- data_diff/databases/trino.py +73 -0
- data_diff/databases/vertica.py +174 -0
- data_diff/diff_tables.py +489 -0
- data_diff/errors.py +17 -0
- data_diff/format.py +369 -0
- data_diff/hashdiff_tables.py +1026 -0
- data_diff/info_tree.py +76 -0
- data_diff/joindiff_tables.py +434 -0
- data_diff/lexicographic_space.py +253 -0
- data_diff/parse_time.py +88 -0
- data_diff/py.typed +0 -0
- data_diff/queries/__init__.py +13 -0
- data_diff/queries/api.py +213 -0
- data_diff/queries/ast_classes.py +811 -0
- data_diff/queries/base.py +38 -0
- data_diff/queries/extras.py +43 -0
- data_diff/query_utils.py +70 -0
- data_diff/schema.py +67 -0
- data_diff/table_segment.py +583 -0
- data_diff/thread_utils.py +112 -0
- data_diff/utils.py +1022 -0
- data_diff/version.py +15 -0
- dcs_core/__init__.py +13 -0
- dcs_core/__main__.py +17 -0
- dcs_core/__version__.py +15 -0
- dcs_core/cli/__init__.py +13 -0
- dcs_core/cli/cli.py +165 -0
- dcs_core/core/__init__.py +19 -0
- dcs_core/core/common/__init__.py +13 -0
- dcs_core/core/common/errors.py +50 -0
- dcs_core/core/common/models/__init__.py +13 -0
- dcs_core/core/common/models/configuration.py +284 -0
- dcs_core/core/common/models/dashboard.py +24 -0
- dcs_core/core/common/models/data_source_resource.py +75 -0
- dcs_core/core/common/models/metric.py +160 -0
- dcs_core/core/common/models/profile.py +75 -0
- dcs_core/core/common/models/validation.py +216 -0
- dcs_core/core/common/models/widget.py +44 -0
- dcs_core/core/configuration/__init__.py +13 -0
- dcs_core/core/configuration/config_loader.py +139 -0
- dcs_core/core/configuration/configuration_parser.py +262 -0
- dcs_core/core/configuration/configuration_parser_arc.py +328 -0
- dcs_core/core/datasource/__init__.py +13 -0
- dcs_core/core/datasource/base.py +62 -0
- dcs_core/core/datasource/manager.py +112 -0
- dcs_core/core/datasource/search_datasource.py +421 -0
- dcs_core/core/datasource/sql_datasource.py +1094 -0
- dcs_core/core/inspect.py +163 -0
- dcs_core/core/logger/__init__.py +13 -0
- dcs_core/core/logger/base.py +32 -0
- dcs_core/core/logger/default_logger.py +94 -0
- dcs_core/core/metric/__init__.py +13 -0
- dcs_core/core/metric/base.py +220 -0
- dcs_core/core/metric/combined_metric.py +98 -0
- dcs_core/core/metric/custom_metric.py +34 -0
- dcs_core/core/metric/manager.py +137 -0
- dcs_core/core/metric/numeric_metric.py +403 -0
- dcs_core/core/metric/reliability_metric.py +90 -0
- dcs_core/core/profiling/__init__.py +13 -0
- dcs_core/core/profiling/datasource_profiling.py +136 -0
- dcs_core/core/profiling/numeric_field_profiling.py +72 -0
- dcs_core/core/profiling/text_field_profiling.py +67 -0
- dcs_core/core/repository/__init__.py +13 -0
- dcs_core/core/repository/metric_repository.py +77 -0
- dcs_core/core/utils/__init__.py +13 -0
- dcs_core/core/utils/log.py +29 -0
- dcs_core/core/utils/tracking.py +105 -0
- dcs_core/core/utils/utils.py +44 -0
- dcs_core/core/validation/__init__.py +13 -0
- dcs_core/core/validation/base.py +230 -0
- dcs_core/core/validation/completeness_validation.py +153 -0
- dcs_core/core/validation/custom_query_validation.py +24 -0
- dcs_core/core/validation/manager.py +282 -0
- dcs_core/core/validation/numeric_validation.py +276 -0
- dcs_core/core/validation/reliability_validation.py +91 -0
- dcs_core/core/validation/uniqueness_validation.py +61 -0
- dcs_core/core/validation/validity_validation.py +738 -0
- dcs_core/integrations/__init__.py +13 -0
- dcs_core/integrations/databases/__init__.py +13 -0
- dcs_core/integrations/databases/bigquery.py +187 -0
- dcs_core/integrations/databases/databricks.py +51 -0
- dcs_core/integrations/databases/db2.py +652 -0
- dcs_core/integrations/databases/elasticsearch.py +61 -0
- dcs_core/integrations/databases/mssql.py +829 -0
- dcs_core/integrations/databases/mysql.py +409 -0
- dcs_core/integrations/databases/opensearch.py +64 -0
- dcs_core/integrations/databases/oracle.py +719 -0
- dcs_core/integrations/databases/postgres.py +482 -0
- dcs_core/integrations/databases/redshift.py +53 -0
- dcs_core/integrations/databases/snowflake.py +48 -0
- dcs_core/integrations/databases/spark_df.py +111 -0
- dcs_core/integrations/databases/sybase.py +1069 -0
- dcs_core/integrations/storage/__init__.py +13 -0
- dcs_core/integrations/storage/local_file.py +149 -0
- dcs_core/integrations/utils/__init__.py +13 -0
- dcs_core/integrations/utils/utils.py +36 -0
- dcs_core/report/__init__.py +13 -0
- dcs_core/report/dashboard.py +211 -0
- dcs_core/report/models.py +88 -0
- dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
- dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
- dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
- dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
- dcs_core/report/static/assets/images/docs.svg +6 -0
- dcs_core/report/static/assets/images/github.svg +4 -0
- dcs_core/report/static/assets/images/logo.svg +7 -0
- dcs_core/report/static/assets/images/slack.svg +13 -0
- dcs_core/report/static/index.js +2 -0
- dcs_core/report/static/index.js.LICENSE.txt +3971 -0
- dcs_sdk/__init__.py +13 -0
- dcs_sdk/__main__.py +18 -0
- dcs_sdk/__version__.py +15 -0
- dcs_sdk/cli/__init__.py +13 -0
- dcs_sdk/cli/cli.py +163 -0
- dcs_sdk/sdk/__init__.py +58 -0
- dcs_sdk/sdk/config/__init__.py +13 -0
- dcs_sdk/sdk/config/config_loader.py +491 -0
- dcs_sdk/sdk/data_diff/__init__.py +13 -0
- dcs_sdk/sdk/data_diff/data_differ.py +821 -0
- dcs_sdk/sdk/rules/__init__.py +15 -0
- dcs_sdk/sdk/rules/rules_mappping.py +31 -0
- dcs_sdk/sdk/rules/rules_repository.py +214 -0
- dcs_sdk/sdk/rules/schema_rules.py +65 -0
- dcs_sdk/sdk/utils/__init__.py +13 -0
- dcs_sdk/sdk/utils/serializer.py +25 -0
- dcs_sdk/sdk/utils/similarity_score/__init__.py +13 -0
- dcs_sdk/sdk/utils/similarity_score/base_provider.py +153 -0
- dcs_sdk/sdk/utils/similarity_score/cosine_similarity_provider.py +39 -0
- dcs_sdk/sdk/utils/similarity_score/jaccard_provider.py +24 -0
- dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py +31 -0
- dcs_sdk/sdk/utils/table.py +475 -0
- dcs_sdk/sdk/utils/themes.py +40 -0
- dcs_sdk/sdk/utils/utils.py +349 -0
- dcs_sdk-1.6.5.dist-info/METADATA +150 -0
- dcs_sdk-1.6.5.dist-info/RECORD +159 -0
- dcs_sdk-1.6.5.dist-info/WHEEL +4 -0
- dcs_sdk-1.6.5.dist-info/entry_points.txt +4 -0
The hunk below adds a single new file — evidently `dcs_core/integrations/databases/sybase.py`, the only +1069 entry in the file list above:

@@ -0,0 +1,1069 @@

```python
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import random
import re
import time
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple, Union

import pyodbc
from loguru import logger

from dcs_core.core.common.errors import DataChecksDataSourcesConnectionError
from dcs_core.core.common.models.data_source_resource import (
    RawColumnInfo,
    SybaseDriverTypes,
)
from dcs_core.core.datasource.sql_datasource import SQLDataSource


class SybaseDataSource(SQLDataSource):
    def __init__(self, data_source_name: str, data_connection: Dict):
        super().__init__(data_source_name, data_connection)
        self.regex_patterns = {
            "uuid": r"%[0-9a-fA-F]%-%[0-9a-fA-F]%-%[0-9a-fA-F]%-%[0-9a-fA-F]%-%[0-9a-fA-F]%",
            "usa_phone": r"%[0-9][0-9][0-9] [0-9][0-9][0-9] [0-9][0-9][0-9][0-9]%",
            "email": r"%[a-zA-Z0-9._%+-]@[a-zA-Z0-9.-]%.[a-zA-Z]%",
            "usa_zip_code": r"[0-9][0-9][0-9][0-9][0-9]%",
            "ssn": r"%[0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9][0-9][0-9]%",
            "sedol": r"[B-DF-HJ-NP-TV-XZ0-9][B-DF-HJ-NP-TV-XZ0-9][B-DF-HJ-NP-TV-XZ0-9][B-DF-HJ-NP-TV-XZ0-9][B-DF-HJ-NP-TV-XZ0-9][B-DF-HJ-NP-TV-XZ0-9][0-9]",
            "lei": r"[A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][0-9][0-9]",
            "cusip": r"[0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z]",
            "figi": r"BBG[A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9]",
            "isin": r"[A-Z][A-Z][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][0-9]",
            "perm_id": r"%[0-9][0-9][0-9][0-9][- ]%[0-9][0-9][0-9][0-9][- ]%[0-9][0-9][0-9][0-9][- ]%[0-9][0-9][0-9][0-9][- ]%[0-9][0-9][0-9]%",
        }
        self.sybase_driver_type = SybaseDriverTypes()
```
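Despite the attribute name, the `regex_patterns` above are SQL `LIKE`/`PATINDEX` patterns (`%` matches any run of characters, `[...]` a character class), not true regular expressions; they are consumed by `query_string_pattern_validity()` further down. A minimal usage sketch, assuming the package is installed and a Sybase server is reachable — host, credentials, and table names here are placeholders:

```python
from dcs_core.integrations.databases.sybase import SybaseDataSource

# Keys mirror what connect() below reads from data_connection.
ds = SybaseDataSource(
    data_source_name="my_sybase",
    data_connection={
        "driver": "FreeTDS",           # or an ASE / IQ ODBC driver name
        "host": "sybase.example.com",  # placeholder
        "port": 5000,
        "database": "master",
        "username": "sa",              # placeholder
        "password": "secret",          # placeholder
    },
)
ds.connect()
```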
```python
    def connect(self) -> Any:
        driver = self.data_connection.get("driver") or "FreeTDS"
        host = self.data_connection.get("host") or ""
        server = self.data_connection.get("server") or ""
        port = self.data_connection.get("port", 5000)
        database = self.data_connection.get("database")
        username = self.data_connection.get("username")
        password = self.data_connection.get("password")
        self._detect_driver_type(driver)

        if self.sybase_driver_type.is_freetds:
            conn_dict = {
                "driver": "FreeTDS",
                "database": database,
                "user": username,
                "password": password,
                "port": port,
                "tds_version": "auto",
            }

            conn_dict["host"] = host or server

            try:
                logger.debug("Attempting FreeTDS connection")
                self.connection = pyodbc.connect(**conn_dict)
                logger.info("Successfully connected to Sybase using FreeTDS")
                return self.connection
            except Exception as e:
                error_msg = f"Failed to connect to Sybase with FreeTDS: {str(e)}"
                logger.error(error_msg)
                raise DataChecksDataSourcesConnectionError(message=error_msg)

        # Non-FreeTDS drivers: try a matrix of address keywords, port
        # spellings, and ASE-specific extras until one combination connects.
        base_params = {
            "DRIVER": self._prepare_driver_string(driver),
            "DATABASE": database,
            "UID": username,
            "PWD": password,
        }

        connection_attempts = []
        if self.sybase_driver_type.is_ase:
            connection_attempts = [
                {
                    "key": "SERVER",
                    "value": host,
                    "port": port,
                },  # ASE typically uses SERVER
                {"key": "SERVERNAME", "value": host, "port": port},
                {
                    "key": "HOST",
                    "value": f"{host}:{port}",
                    "port": None,
                },  # Host:Port format
            ]
        else:
            connection_attempts = [
                {"key": "HOST", "value": f"{host}:{port}", "port": None},
                {"key": "HOST", "value": host, "port": port},
                {"key": "SERVER", "value": server, "port": port},
                {"key": "SERVERNAME", "value": server, "port": port},
            ]

        errors = []

        for attempt in connection_attempts:
            if not attempt["value"]:
                continue

            conn_dict = base_params.copy()
            conn_dict[attempt["key"]] = attempt["value"]

            # Handle port configuration
            if attempt["port"] is not None:
                port_configs = [
                    {"PORT": attempt["port"]},
                    {"Server port": attempt["port"]},
                    {},  # Try without explicit port
                ]
            else:
                port_configs = [{}]  # Port is already in the host string

            for port_config in port_configs:
                current_config = conn_dict.copy()
                current_config.update(port_config)

                # Add ASE-specific parameters if driver is ASE
                if self.sybase_driver_type.is_ase:
                    ase_configs = [
                        {},  # Basic config
                        {"NetworkAddress": f"{host},{port}"},  # Alternative format
                        {"ServerName": host},  # Another common ASE parameter
                    ]
                else:
                    ase_configs = [{}]

                for ase_config in ase_configs:
                    final_config = current_config.copy()
                    final_config.update(ase_config)

                    try:
                        logger.debug("Attempting connection ...")
                        self.connection = pyodbc.connect(**final_config)
                        logger.info(f"Successfully connected to Sybase using driver={driver}")
                        return self.connection
                    except Exception as e:
                        error_msg = f"Failed to connect to Sybase: {e}"
                        logger.debug(error_msg)
                        errors.append(error_msg)
                        continue

        raise DataChecksDataSourcesConnectionError(
            message=f"Failed to connect to Sybase data source with driver {driver}: [{'; '.join(errors)}]"
        )
```
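Each (address keyword, port spelling, ASE extra) combination above becomes one `pyodbc.connect(**kwargs)` attempt. A stdlib-only sketch of how the nested loops expand for an ASE driver — 21 candidate configurations in total, with placeholder coordinates:

```python
host, port = "sybase.example.com", 5000  # placeholders

attempts = [
    {"key": "SERVER", "value": host, "port": port},
    {"key": "SERVERNAME", "value": host, "port": port},
    {"key": "HOST", "value": f"{host}:{port}", "port": None},
]
for attempt in attempts:
    # Three port spellings when a port is given, otherwise none.
    port_configs = (
        [{"PORT": attempt["port"]}, {"Server port": attempt["port"]}, {}]
        if attempt["port"] is not None
        else [{}]
    )
    for port_config in port_configs:
        for extra in ({}, {"NetworkAddress": f"{host},{port}"}, {"ServerName": host}):
            candidate = {attempt["key"]: attempt["value"], **port_config, **extra}
            print(candidate)  # each dict is merged into base_params and tried
```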
```python
    def _build_base_connection_params(self, driver: str, database: str, username: str, password: str) -> Dict[str, Any]:
        """Build base connection parameters dictionary."""
        return {
            "DRIVER": self._prepare_driver_string(driver),
            "DATABASE": database,
            "UID": username,
            "PWD": password,
        }

    def _normalize_driver(self, driver: str) -> str:
        """Normalize driver string by removing braces and spaces and converting to lowercase."""
        return driver.replace("{", "").replace("}", "").replace(" ", "").strip().lower()

    def _detect_driver_type(self, driver: str) -> None:
        """Detect and set the appropriate driver type."""
        normalized_driver = self._normalize_driver(driver)
        self.sybase_driver_type.is_ase = "adaptive" in normalized_driver
        self.sybase_driver_type.is_iq = "iq" in normalized_driver
        self.sybase_driver_type.is_freetds = "freetds" in normalized_driver

    def _prepare_driver_string(self, driver: str) -> str:
        """Ensure driver string is properly formatted with braces."""
        return f"{{{driver}}}" if not driver.startswith("{") else driver

    def fetchall(self, query):
        return self.connection.cursor().execute(query).fetchall()

    def fetchone(self, query):
        return self.connection.cursor().execute(query).fetchone()

    def qualified_table_name(self, table_name: str) -> str:
        """
        Get the qualified table name
        :param table_name: name of the table
        :return: qualified table name
        """
        if self.schema_name:
            return f"[{self.schema_name}].[{table_name}]"
        return f"[{table_name}]"

    def quote_column(self, column: str) -> str:
        """
        Quote the column name
        :param column: name of the column
        :return: quoted column name
        """
        return f"[{column}]"

    def query_get_row_count(self, table: str, filters: str = None) -> int:
        """
        Get the row count
        :param table: name of the table
        :param filters: optional filter
        """
        qualified_table_name = self.qualified_table_name(table)
        query = f"SELECT COUNT(*) FROM {qualified_table_name}"
        if filters:
            query += f" WHERE {filters}"
        return self.fetchone(query)[0]
```
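The two quoting helpers wrap identifiers in T-SQL-style brackets. A stdlib sketch of their behavior; note that neither escapes a `]` inside an identifier, so such names would still produce broken SQL:

```python
def quote_column(column: str) -> str:
    return f"[{column}]"

def qualified_table_name(table_name: str, schema_name: str | None) -> str:
    if schema_name:
        return f"[{schema_name}].[{table_name}]"
    return f"[{table_name}]"

print(qualified_table_name("orders", "dbo"))  # [dbo].[orders]
print(quote_column("order date"))             # [order date]
```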
```python
    def query_get_table_columns(self, table: str, schema: str | None = None) -> Dict[str, RawColumnInfo]:
        """
        Get the schema of a table.
        :param table: table name
        :param schema: optional schema name (defaults to the configured schema)
        :return: mapping of column name to RawColumnInfo
        """
        schema = schema or self.schema_name
        database = self.database
        rows = None
        query = None
        if self.sybase_driver_type.is_iq:
            query = (
                f"SELECT c.column_name, d.domain_name AS data_type, "
                f"CASE WHEN d.domain_name IN ('DATE', 'TIME', 'TIMESTAMP') THEN c.scale ELSE NULL END AS datetime_precision, "
                f"CASE WHEN t.name IN ('float') THEN 15 WHEN t.name IN ('real') THEN 7 ELSE c.prec END AS numeric_precision, "
                f"CASE WHEN t.name IN ('float', 'real') THEN NULL ELSE c.scale END AS numeric_scale, "
                f"NULL AS collation_name, c.width AS character_maximum_length "
                f"FROM {database}.SYS.SYSTABLE t "
                f"JOIN {database}.SYS.SYSCOLUMN c ON t.table_id = c.table_id "
                f"JOIN {database}.SYS.SYSDOMAIN d ON c.domain_id = d.domain_id "
                f"JOIN {database}.SYS.SYSUSER u ON t.creator = u.user_id "
                f"WHERE t.table_name = '{table}' "
                f"AND u.user_name = '{schema}'"
            )
        elif self.sybase_driver_type.is_ase:
            query = (
                f"SELECT c.name AS column_name, t.name AS data_type, "
                f"CASE WHEN c.type IN (61, 111) THEN c.prec ELSE NULL END AS datetime_precision, "
                f"CASE WHEN t.name IN ('float') THEN 15 WHEN t.name IN ('real') THEN 7 ELSE c.prec END AS numeric_precision, "
                f"CASE WHEN t.name IN ('float', 'real') THEN NULL ELSE c.scale END AS numeric_scale, "
                f"NULL AS collation_name, c.length AS character_maximum_length "
                f"FROM {database}..sysobjects o "
                f"JOIN {database}..syscolumns c ON o.id = c.id "
                f"JOIN {database}..systypes t ON c.usertype = t.usertype "
                f"JOIN {database}..sysusers u ON o.uid = u.uid "
                f"WHERE o.name = '{table}' "
                f"AND u.name = '{schema}'"
            )
        elif self.sybase_driver_type.is_freetds:
            try:
                ase_query = (
                    f"SELECT c.name AS column_name, t.name AS data_type, "
                    f"CASE WHEN c.type IN (61, 111) THEN c.prec ELSE NULL END AS datetime_precision, "
                    f"CASE WHEN t.name IN ('float') THEN 15 WHEN t.name IN ('real') THEN 7 ELSE c.prec END AS numeric_precision, "
                    f"CASE WHEN t.name IN ('float', 'real') THEN NULL ELSE c.scale END AS numeric_scale, "
                    f"NULL AS collation_name, c.length AS character_maximum_length "
                    f"FROM {database}..sysobjects o "
                    f"JOIN {database}..syscolumns c ON o.id = c.id "
                    f"JOIN {database}..systypes t ON c.usertype = t.usertype "
                    f"JOIN {database}..sysusers u ON o.uid = u.uid "
                    f"WHERE o.name = '{table}' "
                    f"AND u.name = '{schema}'"
                )
                rows = self.fetchall(ase_query)
            except Exception:
                iq_query = (
                    f"SELECT c.name AS column_name, t.name AS data_type, "
                    f"CASE WHEN c.type IN (61, 111) THEN c.prec ELSE NULL END AS datetime_precision, "
                    f"CASE WHEN t.name IN ('float') THEN 15 WHEN t.name IN ('real') THEN 7 ELSE c.prec END AS numeric_precision, "
                    f"CASE WHEN t.name IN ('float', 'real') THEN NULL ELSE c.scale END AS numeric_scale, "
                    f"NULL AS collation_name, c.length AS character_maximum_length "
                    f"FROM {database}.dbo.sysobjects o "
                    f"JOIN {database}.dbo.syscolumns c ON o.id = c.id "
                    f"JOIN {database}.dbo.systypes t ON c.usertype = t.usertype "
                    f"JOIN {database}.dbo.sysusers u ON o.uid = u.uid "
                    f"WHERE o.name = '{table}' AND u.name = '{schema}'"
                )
                rows = self.fetchall(iq_query)
        else:
            raise ValueError("Unknown Sybase driver type")
        # query is only set on the IQ/ASE paths; the FreeTDS path has already
        # fetched. (The unguarded `self.fetchall(query)` previously raised
        # NameError for FreeTDS when its fallback returned no rows.)
        if not rows and query:
            rows = self.fetchall(query)
        if not rows:
            raise RuntimeError(f"{table}: Table, {schema}: Schema, does not exist, or has no columns")

        column_info = {
            r[0]: RawColumnInfo(
                column_name=self.safe_get(r, 0),
                data_type=self.safe_get(r, 1),
                datetime_precision=self.safe_get(r, 2),
                numeric_precision=self.safe_get(r, 3),
                numeric_scale=self.safe_get(r, 4),
                collation_name=self.safe_get(r, 5),
                character_maximum_length=self.safe_get(r, 6),
            )
            for r in rows
        }
        return column_info
```
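A hedged sketch of consuming the result, assuming `ds` is a connected `SybaseDataSource` and the table and schema exist; the attributes are the `RawColumnInfo` fields populated above:

```python
columns = ds.query_get_table_columns("customers", schema="dbo")  # placeholders
for name, info in columns.items():
    print(name, info.data_type, info.numeric_precision, info.character_maximum_length)
```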
```python
    def query_get_table_indexes(self, table: str, schema: str | None = None) -> dict[str, dict]:
        """
        Get index information for a table in Sybase (IQ/ASE/FreeTDS).
        :param table: Table name
        :param schema: Optional schema name
        :return: Dictionary with index details
        """
        schema = schema or self.schema_name
        database = self.database
        rows = None

        if self.sybase_driver_type.is_iq:
            query = f"""
                SELECT
                    t.table_name,
                    i.index_name,
                    i.index_type,
                    c.column_name,
                    ic.sequence AS column_order
                FROM {database}.sys.systable t
                JOIN {database}.sys.sysindex i ON t.table_id = i.table_id
                JOIN {database}.sys.sysixcol ic ON i.index_id = ic.index_id AND i.table_id = ic.table_id
                JOIN {database}.sys.syscolumn c ON ic.column_id = c.column_id AND ic.table_id = c.table_id
                JOIN {database}.sys.sysuser u ON t.creator = u.user_id
                WHERE t.table_type = 'BASE'
                    AND t.table_name = '{table}'
                    AND u.user_name = '{schema}'
                    AND i.index_name IS NOT NULL
                ORDER BY i.index_name, ic.sequence
            """
            rows = self.fetchall(query)
        elif self.sybase_driver_type.is_ase:
            query = f"""
                SELECT
                    t.name AS table_name,
                    i.name AS index_name,
                    CASE
                        WHEN i.indid = 1 THEN 'CLUSTERED'
                        WHEN i.indid > 1 AND i.status & 2048 = 2048 THEN 'UNIQUE'
                        ELSE 'NONCLUSTERED'
                    END AS index_type,
                    c.name AS column_name,
                    ic.keyno AS column_order
                FROM sysobjects t
                JOIN sysindexes i ON t.id = i.id
                JOIN sysindexkeys ic ON i.id = ic.id AND i.indid = ic.indid
                JOIN syscolumns c ON ic.id = c.id AND ic.colid = c.colid
                JOIN sysusers u ON t.uid = u.uid
                WHERE t.type = 'U'
                    AND t.name = '{table}'
                    AND u.name = '{schema}'
                    AND i.name IS NOT NULL
                ORDER BY i.name, ic.keyno
            """
            rows = self.fetchall(query)

        elif self.sybase_driver_type.is_freetds:
            try:
                # Try ASE-compatible query
                ase_query = f"""
                    SELECT
                        o.name AS table_name,
                        i.name AS index_name,
                        CASE
                            WHEN i.indid = 1 THEN 'CLUSTERED'
                            ELSE 'NONCLUSTERED'
                        END AS index_type,
                        index_col(o.name, i.indid, c.colid, o.uid) AS column_name,
                        c.colid AS column_order
                    FROM sysobjects o
                    JOIN sysindexes i ON o.id = i.id
                    JOIN syscolumns c ON c.id = o.id
                    WHERE o.type = 'U'
                        AND o.name = '{table}'
                        AND user_name(o.uid) = '{schema}'
                        AND i.name IS NOT NULL
                        AND index_col(o.name, i.indid, c.colid, o.uid) IS NOT NULL
                    ORDER BY i.name, c.colid
                """
                rows = self.fetchall(ase_query)
            except Exception:
                # Fallback to IQ-style query
                iq_query = f"""
                    SELECT
                        t.table_name,
                        i.index_name,
                        i.index_type,
                        c.column_name,
                        ic.sequence AS column_order
                    FROM {database}.sys.systable t
                    JOIN {database}.sys.sysindex i ON t.table_id = i.table_id
                    JOIN {database}.sys.sysixcol ic ON i.index_id = ic.index_id AND i.table_id = ic.table_id
                    JOIN {database}.sys.syscolumn c ON ic.column_id = c.column_id AND ic.table_id = c.table_id
                    JOIN {database}.sys.sysuser u ON t.creator = u.user_id
                    WHERE t.table_type = 'BASE'
                        AND t.table_name = '{table}'
                        AND u.user_name = '{schema}'
                        AND i.index_name IS NOT NULL
                    ORDER BY i.index_name, ic.sequence
                """
                rows = self.fetchall(iq_query)

        else:
            raise ValueError("Unknown Sybase driver type")

        if not rows:
            raise RuntimeError(f"No index information found for table '{table}' in schema '{schema}'.")

        # Primary key extraction
        pk_columns = []
        if self.sybase_driver_type.is_iq:
            pk_sql = f"sp_iqpkeys {table}, NULL, {schema}"
            pk_rows = self.fetchall(pk_sql)
            if pk_rows:
                raw_columns = pk_rows[0][2]
                pk_columns = [col.strip() for col in raw_columns.split(",")]
        elif self.sybase_driver_type.is_ase:
            pk_sql = (
                "SELECT c.name "
                "FROM sysobjects t "
                "JOIN sysindexes i ON t.id = i.id "
                "JOIN sysindexkeys ic ON i.id = ic.id AND i.indid = ic.indid "
                "JOIN syscolumns c ON ic.id = c.id AND ic.colid = c.colid "
                "JOIN sysusers u ON t.uid = u.uid "
                f"WHERE t.type = 'U' AND t.name = '{table}' AND u.name = '{schema}' "
                "AND i.status & 2 = 2 "
                "ORDER BY ic.keyno"
            )
            pk_rows = self.fetchall(pk_sql)
            pk_columns = [row[0].strip() for row in pk_rows] if pk_rows else []
        elif self.sybase_driver_type.is_freetds:
            try:
                self.connection.autocommit = True
                pk_sql = f"EXEC sp_pkeys @table_name = '{table}', @table_owner = '{schema}'"
                pk_rows = self.fetchall(pk_sql)
                pk_columns = [row[3].strip() for row in pk_rows] if pk_rows else []
            except Exception:
                pk_sql = f"sp_iqpkeys {table}, NULL, {schema}"
                pk_rows = self.fetchall(pk_sql)
                if pk_rows:
                    raw_columns = pk_rows[0][2]
                    pk_columns = [col.strip() for col in raw_columns.split(",")]
        else:
            raise ValueError("Unknown Sybase driver type")

        pk_columns_set = set(pk_columns)

        indexes = {}
        for row in rows:
            index_name = row[1]
            index_type = row[2]
            column_info = {
                "column_name": self.safe_get(row, 3),
                "column_order": self.safe_get(row, 4),
            }
            if index_name not in indexes:
                indexes[index_name] = {"columns": [], "index_type": index_type}
            indexes[index_name]["columns"].append(column_info)

        for index_name, idx in indexes.items():
            index_columns = [col["column_name"].strip() for col in idx["columns"]]
            index_columns_set = set(index_columns)
            idx["is_primary_key"] = pk_columns_set == index_columns_set and len(index_columns) == len(pk_columns)

        return indexes
```
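The returned structure groups columns under each index name and flags an index whose column set exactly matches the primary key. Illustrated with made-up rows:

```python
indexes = {
    "customers_pk": {
        "index_type": "CLUSTERED",
        "columns": [{"column_name": "id", "column_order": 1}],
        "is_primary_key": True,
    },
    "idx_customers_email": {
        "index_type": "NONCLUSTERED",
        "columns": [{"column_name": "email", "column_order": 1}],
        "is_primary_key": False,
    },
}
```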
```python
    def query_get_table_names(
        self,
        schema: str | None = None,
        with_view: bool = False,
    ) -> dict:
        """
        Get the list of tables in the database.
        :param schema: optional schema name
        :param with_view: whether to include views
        :return: dictionary with table names and optionally view names
        """
        schema = schema or self.schema_name
        database = self.database
        if with_view:
            type_condition = "IN ('U', 'V')"
        else:
            type_condition = "= 'U'"

        if self.sybase_driver_type.is_iq:
            table_type_condition = "table_type IN ('BASE', 'VIEW')" if with_view else "table_type = 'BASE'"
            query = f"SELECT table_name, table_type FROM {database}.SYS.SYSTABLE WHERE creator = USER_ID('{schema}') AND {table_type_condition}"
        elif self.sybase_driver_type.is_ase:
            query = f"SELECT name AS table_name, type FROM {database}..sysobjects WHERE type {type_condition} AND uid = USER_ID('{schema}')"
        elif self.sybase_driver_type.is_freetds:
            query = f"SELECT name AS table_name, type FROM {database}.dbo.sysobjects WHERE type {type_condition} AND uid = USER_ID('{schema}')"
        else:
            raise ValueError("Unknown Sybase driver type")

        rows = self.fetchall(query)

        if with_view:
            result = {"table": [], "view": []}
            if rows:
                for row in rows:
                    table_name = row[0]
                    table_type = row[1].strip() if row[1] else row[1]

                    if self.sybase_driver_type.is_iq:
                        if table_type == "BASE":
                            result["table"].append(table_name)
                        elif table_type == "VIEW":
                            result["view"].append(table_name)
                    else:  # ASE or FreeTDS
                        if table_type == "U":
                            result["table"].append(table_name)
                        elif table_type == "V":
                            result["view"].append(table_name)
        else:
            result = {"table": []}
            if rows:
                result["table"] = [row[0] for row in rows]

        return result
```
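A hedged usage sketch (connected `ds` assumed; names are placeholders):

```python
result = ds.query_get_table_names(schema="dbo", with_view=True)
# e.g. {"table": ["customers", "orders"], "view": ["v_active_customers"]}
print(result["table"])
print(result["view"])
```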
```python
    def fetch_rows(
        self,
        query: str,
        limit: int = 1,
        with_column_names: bool = False,
        complete_query: Optional[str] = None,
    ) -> Tuple[List, Optional[List[str]]]:
        """
        Fetch rows from the database using pyodbc.

        :param query: SQL query to execute.
        :param limit: Number of rows to fetch.
        :param with_column_names: Whether to include column names in the result.
        :param complete_query: Optional full query that bypasses the TOP wrapper.
        :return: Tuple of (rows, column_names or None)
        """
        query = complete_query or f"SELECT TOP {limit} * FROM ({query}) AS subquery"
        cursor = self.connection.cursor()
        cursor.execute(query)
        rows = cursor.fetchmany(limit)

        if with_column_names:
            column_names = [column[0] for column in cursor.description]
            return rows, column_names
        else:
            return rows, None
```
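A hedged sketch of previewing a query with headers (connected `ds` assumed; the query is a placeholder):

```python
rows, column_names = ds.fetch_rows(
    "SELECT id, email FROM [dbo].[customers]",
    limit=3,
    with_column_names=True,
)
print(column_names)  # e.g. ['id', 'email']
for row in rows:
    print(tuple(row))
```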
```python
    def fetch_sample_values_from_database(
        self,
        table_name: str,
        column_names: list[str],
        limit: int = 5,
    ) -> list[Tuple]:
        table_name = self.qualified_table_name(table_name)
        if not column_names:
            raise ValueError("At least one column name must be provided")
        columns = ", ".join([self.quote_column(col) for col in column_names])
        query = f"SELECT TOP {limit} {columns} FROM {table_name}"
        cursor = self.connection.cursor()
        cursor.execute(query)
        rows = cursor.fetchmany(limit)
        return rows

    def convert_regex_to_sybase_pattern(self, regex_pattern: str) -> str:
        """
        Convert a regex pattern into a Sybase-compatible LIKE pattern.
        """
        # Escape literal LIKE wildcards before introducing our own.
        sybase_pattern = re.sub(r"([%_])", r"[\1]", regex_pattern)

        # Replace multi-character regex wildcards before the bare "." rule;
        # handling "." first would consume the dot in ".+" and leave that
        # replacement dead.
        sybase_pattern = sybase_pattern.replace(".*", "%")
        sybase_pattern = sybase_pattern.replace(".+", "_%")
        sybase_pattern = sybase_pattern.replace(".", "_")

        sybase_pattern = sybase_pattern.replace("?", "_")

        sybase_pattern = re.sub(r"\[([^\]]+)\]", lambda m: f"%[{m.group(1)}]%", sybase_pattern)

        # LIKE patterns are implicitly anchored; drop regex anchors.
        sybase_pattern = sybase_pattern.lstrip("^").rstrip("$")

        return sybase_pattern
```
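A stdlib-only walk-through of the conversion, mirroring the corrected replacement order above (`.+` handled before the bare `.`):

```python
import re

def convert(regex: str) -> str:
    p = re.sub(r"([%_])", r"[\1]", regex)  # escape literal LIKE wildcards
    p = p.replace(".*", "%").replace(".+", "_%").replace(".", "_")
    p = p.replace("?", "_")
    p = re.sub(r"\[([^\]]+)\]", lambda m: f"%[{m.group(1)}]%", p)
    return p.lstrip("^").rstrip("$")

print(convert("^ORD.*$"))  # ORD%
print(convert("^A.B$"))    # A_B
print(convert("v.+"))      # v_%
```

The translation is lossy by design: `LIKE` has no repetition counts or alternation, so only simple wildcard-style regexes survive the trip.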
```python
    def query_valid_invalid_values_validity(
        self,
        table: str,
        field: str,
        regex_pattern: str = None,
        filters: str = None,
        values: List[str] = None,
    ) -> Tuple[int, int]:
        """
        Get the count of valid and invalid values
        :param table: table name
        :param field: column name
        :param values: list of valid values
        :param regex_pattern: regex pattern
        :param filters: filter condition
        :return: count of valid values and total row count
        """
        filters = f"WHERE {filters}" if filters else ""
        qualified_table_name = self.qualified_table_name(table)
        field = self.quote_column(field)
        if values:
            values_str = ", ".join([f"'{value}'" for value in values])
            validation_query = f"CASE WHEN {field} IN ({values_str}) THEN 1 ELSE 0 END"
        else:
            sybase_pattern = self.convert_regex_to_sybase_pattern(regex_pattern)
            validation_query = f"CASE WHEN {field} LIKE '{sybase_pattern}' THEN 1 ELSE 0 END"

        query = f"""
            SELECT SUM({validation_query}) AS valid_count, COUNT(*) as total_count
            FROM {qualified_table_name}
            {filters}
        """
        result = self.fetchone(query)
        return result[0], result[1]

    def query_get_percentile(self, table: str, field: str, percentile: float, filters: str = None) -> float:
        raise NotImplementedError("Method not implemented for Sybase data source")
```
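For an allow-list check, the method builds a `CASE ... IN (...)` expression; restated in plain Python, the generated SQL looks like this (field, values, and table are placeholders):

```python
field, values = "[status]", ["active", "inactive", "pending"]
values_str = ", ".join(f"'{v}'" for v in values)
validation = f"CASE WHEN {field} IN ({values_str}) THEN 1 ELSE 0 END"
print(
    f"SELECT SUM({validation}) AS valid_count, COUNT(*) as total_count "
    f"FROM [dbo].[customers]"
)
```

Note the values are interpolated directly into the SQL string, as throughout this class, so callers are expected to pass trusted input.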
```python
    def query_get_all_space_count(
        self, table: str, field: str, operation: str, filters: str = None
    ) -> Union[int, float]:
        """
        Get the count of rows where the specified column contains spaces
        (including non-breaking spaces, CHAR(160)).
        :param table: table name
        :param field: column name
        :param operation: "percent" to return a percentage instead of a count
        :param filters: filter condition
        :return: count (or percentage) of rows containing spaces
        """
        qualified_table_name = self.qualified_table_name(table)
        field = self.quote_column(field)

        query = f"""
            SELECT COUNT(*) AS space_count
            FROM {qualified_table_name}
            WHERE {field} LIKE '% %' OR {field} LIKE '%' + CHAR(160) + '%'
        """

        if filters:
            query += f" AND {filters}"

        total_query = f"SELECT COUNT(*) AS total_count FROM {qualified_table_name}"
        if filters:
            total_query += f" WHERE {filters}"

        space_count = self.fetchone(query)[0]
        total_count = self.fetchone(total_query)[0]

        if operation == "percent":
            return round((space_count / total_count) * 100, 2) if total_count > 0 else 0

        return space_count if space_count is not None else 0

    def query_get_null_keyword_count(
        self, table: str, field: str, operation: str, filters: str = None
    ) -> Union[int, float]:
        """
        Get the count of NULL-like values (specific keywords) in the specified column.
        :param table: table name
        :param field: column name
        :param operation: "percent" to return a percentage instead of a count
        :param filters: filter condition
        :return: count (or percentage) of NULL-like keyword values
        """
        qualified_table_name = self.qualified_table_name(table)
        field = self.quote_column(field)

        # Query that checks for both NULL and specific NULL-like values
        query = f"""
            SELECT SUM(CASE
                        WHEN {field} IS NULL OR LOWER({field}) IN ('nothing', 'nil', 'null', 'none', 'n/a')
                        THEN 1
                        ELSE 0
                    END) AS null_count, COUNT(*) AS total_count
            FROM {qualified_table_name}
        """
        if filters:
            query += f" WHERE {filters}"

        result = self.fetchone(query)

        if result:
            if operation == "percent":
                return round((result[0] / result[1]) * 100, 2) if result[1] > 0 else 0
            return result[0]

        return 0
```
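The keyword test above, restated in plain Python as a quick sanity check of the `percent` arithmetic:

```python
null_like = {"nothing", "nil", "null", "none", "n/a"}
sample = ["N/A", "alice", None, "none", "bob"]

null_count = sum(1 for v in sample if v is None or str(v).lower() in null_like)
total_count = len(sample)
percent = round((null_count / total_count) * 100, 2) if total_count > 0 else 0
print(null_count, percent)  # 3 60.0
```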
```python
    def query_get_string_length_metric(
        self, table: str, field: str, metric: str, filters: str = None
    ) -> Union[int, float]:
        """
        Get the string length metric (max, min, avg) in a column of a table.

        :param table: table name
        :param field: column name
        :param metric: the metric to calculate ('max', 'min', 'avg')
        :param filters: filter condition
        :return: the calculated metric as int for 'max' and 'min', float for 'avg'
        """
        qualified_table_name = self.qualified_table_name(table)
        field = self.quote_column(field)

        metric = metric.lower()
        if metric == "avg":
            select_expr = f"AVG(CAST(LEN({field}) AS FLOAT))"
        elif metric in ("max", "min"):
            select_expr = f"{metric.upper()}(LEN({field}))"
        else:
            raise ValueError(f"Invalid metric '{metric}'. Choose from 'max', 'min', or 'avg'.")

        query = f"SELECT {select_expr} FROM {qualified_table_name}"
        if filters:
            query += f" WHERE {filters}"

        result = self.fetchone(query)[0]
        return round(result, 2) if metric == "avg" else result
```
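The three metrics map to these generated statements (table and column are placeholders):

```python
for metric in ("max", "min", "avg"):
    if metric == "avg":
        expr = "AVG(CAST(LEN([name]) AS FLOAT))"
    else:
        expr = f"{metric.upper()}(LEN([name]))"
    print(f"SELECT {expr} FROM [dbo].[customers]")
```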
```python
    def query_string_pattern_validity(
        self,
        table: str,
        field: str,
        regex_pattern: str = None,
        predefined_regex_pattern: str = None,
        filters: str = None,
    ) -> Tuple[int, int]:
        """
        Get the count of valid values based on the regex pattern
        :param table: table name
        :param field: column name
        :param regex_pattern: regex pattern
        :param predefined_regex_pattern: predefined regex pattern
        :param filters: filter condition
        :return: count of valid values, total row count
        """
        filters = f"WHERE {filters}" if filters else ""
        qualified_table_name = self.qualified_table_name(table)
        field = self.quote_column(field)

        if not regex_pattern and not predefined_regex_pattern:
            raise ValueError("Either regex_pattern or predefined_regex_pattern should be provided")

        if predefined_regex_pattern:
            length_query = None
            pt = self.regex_patterns[predefined_regex_pattern]
            if predefined_regex_pattern == "uuid":
                length_query = f"LEN({field}) = 36"
            elif predefined_regex_pattern == "perm_id":
                length_query = f"LEN({field}) BETWEEN 19 AND 23"
            elif predefined_regex_pattern == "lei":
                length_query = f"LEN({field}) = 20"
            elif predefined_regex_pattern == "cusip":
                length_query = f"LEN({field}) = 9"
            elif predefined_regex_pattern == "figi":
                length_query = f"LEN({field}) = 12"
            elif predefined_regex_pattern == "isin":
                length_query = f"LEN({field}) = 12"
            elif predefined_regex_pattern == "sedol":
                length_query = f"LEN({field}) = 7"
            elif predefined_regex_pattern == "ssn":
                length_query = f"LEN({field}) = 11"
            elif predefined_regex_pattern == "usa_zip_code":
                query = f"""
                    SELECT
                        SUM(CASE
                                WHEN PATINDEX('%[0-9][0-9][0-9][0-9][0-9]%', CAST({field} AS VARCHAR)) > 0
                                    AND (LEN(CAST({field} AS VARCHAR)) = 5 OR LEN(CAST({field} AS VARCHAR)) = 9)
                                THEN 1
                                ELSE 0
                            END) AS valid_count,
                        COUNT(*) AS total_count
                    FROM {qualified_table_name} {filters};
                """
                result = self.fetchone(query)
                return result[0], result[1]
            if not length_query:
                regex_query = f"PATINDEX('{pt}', {field}) > 0"
            else:
                regex_query = f"PATINDEX('{pt}', {field}) > 0 AND {length_query}"
        else:
            # The converted pattern is a LIKE pattern, not a boolean
            # expression, so it must be wrapped in a LIKE predicate.
            # (Previously the bare pattern was interpolated into the CASE,
            # producing invalid SQL.)
            sybase_pattern = self.convert_regex_to_sybase_pattern(regex_pattern)
            regex_query = f"{field} LIKE '{sybase_pattern}'"
        query = f"""
            SELECT
                SUM(CASE
                        WHEN {regex_query}
                        THEN 1
                        ELSE 0
                    END) AS valid_count,
                COUNT(*) AS total_count
            FROM {qualified_table_name} {filters}
        """
        result = self.fetchone(query)
        return result[0], result[1]
```
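A hedged usage sketch pairing a predefined pattern with its length guard (connected `ds` assumed; table and column are placeholders):

```python
valid, total = ds.query_string_pattern_validity(
    table="customers",
    field="ssn",
    predefined_regex_pattern="ssn",  # PATINDEX pattern plus LEN(...) = 11
)
print(f"{valid}/{total} values look like SSNs")
```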
```python
    def query_get_time_diff(self, table: str, field: str) -> int:
        """
        Get the time difference
        :param table: name of the table
        :param field: field name of the updated-time column
        :return: time difference in seconds
        """
        qualified_table_name = self.qualified_table_name(table)
        field = self.quote_column(field)
        query = f"""
            SELECT TOP 1 {field}
            FROM {qualified_table_name}
            ORDER BY {field} DESC;
        """
        result = self.fetchone(query)
        if result:
            updated_time = result[0]
            if isinstance(updated_time, str):
                updated_time = datetime.strptime(updated_time, "%Y-%m-%d %H:%M:%S.%f")
            return int((datetime.utcnow() - updated_time).total_seconds())
        return 0
```
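A hedged freshness-check sketch (connected `ds` assumed; names are placeholders). Because the comparison uses `datetime.utcnow()`, the column is expected to hold naive UTC timestamps:

```python
lag_seconds = ds.query_get_time_diff("orders", field="updated_at")
print(f"last update was {lag_seconds} seconds ago")
```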
```python
    def query_timestamp_metric(
        self,
        table: str,
        field: str,
        predefined_regex: str,
        filters: str = None,
    ) -> Union[float, int]:
        """
        :param table: Table name
        :param field: Column name
        :param predefined_regex: regex pattern
        :param filters: filter condition
        :return: Tuple containing valid count and total count (or percentage)
        """

        qualified_table_name = self.qualified_table_name(table)
        field = self.quote_column(field)

        temp_table_suffix = f"{int(time.time())}_{random.randint(1000, 9999)}"
        extracted_table = f"#extracted_timestamps_{temp_table_suffix}"
        validated_table = f"#validated_timestamps_{temp_table_suffix}"

        if predefined_regex == "timestamp_iso":
            filters_clause = f"WHERE {filters}" if filters else ""

            query = f"""
                -- Extract timestamp components
                SELECT
                    {field},
                    LEFT(CONVERT(VARCHAR, {field}, 120), 4) AS year,            -- Extract year
                    SUBSTRING(CONVERT(VARCHAR, {field}, 120), 6, 2) AS month,   -- Extract month
                    SUBSTRING(CONVERT(VARCHAR, {field}, 120), 9, 2) AS day,     -- Extract day
                    SUBSTRING(CONVERT(VARCHAR, {field}, 120), 12, 2) AS hour,   -- Extract hour
                    SUBSTRING(CONVERT(VARCHAR, {field}, 120), 15, 2) AS minute, -- Extract minute
                    SUBSTRING(CONVERT(VARCHAR, {field}, 120), 18, 2) AS second  -- Extract second
                INTO {extracted_table}
                FROM {qualified_table_name}
                {filters_clause};

                -- Validate timestamps and calculate the is_valid flag
                SELECT
                    {field},
                    CASE
                        WHEN
                            -- Validate year, month, and day formats
                            year LIKE '[0-9][0-9][0-9][0-9]' AND
                            month LIKE '[0-1][0-9]' AND month BETWEEN '01' AND '12' AND
                            day LIKE '[0-3][0-9]' AND day BETWEEN '01' AND
                                CASE
                                    -- Check for days in each month
                                    WHEN month IN ('01', '03', '05', '07', '08', '10', '12') THEN '31'
                                    WHEN month IN ('04', '06', '09', '11') THEN '30'
                                    WHEN month = '02' THEN
                                        CASE
                                            -- Check for leap years
                                            WHEN (CAST(year AS INT) % 400 = 0 OR (CAST(year AS INT) % 100 != 0 AND CAST(year AS INT) % 4 = 0)) THEN '29'
                                            ELSE '28'
                                        END
                                    ELSE '00' -- Invalid month
                                END AND
                            -- Validate time components
                            hour LIKE '[0-2][0-9]' AND hour BETWEEN '00' AND '23' AND
                            minute LIKE '[0-5][0-9]' AND
                            second LIKE '[0-5][0-9]'
                        THEN 1
                        ELSE 0
                    END AS is_valid
                INTO {validated_table}
                FROM {extracted_table};

                -- Get the counts
                SELECT
                    SUM(is_valid) AS valid_count,
                    COUNT(*) AS total_count
                FROM {validated_table};
            """
            try:
                result = self.fetchone(query)
                valid_count = result[0]
                total_count = result[1]

                return valid_count, total_count
            except Exception as e:
                logger.error(f"Error occurred: {e}")
                return 0, 0
        else:
            raise ValueError(f"Unknown predefined regex pattern: {predefined_regex}")
```
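The leap-year rule buried in the `CASE` expression above, restated in plain Python:

```python
def february_days(year: int) -> int:
    # Same rule as the SQL: divisible by 400, or by 4 but not by 100.
    return 29 if year % 400 == 0 or (year % 4 == 0 and year % 100 != 0) else 28

assert february_days(2000) == 29  # divisible by 400
assert february_days(1900) == 28  # divisible by 100 but not 400
assert february_days(2024) == 29  # divisible by 4 only
```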
```python
    def query_timestamp_not_in_future_metric(
        self,
        table: str,
        field: str,
        predefined_regex: str,
        filters: str = None,
    ) -> Union[float, int]:
        """
        :param table: Table name
        :param field: Column name
        :param predefined_regex: regex pattern
        :param filters: filter condition
        :return: Count of valid timestamps not in the future and total count or percentage
        """
        qualified_table_name = self.qualified_table_name(table)
        field = self.quote_column(field)

        if predefined_regex != "timestamp_iso":
            raise ValueError(f"Unknown predefined regex pattern: {predefined_regex}")

        filters_clause = f"WHERE {filters}" if filters else ""

        query = f"""
            SELECT
                SUM(CASE
                        WHEN
                            -- Validate year, month, day
                            DATEPART(yy, {field}) BETWEEN 1 AND 9999 AND
                            DATEPART(mm, {field}) BETWEEN 1 AND 12 AND
                            DATEPART(dd, {field}) BETWEEN 1 AND
                                CASE
                                    WHEN DATEPART(mm, {field}) IN (1, 3, 5, 7, 8, 10, 12) THEN 31
                                    WHEN DATEPART(mm, {field}) IN (4, 6, 9, 11) THEN 30
                                    WHEN DATEPART(mm, {field}) = 2 THEN
                                        CASE
                                            WHEN DATEPART(yy, {field}) % 400 = 0 OR
                                                 (DATEPART(yy, {field}) % 4 = 0 AND DATEPART(yy, {field}) % 100 != 0) THEN 29
                                            ELSE 28
                                        END
                                    ELSE 0
                                END AND
                            -- Validate hour, minute, second
                            DATEPART(hh, {field}) BETWEEN 0 AND 23 AND
                            DATEPART(mi, {field}) BETWEEN 0 AND 59 AND
                            DATEPART(ss, {field}) BETWEEN 0 AND 59 AND
                            -- Ensure timestamp is not in the future
                            {field} <= GETDATE()
                        THEN 1
                        ELSE 0
                    END) AS valid_count,
                COUNT(*) AS total_count
            FROM {qualified_table_name}
            {filters_clause}
        """

        try:
            result = self.fetchone(query)
            valid_count = result[0]
            total_count = result[1]

            return valid_count, total_count
        except Exception as e:
            logger.error(f"Error occurred: {e}")
            return 0, 0

    def query_timestamp_date_not_in_future_metric(
        self,
        table: str,
        field: str,
        predefined_regex: str,
        filters: str = None,
    ) -> Union[float, int]:
        """
        :param table: Table name
        :param field: Column name
        :param predefined_regex: The regex pattern to use (e.g., "timestamp_iso")
        :param filters: Optional filter condition
        :return: Tuple containing count of valid dates not in the future and total count
        """
        qualified_table_name = self.qualified_table_name(table)
        field = self.quote_column(field)
        filters_clause = f"WHERE {filters}" if filters else ""

        query = f"""
            SELECT
                SUM(CASE
                        WHEN
                            -- Validate year, month, and day
                            DATEPART(yy, {field}) BETWEEN 1 AND 9999 AND
                            DATEPART(mm, {field}) BETWEEN 1 AND 12 AND
                            DATEPART(dd, {field}) BETWEEN 1 AND
                                CASE
                                    WHEN DATEPART(mm, {field}) IN (1, 3, 5, 7, 8, 10, 12) THEN 31
                                    WHEN DATEPART(mm, {field}) IN (4, 6, 9, 11) THEN 30
                                    WHEN DATEPART(mm, {field}) = 2 THEN
                                        CASE
                                            WHEN DATEPART(yy, {field}) % 400 = 0 OR
                                                 (DATEPART(yy, {field}) % 4 = 0 AND DATEPART(yy, {field}) % 100 != 0) THEN 29
                                            ELSE 28
                                        END
                                    ELSE 0
                                END AND
                            -- Validate hour, minute, and second
                            DATEPART(hh, {field}) BETWEEN 0 AND 23 AND
                            DATEPART(mi, {field}) BETWEEN 0 AND 59 AND
                            DATEPART(ss, {field}) BETWEEN 0 AND 59 AND
                            -- Ensure the timestamp is not in the future
                            {field} <= GETDATE()
                        THEN 1
                        ELSE 0
                    END) AS valid_count,
                COUNT(*) AS total_count
            FROM {qualified_table_name}
            {filters_clause}
        """

        try:
            result = self.fetchone(query)
            valid_count = result[0]
            total_count = result[1]

            return valid_count, total_count
        except Exception as e:
            logger.error(f"Error occurred: {e}")
            return 0, 0
```