dcs-sdk 1.6.4__py3-none-any.whl → 1.6.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. dcs_core/__init__.py +13 -0
  2. dcs_core/__main__.py +17 -0
  3. dcs_core/__version__.py +15 -0
  4. dcs_core/cli/__init__.py +13 -0
  5. dcs_core/cli/cli.py +165 -0
  6. dcs_core/core/__init__.py +19 -0
  7. dcs_core/core/common/__init__.py +13 -0
  8. dcs_core/core/common/errors.py +50 -0
  9. dcs_core/core/common/models/__init__.py +13 -0
  10. dcs_core/core/common/models/configuration.py +284 -0
  11. dcs_core/core/common/models/dashboard.py +24 -0
  12. dcs_core/core/common/models/data_source_resource.py +75 -0
  13. dcs_core/core/common/models/metric.py +160 -0
  14. dcs_core/core/common/models/profile.py +75 -0
  15. dcs_core/core/common/models/validation.py +216 -0
  16. dcs_core/core/common/models/widget.py +44 -0
  17. dcs_core/core/configuration/__init__.py +13 -0
  18. dcs_core/core/configuration/config_loader.py +139 -0
  19. dcs_core/core/configuration/configuration_parser.py +262 -0
  20. dcs_core/core/configuration/configuration_parser_arc.py +328 -0
  21. dcs_core/core/datasource/__init__.py +13 -0
  22. dcs_core/core/datasource/base.py +62 -0
  23. dcs_core/core/datasource/manager.py +112 -0
  24. dcs_core/core/datasource/search_datasource.py +421 -0
  25. dcs_core/core/datasource/sql_datasource.py +1094 -0
  26. dcs_core/core/inspect.py +163 -0
  27. dcs_core/core/logger/__init__.py +13 -0
  28. dcs_core/core/logger/base.py +32 -0
  29. dcs_core/core/logger/default_logger.py +94 -0
  30. dcs_core/core/metric/__init__.py +13 -0
  31. dcs_core/core/metric/base.py +220 -0
  32. dcs_core/core/metric/combined_metric.py +98 -0
  33. dcs_core/core/metric/custom_metric.py +34 -0
  34. dcs_core/core/metric/manager.py +137 -0
  35. dcs_core/core/metric/numeric_metric.py +403 -0
  36. dcs_core/core/metric/reliability_metric.py +90 -0
  37. dcs_core/core/profiling/__init__.py +13 -0
  38. dcs_core/core/profiling/datasource_profiling.py +136 -0
  39. dcs_core/core/profiling/numeric_field_profiling.py +72 -0
  40. dcs_core/core/profiling/text_field_profiling.py +67 -0
  41. dcs_core/core/repository/__init__.py +13 -0
  42. dcs_core/core/repository/metric_repository.py +77 -0
  43. dcs_core/core/utils/__init__.py +13 -0
  44. dcs_core/core/utils/log.py +29 -0
  45. dcs_core/core/utils/tracking.py +105 -0
  46. dcs_core/core/utils/utils.py +44 -0
  47. dcs_core/core/validation/__init__.py +13 -0
  48. dcs_core/core/validation/base.py +230 -0
  49. dcs_core/core/validation/completeness_validation.py +153 -0
  50. dcs_core/core/validation/custom_query_validation.py +24 -0
  51. dcs_core/core/validation/manager.py +282 -0
  52. dcs_core/core/validation/numeric_validation.py +276 -0
  53. dcs_core/core/validation/reliability_validation.py +91 -0
  54. dcs_core/core/validation/uniqueness_validation.py +61 -0
  55. dcs_core/core/validation/validity_validation.py +738 -0
  56. dcs_core/integrations/__init__.py +13 -0
  57. dcs_core/integrations/databases/__init__.py +13 -0
  58. dcs_core/integrations/databases/bigquery.py +187 -0
  59. dcs_core/integrations/databases/databricks.py +51 -0
  60. dcs_core/integrations/databases/db2.py +652 -0
  61. dcs_core/integrations/databases/elasticsearch.py +61 -0
  62. dcs_core/integrations/databases/mssql.py +979 -0
  63. dcs_core/integrations/databases/mysql.py +409 -0
  64. dcs_core/integrations/databases/opensearch.py +64 -0
  65. dcs_core/integrations/databases/oracle.py +719 -0
  66. dcs_core/integrations/databases/postgres.py +570 -0
  67. dcs_core/integrations/databases/redshift.py +53 -0
  68. dcs_core/integrations/databases/snowflake.py +48 -0
  69. dcs_core/integrations/databases/spark_df.py +111 -0
  70. dcs_core/integrations/databases/sybase.py +1069 -0
  71. dcs_core/integrations/storage/__init__.py +13 -0
  72. dcs_core/integrations/storage/local_file.py +149 -0
  73. dcs_core/integrations/utils/__init__.py +13 -0
  74. dcs_core/integrations/utils/utils.py +36 -0
  75. dcs_core/report/__init__.py +13 -0
  76. dcs_core/report/dashboard.py +211 -0
  77. dcs_core/report/models.py +88 -0
  78. dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
  79. dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
  80. dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
  81. dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
  82. dcs_core/report/static/assets/images/docs.svg +6 -0
  83. dcs_core/report/static/assets/images/github.svg +4 -0
  84. dcs_core/report/static/assets/images/logo.svg +7 -0
  85. dcs_core/report/static/assets/images/slack.svg +13 -0
  86. dcs_core/report/static/index.js +2 -0
  87. dcs_core/report/static/index.js.LICENSE.txt +3971 -0
  88. dcs_sdk/__version__.py +1 -1
  89. dcs_sdk/cli/cli.py +3 -0
  90. {dcs_sdk-1.6.4.dist-info → dcs_sdk-1.6.6.dist-info}/METADATA +24 -2
  91. dcs_sdk-1.6.6.dist-info/RECORD +159 -0
  92. {dcs_sdk-1.6.4.dist-info → dcs_sdk-1.6.6.dist-info}/entry_points.txt +1 -0
  93. dcs_sdk-1.6.4.dist-info/RECORD +0 -72
  94. {dcs_sdk-1.6.4.dist-info → dcs_sdk-1.6.6.dist-info}/WHEEL +0 -0
@@ -0,0 +1,1069 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import random
16
+ import re
17
+ import time
18
+ from datetime import datetime
19
+ from typing import Any, Dict, List, Optional, Tuple, Union
20
+
21
+ import pyodbc
22
+ from loguru import logger
23
+
24
+ from dcs_core.core.common.errors import DataChecksDataSourcesConnectionError
25
+ from dcs_core.core.common.models.data_source_resource import (
26
+ RawColumnInfo,
27
+ SybaseDriverTypes,
28
+ )
29
+ from dcs_core.core.datasource.sql_datasource import SQLDataSource
30
+
31
+
32
+ class SybaseDataSource(SQLDataSource):
33
    def __init__(self, data_source_name: str, data_connection: Dict):
        """
        Initialize the Sybase data source.

        :param data_source_name: logical name of this data source.
        :param data_connection: connection settings (driver, host/server, port,
            database, username, password) read later by ``connect``.
        """
        super().__init__(data_source_name, data_connection)
        # LIKE-style patterns (not true regexes) used by the validity checks.
        # Sybase LIKE has no bounded repetition, so every character position is
        # spelled out explicitly; '%' and '_' are LIKE wildcards here.
        self.regex_patterns = {
            "uuid": r"%[0-9a-fA-F]%-%[0-9a-fA-F]%-%[0-9a-fA-F]%-%[0-9a-fA-F]%-%[0-9a-fA-F]%",
            "usa_phone": r"%[0-9][0-9][0-9] [0-9][0-9][0-9] [0-9][0-9][0-9][0-9]%",
            "email": r"%[a-zA-Z0-9._%+-]@[a-zA-Z0-9.-]%.[a-zA-Z]%",
            "usa_zip_code": r"[0-9][0-9][0-9][0-9][0-9]%",
            "ssn": r"%[0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9][0-9][0-9]%",
            "sedol": r"[B-DF-HJ-NP-TV-XZ0-9][B-DF-HJ-NP-TV-XZ0-9][B-DF-HJ-NP-TV-XZ0-9][B-DF-HJ-NP-TV-XZ0-9][B-DF-HJ-NP-TV-XZ0-9][B-DF-HJ-NP-TV-XZ0-9][0-9]",
            "lei": r"[A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][0-9][0-9]",
            "cusip": r"[0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z]",
            "figi": r"BBG[A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9]",
            "isin": r"[A-Z][A-Z][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][0-9]",
            "perm_id": r"%[0-9][0-9][0-9][0-9][- ]%[0-9][0-9][0-9][0-9][- ]%[0-9][0-9][0-9][0-9][- ]%[0-9][0-9][0-9][0-9][- ]%[0-9][0-9][0-9]%",
        }
        # Driver flags (is_ase / is_iq / is_freetds) are filled in later by
        # _detect_driver_type() during connect().
        self.sybase_driver_type = SybaseDriverTypes()
49
+
50
    def connect(self) -> Any:
        """
        Open a pyodbc connection to Sybase, trying several driver-specific
        keyword layouts until one succeeds.

        The FreeTDS path is attempted exactly once with lower-case keywords
        and either returns or raises immediately; the attempt loop below it
        therefore only runs for native (ASE/IQ) ODBC drivers.

        :return: the open pyodbc connection (also stored on ``self.connection``)
        :raises DataChecksDataSourcesConnectionError: when every attempt fails.
        """
        # Fall back to FreeTDS when no driver is configured.
        driver = self.data_connection.get("driver") or "FreeTDS"
        host = self.data_connection.get("host") or ""
        server = self.data_connection.get("server") or ""
        port = self.data_connection.get("port", 5000)
        database = self.data_connection.get("database")
        username = self.data_connection.get("username")
        password = self.data_connection.get("password")
        # Populates self.sybase_driver_type flags used throughout this class.
        self._detect_driver_type(driver)

        if self.sybase_driver_type.is_freetds:
            # FreeTDS accepts lower-case pyodbc keyword arguments.
            conn_dict = {
                "driver": "FreeTDS",
                "database": database,
                "user": username,
                "password": password,
                "port": port,
                "tds_version": "auto",
            }

            # Either "host" or "server" may be configured; prefer "host".
            conn_dict["host"] = host or server

            try:
                logger.debug("Attempting FreeTDS connection")
                self.connection = pyodbc.connect(**conn_dict)
                logger.info("Successfully connected to Sybase using FreeTDS")
                return self.connection
            except Exception as e:
                error_msg = f"Failed to connect to Sybase with FreeTDS: {str(e)}"
                logger.error(error_msg)
                raise DataChecksDataSourcesConnectionError(message=error_msg)

        # Native ODBC drivers use upper-case connection-string keywords.
        base_params = {
            "DRIVER": self._prepare_driver_string(driver),
            "DATABASE": database,
            "UID": username,
            "PWD": password,
        }

        # Each attempt names the host under a different keyword because
        # different Sybase ODBC drivers disagree on which one they honor.
        connection_attempts = []
        if self.sybase_driver_type.is_ase:
            connection_attempts = [
                {
                    "key": "SERVER",
                    "value": host,
                    "port": port,
                },  # ASE typically uses SERVER
                {"key": "SERVERNAME", "value": host, "port": port},
                {
                    "key": "HOST",
                    "value": f"{host}:{port}",
                    "port": None,
                },  # Host:Port format
            ]
        else:
            connection_attempts = [
                {"key": "HOST", "value": f"{host}:{port}", "port": None},
                {"key": "HOST", "value": host, "port": port},
                {"key": "SERVER", "value": server, "port": port},
                {"key": "SERVERNAME", "value": server, "port": port},
            ]

        errors = []

        for attempt in connection_attempts:
            # Skip layouts for which no host/server value was configured.
            if not attempt["value"]:
                continue

            conn_dict = base_params.copy()
            conn_dict[attempt["key"]] = attempt["value"]

            # Handle port configuration
            if attempt["port"] is not None:
                port_configs = [
                    {"PORT": attempt["port"]},
                    {"Server port": attempt["port"]},
                    {},  # Try without explicit port
                ]
            else:
                port_configs = [{}]  # Port is already in the host string

            for port_config in port_configs:
                current_config = conn_dict.copy()
                current_config.update(port_config)

                # Add ASE-specific parameters if driver is ASE
                if self.sybase_driver_type.is_ase:
                    ase_configs = [
                        {},  # Basic config
                        {"NetworkAddress": f"{host},{port}"},  # Alternative format
                        {"ServerName": host},  # Another common ASE parameter
                    ]
                else:
                    ase_configs = [{}]

                for ase_config in ase_configs:
                    final_config = current_config.copy()
                    final_config.update(ase_config)

                    try:
                        logger.debug("Attempting connection ...")
                        self.connection = pyodbc.connect(**final_config)
                        logger.info("Successfully connected to Sybase using: " f"driver={driver}")
                        return self.connection
                    except Exception as e:
                        # Keep trying the remaining layouts; remember the failure.
                        error_msg = "Failed to connect to sybase."
                        logger.debug(error_msg)
                        errors.append(error_msg)
                        continue

        raise DataChecksDataSourcesConnectionError(
            message=f"Failed to connect to Sybase data source with driver {driver}: " f"[{'; '.join(errors)}]"
        )
163
+
164
+ def _build_base_connection_params(self, driver: str, database: str, username: str, password: str) -> Dict[str, Any]:
165
+ """Build base connection parameters dictionary."""
166
+ return {
167
+ "DRIVER": self._prepare_driver_string(driver),
168
+ "DATABASE": database,
169
+ "UID": username,
170
+ "PWD": password,
171
+ }
172
+
173
+ def _normalize_driver(self, driver: str) -> str:
174
+ """Normalize driver string by removing braces, spaces, and converting to lowercase."""
175
+ return driver.replace("{", "").replace("}", "").replace(" ", "").strip().lower()
176
+
177
+ def _detect_driver_type(self, driver: str) -> None:
178
+ """Detect and set the appropriate driver type."""
179
+ normalized_driver = self._normalize_driver(driver)
180
+ self.sybase_driver_type.is_ase = "adaptive" in normalized_driver
181
+ self.sybase_driver_type.is_iq = "iq" in normalized_driver
182
+ self.sybase_driver_type.is_freetds = "freetds" in normalized_driver
183
+
184
+ def _prepare_driver_string(self, driver: str) -> str:
185
+ """Ensure driver string is properly formatted with braces."""
186
+ return f"{{{driver}}}" if not driver.startswith("{") else driver
187
+
188
+ def fetchall(self, query):
189
+ return self.connection.cursor().execute(query).fetchall()
190
+
191
+ def fetchone(self, query):
192
+ return self.connection.cursor().execute(query).fetchone()
193
+
194
+ def qualified_table_name(self, table_name: str) -> str:
195
+ """
196
+ Get the qualified table name
197
+ :param table_name: name of the table
198
+ :return: qualified table name
199
+ """
200
+ if self.schema_name:
201
+ return f"[{self.schema_name}].[{table_name}]"
202
+ return f"[{table_name}]"
203
+
204
+ def quote_column(self, column: str) -> str:
205
+ """
206
+ Quote the column name
207
+ :param column: name of the column
208
+ :return: quoted column name
209
+ """
210
+ return f"[{column}]"
211
+
212
+ def query_get_row_count(self, table: str, filters: str = None) -> int:
213
+ """
214
+ Get the row count
215
+ :param table: name of the table
216
+ :param filters: optional filter
217
+ """
218
+ qualified_table_name = self.qualified_table_name(table)
219
+ query = f"SELECT COUNT(*) FROM {qualified_table_name}"
220
+ if filters:
221
+ query += f" WHERE {filters}"
222
+ return self.fetchone(query)[0]
223
+
224
+ def query_get_table_columns(self, table: str, schema: str | None = None) -> RawColumnInfo:
225
+ """
226
+ Get the schema of a table.
227
+ :param table: table name
228
+ :return: RawColumnInfo object containing column information
229
+ """
230
+ schema = schema or self.schema_name
231
+ database = self.database
232
+ rows = None
233
+ if self.sybase_driver_type.is_iq:
234
+ query = (
235
+ f"SELECT c.column_name, d.domain_name AS data_type, "
236
+ f"CASE WHEN d.domain_name IN ('DATE', 'TIME', 'TIMESTAMP') THEN c.scale ELSE NULL END AS datetime_precision, "
237
+ f"CASE WHEN t.name IN ('float') THEN 15 WHEN t.name IN ('real') THEN 7 ELSE c.prec END AS numeric_precision, "
238
+ f"CASE WHEN t.name IN ('float', 'real') THEN NULL ELSE c.scale END AS numeric_scale, "
239
+ f"NULL AS collation_name, c.width AS character_maximum_length "
240
+ f"FROM {database}.SYS.SYSTABLE t "
241
+ f"JOIN {database}.SYS.SYSCOLUMN c ON t.table_id = c.table_id "
242
+ f"JOIN {database}.SYS.SYSDOMAIN d ON c.domain_id = d.domain_id "
243
+ f"JOIN {database}.SYS.SYSUSER u ON t.creator = u.user_id "
244
+ f"WHERE t.table_name = '{table}' "
245
+ f"AND u.user_name = '{schema}'"
246
+ )
247
+
248
+ elif self.sybase_driver_type.is_ase:
249
+ query = (
250
+ f"SELECT c.name AS column_name, t.name AS data_type, "
251
+ f"CASE WHEN c.type IN (61, 111) THEN c.prec ELSE NULL END AS datetime_precision, "
252
+ f"CASE WHEN t.name IN ('float') THEN 15 WHEN t.name IN ('real') THEN 7 ELSE c.prec END AS numeric_precision, "
253
+ f"CASE WHEN t.name IN ('float', 'real') THEN NULL ELSE c.scale END AS numeric_scale, "
254
+ f"NULL AS collation_name, c.length AS character_maximum_length "
255
+ f"FROM {database}..sysobjects o "
256
+ f"JOIN {database}..syscolumns c ON o.id = c.id "
257
+ f"JOIN {database}..systypes t ON c.usertype = t.usertype "
258
+ f"JOIN {database}..sysusers u ON o.uid = u.uid "
259
+ f"WHERE o.name = '{table}' "
260
+ f"AND u.name = '{schema}'"
261
+ )
262
+ elif self.sybase_driver_type.is_freetds:
263
+ try:
264
+ ase_query = (
265
+ f"SELECT c.name AS column_name, t.name AS data_type, "
266
+ f"CASE WHEN c.type IN (61, 111) THEN c.prec ELSE NULL END AS datetime_precision, "
267
+ f"CASE WHEN t.name IN ('float') THEN 15 WHEN t.name IN ('real') THEN 7 ELSE c.prec END AS numeric_precision, "
268
+ f"CASE WHEN t.name IN ('float', 'real') THEN NULL ELSE c.scale END AS numeric_scale, "
269
+ f"NULL AS collation_name, c.length AS character_maximum_length "
270
+ f"FROM {database}..sysobjects o "
271
+ f"JOIN {database}..syscolumns c ON o.id = c.id "
272
+ f"JOIN {database}..systypes t ON c.usertype = t.usertype "
273
+ f"JOIN {database}..sysusers u ON o.uid = u.uid "
274
+ f"WHERE o.name = '{table}' "
275
+ f"AND u.name = '{schema}'"
276
+ )
277
+ rows = self.fetchall(ase_query)
278
+
279
+ except Exception as _:
280
+ iq_query = (
281
+ f"SELECT c.name AS column_name, t.name AS data_type, "
282
+ f"CASE WHEN c.type IN (61, 111) THEN c.prec ELSE NULL END AS datetime_precision, "
283
+ f"CASE WHEN t.name IN ('float') THEN 15 WHEN t.name IN ('real') THEN 7 ELSE c.prec END AS numeric_precision, "
284
+ f"CASE WHEN t.name IN ('float', 'real') THEN NULL ELSE c.scale END AS numeric_scale, "
285
+ f"NULL AS collation_name, c.length AS character_maximum_length "
286
+ f"FROM {database}.dbo.sysobjects o "
287
+ f"JOIN {database}.dbo.syscolumns c ON o.id = c.id "
288
+ f"JOIN {database}.dbo.systypes t ON c.usertype = t.usertype "
289
+ f"JOIN {database}.dbo.sysusers u ON o.uid = u.uid "
290
+ f"WHERE o.name = '{table}' AND u.name = '{schema}'"
291
+ )
292
+ rows = self.fetchall(iq_query)
293
+ else:
294
+ raise ValueError("Unknown Sybase driver type")
295
+ if not rows:
296
+ rows = self.fetchall(query)
297
+ if not rows:
298
+ raise RuntimeError(f"{table}: Table, {schema}: Schema, does not exist, or has no columns")
299
+
300
+ column_info = {
301
+ r[0]: RawColumnInfo(
302
+ column_name=self.safe_get(r, 0),
303
+ data_type=self.safe_get(r, 1),
304
+ datetime_precision=self.safe_get(r, 2),
305
+ numeric_precision=self.safe_get(r, 3),
306
+ numeric_scale=self.safe_get(r, 4),
307
+ collation_name=self.safe_get(r, 5),
308
+ character_maximum_length=self.safe_get(r, 6),
309
+ )
310
+ for r in rows
311
+ }
312
+ return column_info
313
+
314
    def query_get_table_indexes(self, table: str, schema: str | None = None) -> dict[str, dict]:
        """
        Get index information for a table in Sybase (IQ/ASE/FreeTDS).

        Each returned entry maps index name -> {"columns": [...],
        "index_type": ..., "is_primary_key": bool}; the primary-key flag is
        derived by comparing each index's column set against the table's
        primary-key columns looked up separately below.

        :param table: Table name
        :param schema: Optional schema name
        :return: Dictionary with index details
        :raises ValueError: if no known driver type was detected.
        :raises RuntimeError: if no index rows were found.
        """
        schema = schema or self.schema_name
        database = self.database
        rows = None

        if self.sybase_driver_type.is_iq:
            # IQ: index metadata via the SYS.* catalog views.
            query = (
                "SELECT\n"
                " t.table_name,\n"
                " i.index_name,\n"
                " i.index_type,\n"
                " c.column_name,\n"
                " ic.sequence AS column_order\n"
                "FROM\n"
                f" {database}.sys.systable t\n"
                "JOIN\n"
                f" {database}.sys.sysindex i ON t.table_id = i.table_id\n"
                "JOIN\n"
                f" {database}.sys.sysixcol ic ON i.index_id = ic.index_id AND i.table_id = ic.table_id\n"
                "JOIN\n"
                f" {database}.sys.syscolumn c ON ic.column_id = c.column_id AND ic.table_id = c.table_id\n"
                "JOIN\n"
                f" {database}.sys.sysuser u ON t.creator = u.user_id\n"
                "WHERE\n"
                " t.table_type = 'BASE'\n"
                f" AND t.table_name = '{table}'\n"
                f" AND u.user_name = '{schema}'\n"
                " AND i.index_name IS NOT NULL\n"
                "ORDER BY\n"
                " i.index_name, ic.sequence"
            )
            rows = self.fetchall(query)
        elif self.sybase_driver_type.is_ase:
            # ASE: sysindexes; indid/status bits distinguish the index kind.
            query = (
                "SELECT\n"
                " t.name AS table_name,\n"
                " i.name AS index_name,\n"
                " CASE \n"
                " WHEN i.indid = 1 THEN 'CLUSTERED'\n"
                " WHEN i.indid > 1 AND i.status & 2048 = 2048 THEN 'UNIQUE'\n"
                " ELSE 'NONCLUSTERED'\n"
                " END AS index_type,\n"
                " c.name AS column_name,\n"
                " ic.keyno AS column_order\n"
                "FROM\n"
                " sysobjects t\n"
                "JOIN\n"
                " sysindexes i ON t.id = i.id\n"
                "JOIN\n"
                " sysindexkeys ic ON i.id = ic.id AND i.indid = ic.indid\n"
                "JOIN\n"
                " syscolumns c ON ic.id = c.id AND ic.colid = c.colid\n"
                "JOIN\n"
                " sysusers u ON t.uid = u.uid\n"
                "WHERE\n"
                " t.type = 'U'\n"
                f" AND t.name = '{table}'\n"
                f" AND u.name = '{schema}'\n"
                " AND i.name IS NOT NULL\n"
                "ORDER BY\n"
                " i.name, ic.keyno"
            )
            rows = self.fetchall(query)

        elif self.sybase_driver_type.is_freetds:
            try:
                # Try ASE-compatible query
                ase_query = (
                    f"SELECT\n"
                    f" o.name AS table_name,\n"
                    f" i.name AS index_name,\n"
                    f" CASE\n"
                    f" WHEN i.indid = 1 THEN 'CLUSTERED'\n"
                    f" ELSE 'NONCLUSTERED'\n"
                    f" END AS index_type,\n"
                    f" index_col(o.name, i.indid, c.colid, o.uid) AS column_name,\n"
                    f" c.colid AS column_order\n"
                    f"FROM\n"
                    f" sysobjects o\n"
                    f"JOIN\n"
                    f" sysindexes i ON o.id = i.id\n"
                    f"JOIN\n"
                    f" syscolumns c ON c.id = o.id\n"
                    f"WHERE\n"
                    f" o.type = 'U'\n"
                    f" AND o.name = '{table}'\n"
                    f" AND user_name(o.uid) = '{schema}'\n"
                    f" AND i.name IS NOT NULL\n"
                    f" AND index_col(o.name, i.indid, c.colid, o.uid) IS NOT NULL\n"
                    f"ORDER BY\n"
                    f" i.name, c.colid\n"
                )
                rows = self.fetchall(ase_query)
            except Exception as e:
                # Fallback to IQ-style query
                iq_query = (
                    "SELECT\n"
                    " t.table_name,\n"
                    " i.index_name,\n"
                    " i.index_type,\n"
                    " c.column_name,\n"
                    " ic.sequence AS column_order\n"
                    "FROM\n"
                    f" {database}.sys.systable t\n"
                    "JOIN\n"
                    f" {database}.sys.sysindex i ON t.table_id = i.table_id\n"
                    "JOIN\n"
                    f" {database}.sys.sysixcol ic ON i.index_id = ic.index_id AND i.table_id = ic.table_id\n"
                    "JOIN\n"
                    f" {database}.sys.syscolumn c ON ic.column_id = c.column_id AND ic.table_id = c.table_id\n"
                    "JOIN\n"
                    f" {database}.sys.sysuser u ON t.creator = u.user_id\n"
                    "WHERE\n"
                    " t.table_type = 'BASE'\n"
                    f" AND t.table_name = '{table}'\n"
                    f" AND u.user_name = '{schema}'\n"
                    " AND i.index_name IS NOT NULL\n"
                    "ORDER BY\n"
                    " i.index_name, ic.sequence"
                )
                rows = self.fetchall(iq_query)

        else:
            raise ValueError("Unknown Sybase driver type")

        if not rows:
            raise RuntimeError(f"No index information found for table '{table}' in schema '{schema}'.")

        # Primary key extraction
        pk_columns = []
        if self.sybase_driver_type.is_iq:
            # sp_iqpkeys returns the PK column list as one comma-joined string.
            pk_sql = f"sp_iqpkeys {table}, NULL, {schema}"
            pk_rows = self.fetchall(pk_sql)
            if pk_rows:
                raw_columns = pk_rows[0][2]
                pk_columns = [col.strip() for col in raw_columns.split(",")]
        elif self.sybase_driver_type.is_ase:
            # status bit 2 marks the index backing a declared primary key.
            pk_sql = (
                "SELECT c.name "
                "FROM sysobjects t "
                "JOIN sysindexes i ON t.id = i.id "
                "JOIN sysindexkeys ic ON i.id = ic.id AND i.indid = ic.indid "
                "JOIN syscolumns c ON ic.id = c.id AND ic.colid = c.colid "
                "JOIN sysusers u ON t.uid = u.uid "
                f"WHERE t.type = 'U' AND t.name = '{table}' AND u.name = '{schema}' "
                "AND i.status & 2 = 2 "
                "ORDER BY ic.keyno"
            )
            pk_rows = self.fetchall(pk_sql)
            pk_columns = [row[0].strip() for row in pk_rows] if pk_rows else []
        elif self.sybase_driver_type.is_freetds:
            try:
                # NOTE(review): autocommit appears needed for sp_pkeys under
                # FreeTDS — confirm against the driver's behavior.
                self.connection.autocommit = True
                pk_sql = f"EXEC sp_pkeys @table_name = '{table}', @table_owner = '{schema}'"
                pk_rows = self.fetchall(pk_sql)
                pk_columns = [row[3].strip() for row in pk_rows] if pk_rows else []
            except Exception as e:
                pk_sql = f"sp_iqpkeys {table}, NULL, {schema}"
                pk_rows = self.fetchall(pk_sql)
                if pk_rows:
                    raw_columns = pk_rows[0][2]
                    pk_columns = [col.strip() for col in raw_columns.split(",")]
        else:
            raise ValueError("Unknown Sybase driver type")

        pk_columns_set = set(pk_columns)

        # Group the flat (table, index, type, column, order) rows per index.
        indexes = {}
        for row in rows:
            index_name = row[1]
            index_type = row[2]
            column_info = {
                "column_name": self.safe_get(row, 3),
                "column_order": self.safe_get(row, 4),
            }
            if index_name not in indexes:
                indexes[index_name] = {"columns": [], "index_type": index_type}
            indexes[index_name]["columns"].append(column_info)

        # An index is the primary key when its column set equals the PK's.
        for index_name, idx in indexes.items():
            index_columns = [col["column_name"].strip() for col in idx["columns"]]
            index_columns_set = set(index_columns)
            idx["is_primary_key"] = pk_columns_set == index_columns_set and len(index_columns) == len(pk_columns)

        return indexes
505
+
506
    def query_get_table_names(
        self,
        schema: str | None = None,
        with_view: bool = False,
    ) -> dict:
        """
        Get the list of tables in the database.

        :param schema: optional schema name
        :param with_view: whether to include views
        :return: {"table": [...]} and, when with_view is True, also "view"
        :raises ValueError: if no known driver type was detected.
        """
        schema = schema or self.schema_name
        database = self.database
        # ASE/FreeTDS object-type filter; IQ uses table_type strings instead.
        if with_view:
            type_condition = "IN ('U', 'V')"
        else:
            type_condition = "= 'U'"

        if self.sybase_driver_type.is_iq:
            table_type_condition = "table_type IN ('BASE', 'VIEW')" if with_view else "table_type = 'BASE'"
            query = f"SELECT table_name, table_type FROM {database}.SYS.SYSTABLE WHERE creator = USER_ID('{schema}') AND {table_type_condition}"
        elif self.sybase_driver_type.is_ase:
            query = f"SELECT name AS table_name, type FROM {database}..sysobjects WHERE type {type_condition} AND uid = USER_ID('{schema}')"
        elif self.sybase_driver_type.is_freetds:
            query = f"SELECT name AS table_name, type FROM {database}.dbo.sysobjects WHERE type {type_condition} AND uid = USER_ID('{schema}')"
        else:
            raise ValueError("Unknown Sybase driver type")

        rows = self.fetchall(query)

        if with_view:
            result = {"table": [], "view": []}
            if rows:
                for row in rows:
                    table_name = row[0]
                    # sysobjects.type is CHAR-padded; strip before comparing.
                    table_type = row[1].strip() if row[1] else row[1]

                    if self.sybase_driver_type.is_iq:
                        if table_type == "BASE":
                            result["table"].append(table_name)
                        elif table_type == "VIEW":
                            result["view"].append(table_name)
                    else:  # ASE or FreeTDS
                        if table_type == "U":
                            result["table"].append(table_name)
                        elif table_type == "V":
                            result["view"].append(table_name)
        else:
            result = {"table": []}
            if rows:
                result["table"] = [row[0] for row in rows]

        return result
559
+
560
+ def fetch_rows(
561
+ self,
562
+ query: str,
563
+ limit: int = 1,
564
+ with_column_names: bool = False,
565
+ complete_query: Optional[str] = None,
566
+ ) -> Tuple[List, Optional[List[str]]]:
567
+ """
568
+ Fetch rows from the database using pyodbc.
569
+
570
+ :param query: SQL query to execute.
571
+ :param limit: Number of rows to fetch.
572
+ :param with_column_names: Whether to include column names in the result.
573
+ :return: Tuple of (rows, column_names or None)
574
+ """
575
+ query = complete_query or f"SELECT TOP {limit} * FROM ({query}) AS subquery"
576
+ cursor = self.connection.cursor()
577
+ cursor.execute(query)
578
+ rows = cursor.fetchmany(limit)
579
+
580
+ if with_column_names:
581
+ column_names = [column[0] for column in cursor.description]
582
+ return rows, column_names
583
+ else:
584
+ return rows, None
585
+
586
+ def fetch_sample_values_from_database(
587
+ self,
588
+ table_name: str,
589
+ column_names: list[str],
590
+ limit: int = 5,
591
+ ) -> list[Tuple]:
592
+ table_name = self.qualified_table_name(table_name)
593
+ if not column_names:
594
+ raise ValueError("At least one column name must be provided")
595
+ columns = ", ".join([self.quote_column(col) for col in column_names])
596
+ query = f"SELECT TOP {limit} {columns} FROM {table_name}"
597
+ cursor = self.connection.cursor()
598
+ cursor.execute(query)
599
+ rows = cursor.fetchmany(limit)
600
+ return rows
601
+
602
+ def convert_regex_to_sybase_pattern(self, regex_pattern: str) -> str:
603
+ """
604
+ Convert a regex pattern into a Sybase-compatible LIKE pattern.
605
+ """
606
+ sybase_pattern = re.sub(r"([%_])", r"[\1]", regex_pattern)
607
+
608
+ sybase_pattern = sybase_pattern.replace(".*", "%")
609
+ sybase_pattern = sybase_pattern.replace(".", "_")
610
+ sybase_pattern = sybase_pattern.replace(".+", "_%")
611
+
612
+ sybase_pattern = sybase_pattern.replace("?", "_")
613
+
614
+ sybase_pattern = re.sub(r"\[([^\]]+)\]", lambda m: f"%[{m.group(1)}]%", sybase_pattern)
615
+
616
+ sybase_pattern = sybase_pattern.lstrip("^").rstrip("$")
617
+
618
+ return sybase_pattern
619
+
620
+ def query_valid_invalid_values_validity(
621
+ self,
622
+ table: str,
623
+ field: str,
624
+ regex_pattern: str = None,
625
+ filters: str = None,
626
+ values: List[str] = None,
627
+ ) -> Tuple[int, int]:
628
+ """
629
+ Get the count of valid and invalid values
630
+ :param table: table name
631
+ :param field: column name
632
+ :param values: list of valid values
633
+ :param regex_pattern: regex pattern
634
+ :param filters: filter condition
635
+ :return: count of valid/invalid values and total count of valid/invalid values
636
+ """
637
+ filters = f"WHERE {filters}" if filters else ""
638
+ qualified_table_name = self.qualified_table_name(table)
639
+ field = self.quote_column(field)
640
+ if values:
641
+ values_str = ", ".join([f"'{value}'" for value in values])
642
+ validation_query = f"CASE WHEN {field} IN ({values_str}) THEN 1 ELSE 0 END"
643
+ else:
644
+ sybase_pattern = self.convert_regex_to_sybase_pattern(regex_pattern)
645
+ validation_query = f"CASE WHEN {field} LIKE '{sybase_pattern}' THEN 1 ELSE 0 END"
646
+
647
+ query = f"""
648
+ SELECT SUM({validation_query}) AS valid_count, COUNT(*) as total_count
649
+ FROM {qualified_table_name}
650
+ {filters}
651
+ """
652
+ result = self.fetchone(query)
653
+ return result[0], result[1]
654
+
655
    def query_get_percentile(self, table: str, field: str, percentile: float, filters: str = None) -> float:
        """
        Percentile metrics are not supported for Sybase.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError("Method not implemented for Sybase data source")
657
+
658
+ def query_get_all_space_count(
659
+ self, table: str, field: str, operation: str, filters: str = None
660
+ ) -> Union[int, float]:
661
+ """
662
+ Get the count of rows where the specified column contains only spaces.
663
+ :param table: table name
664
+ :param field: column name
665
+ :param filters: filter condition
666
+ :return: count of rows with only spaces
667
+ """
668
+ qualified_table_name = self.qualified_table_name(table)
669
+ field = self.quote_column(field)
670
+
671
+ query = f"""
672
+ SELECT COUNT(*) AS space_count
673
+ FROM {qualified_table_name}
674
+ WHERE {field} LIKE '% %' OR {field} LIKE '%' + CHAR(160) + '%'
675
+ """
676
+
677
+ if filters:
678
+ query += f" AND {filters}"
679
+
680
+ total_query = f"SELECT COUNT(*) AS total_count FROM {qualified_table_name}"
681
+ if filters:
682
+ total_query += f" WHERE {filters}"
683
+
684
+ space_count = self.fetchone(query)[0]
685
+ total_count = self.fetchone(total_query)[0]
686
+
687
+ if operation == "percent":
688
+ return round((space_count / total_count) * 100, 2) if total_count > 0 else 0
689
+
690
+ return space_count if space_count is not None else 0
691
+
692
+ def query_get_null_keyword_count(
693
+ self, table: str, field: str, operation: str, filters: str = None
694
+ ) -> Union[int, float]:
695
+ """
696
+ Get the count of NULL-like values (specific keywords) in the specified column.
697
+ :param table: table name
698
+ :param field: column name
699
+ :param filters: filter condition
700
+ :return: count of NULL-like keyword values
701
+ """
702
+ qualified_table_name = self.qualified_table_name(table)
703
+ field = self.quote_column(field)
704
+
705
+ # Query that checks for both NULL and specific NULL-like values
706
+ query = f"""
707
+ SELECT SUM(CASE
708
+ WHEN {field} IS NULL OR LOWER({field}) IN ('nothing', 'nil', 'null', 'none', 'n/a')
709
+ THEN 1
710
+ ELSE 0
711
+ END) AS null_count, COUNT(*) AS total_count
712
+ FROM {qualified_table_name}
713
+ """
714
+ if filters:
715
+ query += f" WHERE {filters}"
716
+
717
+ result = self.fetchone(query)
718
+
719
+ if result:
720
+ if operation == "percent":
721
+ return round((result[0] / result[1]) * 100, 2) if result[1] > 0 else 0
722
+ return result[0]
723
+
724
+ return 0
725
+
726
+ def query_get_string_length_metric(
727
+ self, table: str, field: str, metric: str, filters: str = None
728
+ ) -> Union[int, float]:
729
+ """
730
+ Get the string length metric (max, min, avg) in a column of a table.
731
+
732
+ :param table: table name
733
+ :param field: column name
734
+ :param metric: the metric to calculate ('max', 'min', 'avg')
735
+ :param filters: filter condition
736
+ :return: the calculated metric as int for 'max' and 'min', float for 'avg'
737
+ """
738
+ qualified_table_name = self.qualified_table_name(table)
739
+ field = self.quote_column(field)
740
+
741
+ if metric.lower() == "max":
742
+ sql_function = "MAX(LEN"
743
+ elif metric.lower() == "min":
744
+ sql_function = "MIN(LEN"
745
+ elif metric.lower() == "avg":
746
+ sql_function = "AVG(CAST(LEN(" + field + ") AS FLOAT))"
747
+ else:
748
+ raise ValueError(f"Invalid metric '{metric}'. Choose from 'max', 'min', or 'avg'.")
749
+ if metric.lower() == "avg":
750
+ query = f"SELECT {sql_function} FROM {qualified_table_name}"
751
+ else:
752
+ query = f"SELECT {sql_function}({field})) FROM {qualified_table_name}"
753
+ if filters:
754
+ query += f" WHERE {filters}"
755
+
756
+ result = self.fetchone(query)[0]
757
+ return round(result, 2) if metric.lower() == "avg" else result
758
+
759
+ def query_string_pattern_validity(
760
+ self,
761
+ table: str,
762
+ field: str,
763
+ regex_pattern: str = None,
764
+ predefined_regex_pattern: str = None,
765
+ filters: str = None,
766
+ ) -> Tuple[int, int]:
767
+ """
768
+ Get the count of valid values based on the regex pattern
769
+ :param table: table name
770
+ :param field: column name
771
+ :param regex_pattern: regex pattern
772
+ :param predefined_regex_pattern: predefined regex pattern
773
+ :param filters: filter condition
774
+ :return: count of valid values, count of total row count
775
+ """
776
+ filters = f"WHERE {filters}" if filters else ""
777
+ qualified_table_name = self.qualified_table_name(table)
778
+ field = self.quote_column(field)
779
+
780
+ if not regex_pattern and not predefined_regex_pattern:
781
+ raise ValueError("Either regex_pattern or predefined_regex_pattern should be provided")
782
+
783
+ if predefined_regex_pattern:
784
+ length_query = None
785
+ pt = self.regex_patterns[predefined_regex_pattern]
786
+ if predefined_regex_pattern == "uuid":
787
+ length_query = f"LEN({field}) = 36"
788
+ elif predefined_regex_pattern == "perm_id":
789
+ length_query = f"LEN({field}) BETWEEN 19 AND 23 "
790
+ elif predefined_regex_pattern == "lei":
791
+ length_query = f"LEN({field}) = 20"
792
+ elif predefined_regex_pattern == "cusip":
793
+ length_query = f"LEN({field}) = 9"
794
+ elif predefined_regex_pattern == "figi":
795
+ length_query = f"LEN({field}) = 12"
796
+ elif predefined_regex_pattern == "isin":
797
+ length_query = f"LEN({field}) = 12"
798
+ elif predefined_regex_pattern == "sedol":
799
+ length_query = f"LEN({field}) = 7"
800
+ elif predefined_regex_pattern == "ssn":
801
+ length_query = f"LEN({field}) = 11"
802
+ elif predefined_regex_pattern == "usa_zip_code":
803
+ query = f"""
804
+ SELECT
805
+ SUM(CASE
806
+ WHEN PATINDEX('%[0-9][0-9][0-9][0-9][0-9]%', CAST({field} AS VARCHAR)) > 0
807
+ AND (LEN(CAST({field} AS VARCHAR)) = 5 OR LEN(CAST({field} AS VARCHAR)) = 9)
808
+ THEN 1
809
+ ELSE 0
810
+ END) AS valid_count,
811
+ COUNT(*) AS total_count
812
+ FROM {qualified_table_name} {filters};
813
+ """
814
+ result = self.fetchone(query)
815
+ return result[0], result[1]
816
+ if not length_query:
817
+ regex_query = f"PATINDEX('{pt}', {field}) > 0"
818
+ else:
819
+ regex_query = f"PATINDEX('{pt}', {field}) > 0 AND {length_query}"
820
+ else:
821
+ regex_query = self.convert_regex_to_sybase_pattern(regex_pattern)
822
+ query = f"""
823
+ SELECT
824
+ SUM(CASE
825
+ WHEN {regex_query}
826
+ THEN 1
827
+ ELSE 0
828
+ END) AS valid_count,
829
+ COUNT(*) AS total_count
830
+ FROM {qualified_table_name} {filters}
831
+ """
832
+ result = self.fetchone(query)
833
+ return result[0], result[1]
834
+
835
+ def query_get_time_diff(self, table: str, field: str) -> int:
836
+ """
837
+ Get the time difference
838
+ :param table: name of the index
839
+ :param field: field name of updated time column
840
+ :return: time difference in seconds
841
+ """
842
+ qualified_table_name = self.qualified_table_name(table)
843
+ field = self.quote_column(field)
844
+ query = f"""
845
+ SELECT TOP 1 {field}
846
+ FROM {qualified_table_name}
847
+ ORDER BY {field} DESC;
848
+ """
849
+ result = self.fetchone(query)
850
+ if result:
851
+ updated_time = result[0]
852
+ if isinstance(updated_time, str):
853
+ updated_time = datetime.strptime(updated_time, "%Y-%m-%d %H:%M:%S.%f")
854
+ return int((datetime.utcnow() - updated_time).total_seconds())
855
+ return 0
856
+
857
    def query_timestamp_metric(
        self,
        table: str,
        field: str,
        predefined_regex: str,
        filters: str = None,
    ) -> Tuple[int, int]:
        """
        Count values in a timestamp column that form a valid ISO timestamp.

        Runs one SQL batch in three steps: extract the date/time component
        substrings of CONVERT(..., 120) into a temp table, validate each
        component (month range, per-month day counts, leap years, time
        ranges) into a second temp table, then aggregate the counts.

        :param table: Table name
        :param field: Column name
        :param predefined_regex: only "timestamp_iso" is supported
        :param filters: filter condition
        :raises ValueError: for any predefined_regex other than "timestamp_iso"
        :return: (valid_count, total_count); (0, 0) if the batch fails
        """

        qualified_table_name = self.qualified_table_name(table)
        field = self.quote_column(field)

        # Unique #temp-table names so concurrent runs do not collide.
        temp_table_suffix = f"{int(time.time())}_{random.randint(1000, 9999)}"
        extracted_table = f"#extracted_timestamps_{temp_table_suffix}"
        validated_table = f"#validated_timestamps_{temp_table_suffix}"

        if predefined_regex == "timestamp_iso":
            filters_clause = f"WHERE {filters}" if filters else ""

            query = f"""
            -- Extract timestamp components
            SELECT
                {field},
                LEFT(CONVERT(VARCHAR, {field}, 120), 4) AS year, -- Extract year
                SUBSTRING(CONVERT(VARCHAR, {field}, 120), 6, 2) AS month, -- Extract month
                SUBSTRING(CONVERT(VARCHAR, {field}, 120), 9, 2) AS day, -- Extract day
                SUBSTRING(CONVERT(VARCHAR, {field}, 120), 12, 2) AS hour, -- Extract hour
                SUBSTRING(CONVERT(VARCHAR, {field}, 120), 15, 2) AS minute, -- Extract minute
                SUBSTRING(CONVERT(VARCHAR, {field}, 120), 18, 2) AS second -- Extract second
            INTO {extracted_table}
            FROM {qualified_table_name}
            {filters_clause};

            -- Validate timestamps and calculate the is_valid flag
            SELECT
                {field},
                CASE
                    WHEN
                        -- Validate year, month, and day formats
                        year LIKE '[0-9][0-9][0-9][0-9]' AND
                        month LIKE '[0-1][0-9]' AND month BETWEEN '01' AND '12' AND
                        day LIKE '[0-3][0-9]' AND day BETWEEN '01' AND
                        CASE
                            -- Check for days in each month
                            WHEN month IN ('01', '03', '05', '07', '08', '10', '12') THEN '31'
                            WHEN month IN ('04', '06', '09', '11') THEN '30'
                            WHEN month = '02' THEN
                                CASE
                                    -- Check for leap years
                                    WHEN (CAST(year AS INT) % 400 = 0 OR (CAST(year AS INT) % 100 != 0 AND CAST(year AS INT) % 4 = 0)) THEN '29'
                                    ELSE '28'
                                END
                            ELSE '00' -- Invalid month
                        END AND
                        -- Validate time components
                        hour LIKE '[0-2][0-9]' AND hour BETWEEN '00' AND '23' AND
                        minute LIKE '[0-5][0-9]' AND
                        second LIKE '[0-5][0-9]'
                    THEN 1
                    ELSE 0
                END AS is_valid
            INTO {validated_table}
            FROM {extracted_table};

            -- Get the counts
            SELECT
                SUM(is_valid) AS valid_count,
                COUNT(*) AS total_count
            FROM {validated_table};
            """
            try:
                result = self.fetchone(query)
                valid_count = result[0]
                total_count = result[1]

                return valid_count, total_count
            except Exception as e:
                # Best-effort: any failure in the batch is logged and reported as 0/0.
                logger.error(f"Error occurred: {e}")
                return 0, 0
        else:
            raise ValueError(f"Unknown predefined regex pattern: {predefined_regex}")
944
+
945
+ def query_timestamp_not_in_future_metric(
946
+ self,
947
+ table: str,
948
+ field: str,
949
+ predefined_regex: str,
950
+ filters: str = None,
951
+ ) -> Union[float, int]:
952
+ """
953
+ :param table: Table name
954
+ :param field: Column name
955
+ :param predefined_regex: regex pattern
956
+ :param filters: filter condition
957
+ :return: Count of valid timestamps not in the future and total count or percentage
958
+ """
959
+ qualified_table_name = self.qualified_table_name(table)
960
+ field = self.quote_column(field)
961
+
962
+ if predefined_regex != "timestamp_iso":
963
+ raise ValueError(f"Unknown predefined regex pattern: {predefined_regex}")
964
+
965
+ filters_clause = f"WHERE {filters}" if filters else ""
966
+
967
+ query = f"""
968
+ SELECT
969
+ SUM(CASE
970
+ WHEN
971
+ -- Validate year, month, day
972
+ DATEPART(yy, {field}) BETWEEN 1 AND 9999 AND
973
+ DATEPART(mm, {field}) BETWEEN 1 AND 12 AND
974
+ DATEPART(dd, {field}) BETWEEN 1 AND
975
+ CASE
976
+ WHEN DATEPART(mm, {field}) IN (1, 3, 5, 7, 8, 10, 12) THEN 31
977
+ WHEN DATEPART(mm, {field}) IN (4, 6, 9, 11) THEN 30
978
+ WHEN DATEPART(mm, {field}) = 2 THEN
979
+ CASE
980
+ WHEN DATEPART(yy, {field}) % 400 = 0 OR
981
+ (DATEPART(yy, {field}) % 4 = 0 AND DATEPART(yy, {field}) % 100 != 0) THEN 29
982
+ ELSE 28
983
+ END
984
+ ELSE 0
985
+ END AND
986
+ -- Validate hour, minute, second
987
+ DATEPART(hh, {field}) BETWEEN 0 AND 23 AND
988
+ DATEPART(mi, {field}) BETWEEN 0 AND 59 AND
989
+ DATEPART(ss, {field}) BETWEEN 0 AND 59 AND
990
+ -- Ensure timestamp is not in the future
991
+ {field} <= GETDATE()
992
+ THEN 1
993
+ ELSE 0
994
+ END) AS valid_count,
995
+ COUNT(*) AS total_count
996
+ FROM {qualified_table_name}
997
+ {filters_clause}
998
+ """
999
+
1000
+ try:
1001
+ result = self.fetchone(query)
1002
+ valid_count = result[0]
1003
+ total_count = result[1]
1004
+
1005
+ return valid_count, total_count
1006
+ except Exception as e:
1007
+ logger.error(f"Error occurred: {e}")
1008
+ return 0, 0
1009
+
1010
+ def query_timestamp_date_not_in_future_metric(
1011
+ self,
1012
+ table: str,
1013
+ field: str,
1014
+ predefined_regex: str,
1015
+ filters: str = None,
1016
+ ) -> Union[float, int]:
1017
+ """
1018
+ :param table: Table name
1019
+ :param field: Column name
1020
+ :param predefined_regex: The regex pattern to use (e.g., "timestamp_iso")
1021
+ :param filters: Optional filter condition
1022
+ :return: Tuple containing count of valid dates not in the future and total count
1023
+ """
1024
+ qualified_table_name = self.qualified_table_name(table)
1025
+ field = self.quote_column(field)
1026
+ filters_clause = f"WHERE {filters}" if filters else ""
1027
+
1028
+ query = f"""
1029
+ SELECT
1030
+ SUM(CASE
1031
+ WHEN
1032
+ -- Validate year, month, and day
1033
+ DATEPART(yy, {field}) BETWEEN 1 AND 9999 AND
1034
+ DATEPART(mm, {field}) BETWEEN 1 AND 12 AND
1035
+ DATEPART(dd, {field}) BETWEEN 1 AND
1036
+ CASE
1037
+ WHEN DATEPART(mm, {field}) IN (1, 3, 5, 7, 8, 10, 12) THEN 31
1038
+ WHEN DATEPART(mm, {field}) IN (4, 6, 9, 11) THEN 30
1039
+ WHEN DATEPART(mm, {field}) = 2 THEN
1040
+ CASE
1041
+ WHEN DATEPART(yy, {field}) % 400 = 0 OR
1042
+ (DATEPART(yy, {field}) % 4 = 0 AND DATEPART(yy, {field}) % 100 != 0) THEN 29
1043
+ ELSE 28
1044
+ END
1045
+ ELSE 0
1046
+ END AND
1047
+ -- Validate hour, minute, and second
1048
+ DATEPART(hh, {field}) BETWEEN 0 AND 23 AND
1049
+ DATEPART(mi, {field}) BETWEEN 0 AND 59 AND
1050
+ DATEPART(ss, {field}) BETWEEN 0 AND 59 AND
1051
+ -- Ensure the timestamp is not in the future
1052
+ {field} <= GETDATE()
1053
+ THEN 1
1054
+ ELSE 0
1055
+ END) AS valid_count,
1056
+ COUNT(*) AS total_count
1057
+ FROM {qualified_table_name}
1058
+ {filters_clause}
1059
+ """
1060
+
1061
+ try:
1062
+ result = self.fetchone(query)
1063
+ valid_count = result[0]
1064
+ total_count = result[1]
1065
+
1066
+ return valid_count, total_count
1067
+ except Exception as e:
1068
+ logger.error(f"Error occurred: {e}")
1069
+ return 0, 0