dcs_sdk-1.6.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159)
  1. data_diff/__init__.py +221 -0
  2. data_diff/__main__.py +517 -0
  3. data_diff/abcs/__init__.py +13 -0
  4. data_diff/abcs/compiler.py +27 -0
  5. data_diff/abcs/database_types.py +402 -0
  6. data_diff/config.py +141 -0
  7. data_diff/databases/__init__.py +38 -0
  8. data_diff/databases/_connect.py +323 -0
  9. data_diff/databases/base.py +1417 -0
  10. data_diff/databases/bigquery.py +376 -0
  11. data_diff/databases/clickhouse.py +217 -0
  12. data_diff/databases/databricks.py +262 -0
  13. data_diff/databases/duckdb.py +207 -0
  14. data_diff/databases/mssql.py +343 -0
  15. data_diff/databases/mysql.py +189 -0
  16. data_diff/databases/oracle.py +238 -0
  17. data_diff/databases/postgresql.py +293 -0
  18. data_diff/databases/presto.py +222 -0
  19. data_diff/databases/redis.py +93 -0
  20. data_diff/databases/redshift.py +233 -0
  21. data_diff/databases/snowflake.py +222 -0
  22. data_diff/databases/sybase.py +720 -0
  23. data_diff/databases/trino.py +73 -0
  24. data_diff/databases/vertica.py +174 -0
  25. data_diff/diff_tables.py +489 -0
  26. data_diff/errors.py +17 -0
  27. data_diff/format.py +369 -0
  28. data_diff/hashdiff_tables.py +1026 -0
  29. data_diff/info_tree.py +76 -0
  30. data_diff/joindiff_tables.py +434 -0
  31. data_diff/lexicographic_space.py +253 -0
  32. data_diff/parse_time.py +88 -0
  33. data_diff/py.typed +0 -0
  34. data_diff/queries/__init__.py +13 -0
  35. data_diff/queries/api.py +213 -0
  36. data_diff/queries/ast_classes.py +811 -0
  37. data_diff/queries/base.py +38 -0
  38. data_diff/queries/extras.py +43 -0
  39. data_diff/query_utils.py +70 -0
  40. data_diff/schema.py +67 -0
  41. data_diff/table_segment.py +583 -0
  42. data_diff/thread_utils.py +112 -0
  43. data_diff/utils.py +1022 -0
  44. data_diff/version.py +15 -0
  45. dcs_core/__init__.py +13 -0
  46. dcs_core/__main__.py +17 -0
  47. dcs_core/__version__.py +15 -0
  48. dcs_core/cli/__init__.py +13 -0
  49. dcs_core/cli/cli.py +165 -0
  50. dcs_core/core/__init__.py +19 -0
  51. dcs_core/core/common/__init__.py +13 -0
  52. dcs_core/core/common/errors.py +50 -0
  53. dcs_core/core/common/models/__init__.py +13 -0
  54. dcs_core/core/common/models/configuration.py +284 -0
  55. dcs_core/core/common/models/dashboard.py +24 -0
  56. dcs_core/core/common/models/data_source_resource.py +75 -0
  57. dcs_core/core/common/models/metric.py +160 -0
  58. dcs_core/core/common/models/profile.py +75 -0
  59. dcs_core/core/common/models/validation.py +216 -0
  60. dcs_core/core/common/models/widget.py +44 -0
  61. dcs_core/core/configuration/__init__.py +13 -0
  62. dcs_core/core/configuration/config_loader.py +139 -0
  63. dcs_core/core/configuration/configuration_parser.py +262 -0
  64. dcs_core/core/configuration/configuration_parser_arc.py +328 -0
  65. dcs_core/core/datasource/__init__.py +13 -0
  66. dcs_core/core/datasource/base.py +62 -0
  67. dcs_core/core/datasource/manager.py +112 -0
  68. dcs_core/core/datasource/search_datasource.py +421 -0
  69. dcs_core/core/datasource/sql_datasource.py +1094 -0
  70. dcs_core/core/inspect.py +163 -0
  71. dcs_core/core/logger/__init__.py +13 -0
  72. dcs_core/core/logger/base.py +32 -0
  73. dcs_core/core/logger/default_logger.py +94 -0
  74. dcs_core/core/metric/__init__.py +13 -0
  75. dcs_core/core/metric/base.py +220 -0
  76. dcs_core/core/metric/combined_metric.py +98 -0
  77. dcs_core/core/metric/custom_metric.py +34 -0
  78. dcs_core/core/metric/manager.py +137 -0
  79. dcs_core/core/metric/numeric_metric.py +403 -0
  80. dcs_core/core/metric/reliability_metric.py +90 -0
  81. dcs_core/core/profiling/__init__.py +13 -0
  82. dcs_core/core/profiling/datasource_profiling.py +136 -0
  83. dcs_core/core/profiling/numeric_field_profiling.py +72 -0
  84. dcs_core/core/profiling/text_field_profiling.py +67 -0
  85. dcs_core/core/repository/__init__.py +13 -0
  86. dcs_core/core/repository/metric_repository.py +77 -0
  87. dcs_core/core/utils/__init__.py +13 -0
  88. dcs_core/core/utils/log.py +29 -0
  89. dcs_core/core/utils/tracking.py +105 -0
  90. dcs_core/core/utils/utils.py +44 -0
  91. dcs_core/core/validation/__init__.py +13 -0
  92. dcs_core/core/validation/base.py +230 -0
  93. dcs_core/core/validation/completeness_validation.py +153 -0
  94. dcs_core/core/validation/custom_query_validation.py +24 -0
  95. dcs_core/core/validation/manager.py +282 -0
  96. dcs_core/core/validation/numeric_validation.py +276 -0
  97. dcs_core/core/validation/reliability_validation.py +91 -0
  98. dcs_core/core/validation/uniqueness_validation.py +61 -0
  99. dcs_core/core/validation/validity_validation.py +738 -0
  100. dcs_core/integrations/__init__.py +13 -0
  101. dcs_core/integrations/databases/__init__.py +13 -0
  102. dcs_core/integrations/databases/bigquery.py +187 -0
  103. dcs_core/integrations/databases/databricks.py +51 -0
  104. dcs_core/integrations/databases/db2.py +652 -0
  105. dcs_core/integrations/databases/elasticsearch.py +61 -0
  106. dcs_core/integrations/databases/mssql.py +829 -0
  107. dcs_core/integrations/databases/mysql.py +409 -0
  108. dcs_core/integrations/databases/opensearch.py +64 -0
  109. dcs_core/integrations/databases/oracle.py +719 -0
  110. dcs_core/integrations/databases/postgres.py +482 -0
  111. dcs_core/integrations/databases/redshift.py +53 -0
  112. dcs_core/integrations/databases/snowflake.py +48 -0
  113. dcs_core/integrations/databases/spark_df.py +111 -0
  114. dcs_core/integrations/databases/sybase.py +1069 -0
  115. dcs_core/integrations/storage/__init__.py +13 -0
  116. dcs_core/integrations/storage/local_file.py +149 -0
  117. dcs_core/integrations/utils/__init__.py +13 -0
  118. dcs_core/integrations/utils/utils.py +36 -0
  119. dcs_core/report/__init__.py +13 -0
  120. dcs_core/report/dashboard.py +211 -0
  121. dcs_core/report/models.py +88 -0
  122. dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
  123. dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
  124. dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
  125. dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
  126. dcs_core/report/static/assets/images/docs.svg +6 -0
  127. dcs_core/report/static/assets/images/github.svg +4 -0
  128. dcs_core/report/static/assets/images/logo.svg +7 -0
  129. dcs_core/report/static/assets/images/slack.svg +13 -0
  130. dcs_core/report/static/index.js +2 -0
  131. dcs_core/report/static/index.js.LICENSE.txt +3971 -0
  132. dcs_sdk/__init__.py +13 -0
  133. dcs_sdk/__main__.py +18 -0
  134. dcs_sdk/__version__.py +15 -0
  135. dcs_sdk/cli/__init__.py +13 -0
  136. dcs_sdk/cli/cli.py +163 -0
  137. dcs_sdk/sdk/__init__.py +58 -0
  138. dcs_sdk/sdk/config/__init__.py +13 -0
  139. dcs_sdk/sdk/config/config_loader.py +491 -0
  140. dcs_sdk/sdk/data_diff/__init__.py +13 -0
  141. dcs_sdk/sdk/data_diff/data_differ.py +821 -0
  142. dcs_sdk/sdk/rules/__init__.py +15 -0
  143. dcs_sdk/sdk/rules/rules_mappping.py +31 -0
  144. dcs_sdk/sdk/rules/rules_repository.py +214 -0
  145. dcs_sdk/sdk/rules/schema_rules.py +65 -0
  146. dcs_sdk/sdk/utils/__init__.py +13 -0
  147. dcs_sdk/sdk/utils/serializer.py +25 -0
  148. dcs_sdk/sdk/utils/similarity_score/__init__.py +13 -0
  149. dcs_sdk/sdk/utils/similarity_score/base_provider.py +153 -0
  150. dcs_sdk/sdk/utils/similarity_score/cosine_similarity_provider.py +39 -0
  151. dcs_sdk/sdk/utils/similarity_score/jaccard_provider.py +24 -0
  152. dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py +31 -0
  153. dcs_sdk/sdk/utils/table.py +475 -0
  154. dcs_sdk/sdk/utils/themes.py +40 -0
  155. dcs_sdk/sdk/utils/utils.py +349 -0
  156. dcs_sdk-1.6.5.dist-info/METADATA +150 -0
  157. dcs_sdk-1.6.5.dist-info/RECORD +159 -0
  158. dcs_sdk-1.6.5.dist-info/WHEEL +4 -0
  159. dcs_sdk-1.6.5.dist-info/entry_points.txt +4 -0
data_diff/databases/sybase.py
@@ -0,0 +1,720 @@
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import re
+ import time
+ from typing import Any, ClassVar, Dict, List, Optional, Tuple, Type
+
+ import attrs
+ from loguru import logger
+
+ from data_diff.abcs.database_types import (
+     JSON,
+     Boolean,
+     ColType,
+     ColType_UUID,
+     Date,
+     Datetime,
+     DbPath,
+     DbTime,
+     Decimal,
+     Float,
+     FractionalType,
+     Integer,
+     Native_UUID,
+     NumericType,
+     String_UUID,
+     TemporalType,
+     Text,
+     Time,
+     Timestamp,
+     TimestampTZ,
+ )
+ from data_diff.databases.base import (
+     CHECKSUM_HEXDIGITS,
+     CHECKSUM_OFFSET,
+     BaseDialect,
+     ConnectError,
+     QueryError,
+     QueryResult,
+     ThreadedDatabase,
+     import_helper,
+ )
+ from data_diff.schema import RawColumnInfo
+
+
+ @import_helper("sybase")
+ def import_sybase():
+     import pyodbc
+
+     return pyodbc
+
+
+ def generate_primes(limit: int) -> List[int]:
+     sieve = [True] * (limit + 1)
+     sieve[0:2] = [False, False]
+     for i in range(2, int(limit**0.5) + 1):
+         if sieve[i]:
+             sieve[i * i : limit + 1 : i] = [False] * len(range(i * i, limit + 1, i))
+     return [i for i, is_prime in enumerate(sieve) if is_prime]
+
+
+ @attrs.define(frozen=False)
+ class Dialect(BaseDialect):
+     name = "Sybase"
+     ROUNDS_ON_PREC_LOSS = True
+     SUPPORTS_PRIMARY_KEY: ClassVar[bool] = True
+     SUPPORTS_INDEXES = True
+     primes: List[int] = attrs.Factory(lambda: generate_primes(1000))
+     column_prime_map: Dict[str, int] = attrs.Factory(dict)
+     TYPE_CLASSES = {
+         # Timestamps
+         "datetimeoffset": TimestampTZ,
+         "Datetimeoffset": TimestampTZ,
+         "datetime2": Timestamp,
+         "smalldatetime": Datetime,
+         "datetime": Datetime,
+         "timestamp": Datetime,
+         "date": Date,
+         "time": Time,
+         "timestamp with time zone": TimestampTZ,
+         # Numbers
+         "float": Float,
+         "real": Float,
+         "decimal": Decimal,
+         "money": Decimal,
+         "smallmoney": Decimal,
+         "numeric": Decimal,
+         # int
+         "int": Integer,
+         "bigint": Integer,
+         "tinyint": Integer,
+         "smallint": Integer,
+         "integer": Integer,
+         "unsigned big int": Integer,
+         "unsigned int": Integer,
+         "unsigned small int": Integer,
+         # Text
+         "varchar": Text,
+         "char": Text,
+         "text": Text,
+         "ntext": Text,  # ASE only
+         "nvarchar": Text,  # ASE only
+         "nchar": Text,  # ASE only
+         "binary": Text,
+         "varbinary": Text,
+         "xml": Text,
+         # UUID
+         "uniqueidentifier": Native_UUID,
+         # Bool
+         "bit": Boolean,
+         "varbit": Boolean,
+         # JSON
+         "json": JSON,
+     }
+
+     def quote(self, s: str, is_table: bool = False) -> str:
+         if s in self.TABLE_NAMES and self.default_schema and is_table:
+             return f"[{self.default_schema}].[{s}]"
+         return f"[{s}]"
+
+     def set_timezone_to_utc(self) -> str:
+         raise NotImplementedError("Sybase does not support a session timezone setting.")
+
+     def current_timestamp(self) -> str:
+         return "GETDATE()"
+
+     def current_database(self) -> str:
+         return "DB_NAME()"
+
+     def current_schema(self) -> str:
+         return """default_schema_name
+ FROM sys.database_principals
+ WHERE name = CURRENT_USER"""
+
+     def to_string(self, s: str, coltype: str = None) -> str:
+         s_temp = re.sub(r'["\[\]`]', "", s)
+         raw_col_info = self.get_column_raw_info(s_temp)
+         ch_len = (raw_col_info and raw_col_info.character_maximum_length) or None
+         if not ch_len:
+             ch_len = 2500
+         ch_len = max(ch_len, 2500)
+         if self.sybase_driver_type.is_iq or self.query_config_for_free_tds["freetds_query_chosen"]:
+             return f"CAST({s} AS VARCHAR({ch_len}))"
+         if raw_col_info and raw_col_info.data_type in ["nvarchar", "nchar", "ntext"]:
+             return f"CAST({s} AS NVARCHAR({ch_len}))"
+         return f"CAST({s} AS VARCHAR({ch_len}))"
+
+     def type_repr(self, t) -> str:
+         try:
+             if self.sybase_driver_type.is_iq or self.query_config_for_free_tds["freetds_query_chosen"]:
+                 return {bool: "bit", str: "varchar(2500)"}[t]
+             return {bool: "bit", str: "nvarchar(5000)"}[t]
+         except KeyError:
+             return super().type_repr(t)
+
+     def random(self) -> str:
+         return "rand()"
+
+     def is_distinct_from(self, a: str, b: str) -> str:
+         return f"(({a}<>{b} OR {a} IS NULL OR {b} IS NULL) AND NOT({a} IS NULL AND {b} IS NULL))"
+
+     def limit_select(
+         self,
+         select_query: str,
+         offset: Optional[int] = None,
+         limit: Optional[int] = None,
+         has_order_by: Optional[bool] = None,
+     ) -> str:
+         # import re
+
+         # def safe_trim(match):
+         #     column_name = match.group(1)
+         #     if self.sybase_driver_type.is_iq or self.query_config_for_free_tds["freetds_query_chosen"]:
+         #         return f"TRIM(CAST({column_name} AS VARCHAR(2500)))"
+         #     return f"TRIM(CAST({column_name} AS NVARCHAR(5000)))"
+         # select_query = re.sub(r"TRIM\(\[([\w]+)\]\)", safe_trim, select_query)
+         # select_query = re.sub(r"TRIM\(([\w]+)\)", safe_trim, select_query)
+
+         if limit is not None:
+             select_query = select_query.replace("SELECT", f"SELECT TOP {limit}", 1)
+
+         # if not has_order_by:
+         #     select_query += " ORDER BY RAND()"
+         return select_query
+
+     def constant_values(self, rows) -> str:
+         values = ", ".join("(%s)" % ", ".join(self._constant_value(v) for v in row) for row in rows)
+         return f"VALUES {values}"
+
+     def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
+         varchar_type = (
+             "VARCHAR"
+             if (self.sybase_driver_type.is_iq or self.query_config_for_free_tds["freetds_query_chosen"])
+             else "NVARCHAR"
+         )
+
+         # Handle Date type - return YYYY-MM-DD format
+         if isinstance(coltype, Date):
+             return (
+                 f"CASE WHEN {value} IS NULL THEN NULL "
+                 f"ELSE "
+                 f"CAST(DATEPART(YEAR, {value}) AS CHAR(4)) + '-' + "
+                 f"RIGHT('0' + CAST(DATEPART(MONTH, {value}) AS VARCHAR(2)), 2) + '-' + "
+                 f"RIGHT('0' + CAST(DATEPART(DAY, {value}) AS VARCHAR(2)), 2) "
+                 f"END"
+             )
+         if isinstance(coltype, Datetime):
+             if coltype.precision == 4:
+                 return f"CAST({value} AS {varchar_type}(100))"
+             if coltype.precision > 0:
+                 return (
+                     f"CASE WHEN {value} IS NULL THEN NULL "
+                     f"ELSE "
+                     f"CAST(DATEPART(YEAR, {value}) AS CHAR(4)) + '-' + "
+                     f"RIGHT('0' + CAST(DATEPART(MONTH, {value}) AS VARCHAR(2)), 2) + '-' + "
+                     f"RIGHT('0' + CAST(DATEPART(DAY, {value}) AS VARCHAR(2)), 2) + ' ' + "
+                     f"RIGHT('0' + CAST(DATEPART(HOUR, {value}) AS VARCHAR(2)), 2) + ':' + "
+                     f"RIGHT('0' + CAST(DATEPART(MINUTE, {value}) AS VARCHAR(2)), 2) + ':' + "
+                     f"RIGHT('0' + CAST(DATEPART(SECOND, {value}) AS VARCHAR(2)), 2) + '.' + "
+                     f"RIGHT('00' + CAST(DATEPART(MILLISECOND, {value}) AS VARCHAR(3)), 3) "
+                     f"END"
+                 )
+             return (
+                 f"CASE WHEN {value} IS NULL THEN NULL "
+                 f"ELSE "
+                 f"CAST(DATEPART(YEAR, {value}) AS CHAR(4)) + '-' + "
+                 f"RIGHT('0' + CAST(DATEPART(MONTH, {value}) AS VARCHAR(2)), 2) + '-' + "
+                 f"RIGHT('0' + CAST(DATEPART(DAY, {value}) AS VARCHAR(2)), 2) + ' ' + "
+                 f"RIGHT('0' + CAST(DATEPART(HOUR, {value}) AS VARCHAR(2)), 2) + ':' + "
+                 f"RIGHT('0' + CAST(DATEPART(MINUTE, {value}) AS VARCHAR(2)), 2) + ':' + "
+                 f"RIGHT('0' + CAST(DATEPART(SECOND, {value}) AS VARCHAR(2)), 2) "
+                 f"END"
+             )
+         if self.sybase_driver_type.is_iq or self.query_config_for_free_tds["freetds_query_chosen"]:
+             return f"CAST({value} AS VARCHAR(100))"
+         return f"CAST({value} AS NVARCHAR(100))"
+
+     def timestamp_value(self, t: DbTime) -> str:
+         """Provide SQL for the given timestamp value - match normalize_timestamp precision"""
+         # Use consistent formatting that matches what normalize_timestamp produces
+         # This ensures exact equality comparisons work correctly
+         formatted = t.strftime("%Y-%m-%d %H:%M:%S")
+         if t.microsecond > 0:
+             # Always use 3-digit milliseconds to match normalize_timestamp output
+             # which uses DATEPART(MILLISECOND, value) giving 3 digits
+             milliseconds = t.microsecond // 1000
+             formatted += f".{milliseconds:03d}"
+         return f"'{formatted}'"
+
+     def timestamp_equality_condition(self, column: str, timestamp_value: str) -> str:
+         """Generate a timestamp equality condition that handles precision mismatches"""
+         # For Sybase, we need to handle the case where stored values have microsecond precision
+         # but our query values only have millisecond precision
+
+         # Extract the timestamp without quotes
+         clean_value = timestamp_value.strip("'")
+
+         # If the value has fractional seconds, create a range query
+         if "." in clean_value:
+             # For a value like '2020-01-01 00:02:33.951'
+             # We want to match anything from .951000 to .951999 microseconds
+             base_value = clean_value
+             next_ms_value = self._increment_millisecond(clean_value)
+
+             return f"({column} >= '{base_value}' AND {column} < '{next_ms_value}')"
+         else:
+             # No fractional seconds, use exact match
+             return f"{column} = '{clean_value}'"
+
+     def _increment_millisecond(self, timestamp_str: str) -> str:
+         """Increment the millisecond part of a timestamp string"""
+         from datetime import datetime, timedelta
+
+         try:
+             # Parse the timestamp
+             if "." in timestamp_str:
+                 dt = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S.%f")
+             else:
+                 dt = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
+
+             # Add 1 millisecond
+             dt_incremented = dt + timedelta(milliseconds=1)
+
+             # Format back to string with millisecond precision
+             return dt_incremented.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
+         except ValueError:
+             # Fallback to original value if parsing fails
+             return timestamp_str
+
+     def normalize_number(self, value: str, coltype: FractionalType) -> str:
+         # scale = getattr(coltype, "scale", 0) or 0
+         precision = getattr(coltype, "precision", 0) or 0
+         return self.to_string(f"CAST({value} AS DECIMAL(38, {precision}))")
+
+     # def md5_as_int(self, s: str) -> str:
+     #     """Returns an MD5 hash of the input string as an integer for Sybase IQ."""
+     #     return f"CAST(HEXTOINT(LEFT(CAST(HASH({s}, 'MD5') AS VARCHAR(32)), 8)) AS BIGINT) - 140737488355327"
+
+     # def md5_as_int(self, s: str) -> str:
+     #     """Returns a hash-like integer based on ASCII values of the input string for Sybase."""
+     #     # Create a simple hash using ASCII values and string length
+     #     # This generates a pseudo-hash by combining ASCII values with position weights
+     #     return (
+     #         f"CAST(("
+     #         f" (LEN({s}) * 31) + "  # Length component
+     #         f" (ASCII(LEFT({s}, 1)) * 97) + "  # First character
+     #         f" (CASE WHEN LEN({s}) > 1 THEN ASCII(SUBSTRING({s}, 2, 1)) * 53 ELSE 0 END) + "  # Second character
+     #         f" (CASE WHEN LEN({s}) > 2 THEN ASCII(SUBSTRING({s}, 3, 1)) * 29 ELSE 0 END) + "  # Third character
+     #         f" (CASE WHEN LEN({s}) > 3 THEN ASCII(RIGHT({s}, 1)) * 17 ELSE 0 END)"  # Last character
+     #         f") % 2147483647 AS BIGINT) - 1073741823"  # Modulo to keep in range and shift
+     #     )
+
+     # def md5_as_hex(self, s: str) -> str:
+     #     return f"HashBytes('MD5', {s})"
+
+     # def md5_as_hex(self, s: str) -> str:
+     #     """Returns a hex representation based on ASCII values instead of MD5."""
+     #     # Create a hex-like string using ASCII values
+     #     return (
+     #         f"RIGHT('0000000' + CONVERT(VARCHAR(8), "
+     #         f" (ASCII(LEFT({s}, 1)) * 256 + "
+     #         f" CASE WHEN LEN({s}) > 1 THEN ASCII(SUBSTRING({s}, 2, 1)) ELSE 0 END) % 65536"
+     #         f"), 16), 8)"
+     #     )
+
+     def get_unique_prime_for_column(self, column_name: str) -> int:
+         if column_name in self.column_prime_map:
+             return self.column_prime_map[column_name]
+         used_primes = set(self.column_prime_map.values())
+         for p in self.primes:
+             if p > 100 and p not in used_primes:
+                 self.column_prime_map[column_name] = p
+                 return p
+         raise ValueError("Ran out of unique primes")
+
+     def md5_as_int(self, s: str) -> str:
+         if self.sybase_driver_type.is_ase or self.query_config_for_free_tds["ase_query_chosen"]:
+             return f"CAST(HEXTOINT(LEFT(CAST(HASH({s}, 'MD5') AS VARCHAR(32)), 8)) AS BIGINT) % 2147483647"
+         base_prime = self.get_unique_prime_for_column(s)
+         separator = " +\n "
+         parts = [f"LENGTH(COALESCE({s}, '')) * {base_prime}"]
+
+         for i in range(15):
+             parts.append(f"COALESCE(ASCII(SUBSTRING(COALESCE({s}, ''), {i + 1}, 1)), 0) * {self.primes[i]}")
+
+         for i, pos in enumerate([20, 25, 30, 35, 40]):
+             parts.append(
+                 f"(CASE WHEN LENGTH(COALESCE({s}, '')) >= {pos} "
+                 f"THEN COALESCE(ASCII(SUBSTRING(COALESCE({s}, ''), {pos}, 1)), 0) * {self.primes[15 + i]} ELSE 0 END)"
+             )
+
+         parts.append(
+             f"(CASE WHEN LENGTH(COALESCE({s}, '')) > 15 "
+             f"THEN COALESCE(ASCII(SUBSTRING(COALESCE({s}, ''), LENGTH(COALESCE({s}, '')), 1)), 0) * {self.primes[20]} ELSE 0 END)"
+         )
+
+         return f"CAST((\n {separator.join(parts)}\n ) % 2147483647 AS BIGINT)"
+
+     def md5_as_hex(self, s: str) -> str:
+         if self.sybase_driver_type.is_ase or self.query_config_for_free_tds["ase_query_chosen"]:
+             return f"HashBytes('MD5', {s})"
+         base_prime = self.get_unique_prime_for_column(s)
+         separator = " +\n "
+         parts = [f"LENGTH(COALESCE({s}, '')) * {base_prime}"]
+
+         for i in range(15):
+             parts.append(f"COALESCE(ASCII(SUBSTRING(COALESCE({s}, ''), {i + 1}, 1)), 0) * {self.primes[i]}")
+
+         for i, pos in enumerate([20, 25, 30, 35, 40]):
+             parts.append(
+                 f"(CASE WHEN LENGTH(COALESCE({s}, '')) >= {pos} "
+                 f"THEN COALESCE(ASCII(SUBSTRING(COALESCE({s}, ''), {pos}, 1)), 0) * {self.primes[15 + i]} ELSE 0 END)"
+             )
+
+         parts.append(
+             f"(CASE WHEN LENGTH(COALESCE({s}, '')) > 15 "
+             f"THEN COALESCE(ASCII(SUBSTRING(COALESCE({s}, ''), LENGTH(COALESCE({s}, '')), 1)), 0) * {self.primes[20]} ELSE 0 END)"
+         )
+
+         return (
+             f"RIGHT('00000000' + CONVERT(VARCHAR(8), (\n {separator.join(parts)}\n ) % 16777215), 16), 8)"
+         )
+
+     def concat(self, items: List[str]) -> str:
+         """Provide SQL for concatenating multiple columns into a string for Sybase IQ."""
+         assert len(items) > 1, "At least two columns are required for concatenation."
+         return " || ".join(items)
+
+     def normalize_uuid(self, value: str, coltype: ColType_UUID) -> str:
+         s_temp = re.sub(r'["\[\]`]', "", value)
+         raw_col_info = self.get_column_raw_info(s_temp)
+         ch_len = (raw_col_info and raw_col_info.character_maximum_length) or None
+         if not ch_len:
+             ch_len = 2500
+         ch_len = max(ch_len, 2500)
+         if isinstance(coltype, String_UUID):
+             if self.sybase_driver_type.is_iq or self.query_config_for_free_tds["freetds_query_chosen"]:
+                 return f"CAST({value} AS VARCHAR({ch_len}))"  # IQ: Match column length
+             return f"CAST({value} AS NVARCHAR({ch_len}))"  # ASE: Match column length
+         if self.sybase_driver_type.is_iq or self.query_config_for_free_tds["freetds_query_chosen"]:
+             return f"CONVERT(VARCHAR({ch_len}), {value})"
+         return f"CONVERT(NVARCHAR({ch_len}), {value})"
+
+     def parse_type(self, table_path: DbPath, info: RawColumnInfo) -> ColType:
+         """Override base parse_type to handle datetime columns that should be treated as dates"""
+
+         # Check if this is a datetime column that should be treated as a date
+         if info.data_type == "datetime":
+             # Sybase IQ stores DATE columns as datetime with precision=4
+             # and DATETIME columns as datetime with precision=8
+             if info.datetime_precision == 4:
+                 return Date(
+                     precision=info.datetime_precision,
+                     rounds=self.ROUNDS_ON_PREC_LOSS,
+                 )
+         return super().parse_type(table_path, info)
+
+     def parse_table_name(self, name: str) -> DbPath:
+         "Parse the given table name into a DbPath"
+         self.TABLE_NAMES.append(name.split(".")[-1])
+         return tuple(name.split("."))
+
+
+ @attrs.define(frozen=False, init=False, kw_only=True)
+ class Sybase(ThreadedDatabase):
+     DIALECT_CLASS: ClassVar[Type[BaseDialect]] = Dialect
+     CONNECT_URI_HELP = "sybase://<user>:<password>@<host>/<database>/<schema>"
+     CONNECT_URI_PARAMS = ["database", "schema"]
+
+     default_database: str
+     _args: Dict[str, Any]
+     _sybase: Any
+     _conn: Any
+
+     def __init__(self, host, port, user, password, *, database, thread_count, **kw) -> None:
+         super().__init__(thread_count=thread_count)
+         args = dict(
+             host=host,
+             port=port,
+             database=database,
+             user=user,
+             password=password,
+             **kw,
+         )
+         self._args = {k: v for k, v in args.items() if v}
+         if self._args.get("odbc_driver", None) is not None:
+             self._args["driver"] = self._args.pop("odbc_driver")
+         else:
+             self._args["driver"] = "FreeTDS"
+         try:
+             self.default_database = self._args["database"]
+             self.default_schema = self._args["schema"]
+             self.dialect.default_schema = self.default_schema
+         except KeyError:
+             raise ValueError("Specify a default database and schema.")
+         self._sybase = import_sybase()
+         self._detect_driver_type(self._args.get("driver", None))
+         self._conn = self.create_connection()
+
+     def create_connection(self):
+         server = self._args.get("server", None) or ""
+         host = self._args.get("host", None) or ""
+         port = self._args.get("port", 5000)
+         database = self._args.get("database", None)
+         username = self._args.get("user", None)
+         password = self._args.get("password", None)
+         driver = self._args.get("driver", None)
+         max_query_timeout = 60 * 5  # 5 minutes
+
+         if self.dialect.sybase_driver_type.is_freetds:
+             conn_dict = {
+                 "driver": "FreeTDS",
+                 "database": database,
+                 "user": username,
+                 "password": password,
+                 "port": port,
+                 "tds_version": "auto",
+             }
+
+             conn_dict["host"] = host or server
+             try:
+                 logger.debug("Attempting FreeTDS connection..")
+                 self._conn = self._sybase.connect(**conn_dict)
+                 self._conn.timeout = max_query_timeout
+                 logger.info("Successfully connected to Sybase using FreeTDS")
+                 return self._conn
+             except Exception as e:
+                 error_msg = f"Failed to connect to Sybase with FreeTDS: {str(e)}"
+                 logger.error(error_msg)
+                 raise ConnectError(error_msg) from e
+
+         base_params = {
+             "DRIVER": self._prepare_driver_string(driver),
+             "DATABASE": database,
+             "UID": username,
+             "PWD": password,
+         }
+         connection_attempts = []
+         if self.dialect.sybase_driver_type.is_ase:
+             connection_attempts = [
+                 {
+                     "key": "SERVER",
+                     "value": host,
+                     "port": port,
+                 },  # ASE typically uses SERVER
+                 {"key": "SERVERNAME", "value": host, "port": port},
+                 {
+                     "key": "HOST",
+                     "value": f"{host}:{port}",
+                     "port": None,
+                 },  # Host:Port format
+             ]
+         else:
+             connection_attempts = [
+                 {"key": "HOST", "value": f"{host}:{port}", "port": None},
+                 {"key": "HOST", "value": host, "port": port},
+                 {"key": "SERVER", "value": server, "port": port},
+                 {"key": "SERVERNAME", "value": server, "port": port},
+             ]
+
+         errors = []
+
+         for attempt in connection_attempts:
+             if not attempt["value"]:
+                 continue
+
+             conn_dict = base_params.copy()
+             conn_dict[attempt["key"]] = attempt["value"]
+
+             # Handle port configuration
+             if attempt["port"] is not None:
+                 port_configs = [
+                     {"PORT": attempt["port"]},
+                     {"Server port": attempt["port"]},
+                     {},  # Try without explicit port
+                 ]
+             else:
+                 port_configs = [{}]  # Port is already in the host string
+
+             for port_config in port_configs:
+                 current_config = conn_dict.copy()
+                 current_config.update(port_config)
+
+                 # Add ASE-specific parameters if driver is ASE
+                 if self.dialect.sybase_driver_type.is_ase:
+                     ase_configs = [
+                         {},  # Basic config
+                         {"NetworkAddress": f"{host},{port}"},  # Alternative format
+                         {"ServerName": host},  # Another common ASE parameter
+                     ]
+                 else:
+                     ase_configs = [{}]
+
+                 for ase_config in ase_configs:
+                     final_config = current_config.copy()
+                     final_config.update(ase_config)
+
+                     try:
+                         logger.debug("Attempting connection..")
+                         self._conn = self._sybase.connect(**final_config)
+                         self._conn.timeout = max_query_timeout
+                         logger.info(f"Successfully connected to Sybase using: driver={driver}")
+                         return self._conn
+                     except Exception as e:
+                         error_msg = "Failed to connect to sybase"
+                         logger.debug(error_msg)
+                         errors.append(error_msg)
+                         continue
+         raise ConnectError(f"Failed to connect to Sybase with all attempts. Errors: {errors}")
+
+     def _normalize_driver(self, driver: str) -> str:
+         """Normalize driver string by removing braces, spaces, and converting to lowercase."""
+         return driver.replace("{", "").replace("}", "").replace(" ", "").strip().lower()
+
+     def _detect_driver_type(self, driver: str) -> None:
+         """Detect and set the appropriate driver type."""
+         normalized_driver = self._normalize_driver(driver)
+         self.dialect.sybase_driver_type.is_ase = "adaptive" in normalized_driver
+         self.dialect.sybase_driver_type.is_iq = "iq" in normalized_driver
+         self.dialect.sybase_driver_type.is_freetds = "freetds" in normalized_driver
+
+     def _prepare_driver_string(self, driver: str) -> str:
+         """Ensure driver string is properly formatted with braces."""
+         return f"{{{driver}}}" if not driver.startswith("{") else driver
+
+     def select_table_schema(self, path: DbPath) -> str:
+         database, schema, name = self._normalize_table_path(path)
+         if self.dialect.sybase_driver_type.is_iq:
+             return (
+                 f"SELECT c.column_name, d.domain_name AS data_type, "
+                 f"CASE WHEN d.domain_name IN ('DATE', 'TIME', 'TIMESTAMP') THEN c.scale ELSE NULL END AS datetime_precision, "
+                 f"CASE WHEN t.name IN ('float') THEN 15 WHEN t.name IN ('real') THEN 7 ELSE c.prec END AS numeric_precision, "
+                 f"CASE WHEN t.name IN ('float', 'real') THEN NULL ELSE c.scale END AS numeric_scale, "
+                 f"NULL AS collation_name, c.width AS character_maximum_length "
+                 f"FROM {database}.SYS.SYSTABLE t "
+                 f"JOIN {database}.SYS.SYSCOLUMN c ON t.table_id = c.table_id "
+                 f"JOIN {database}.SYS.SYSDOMAIN d ON c.domain_id = d.domain_id "
+                 f"JOIN {database}.SYS.SYSUSER u ON t.creator = u.user_id "
+                 f"WHERE t.table_name = '{name}' "
+                 f"AND u.user_name = '{schema}'"
+             )
+         elif self.dialect.sybase_driver_type.is_ase:
+             return (
+                 f"SELECT c.name AS column_name, t.name AS data_type, "
+                 f"CASE WHEN c.type IN (61, 111) THEN c.prec ELSE NULL END AS datetime_precision, "
+                 f"CASE WHEN t.name IN ('float') THEN 15 WHEN t.name IN ('real') THEN 7 ELSE c.prec END AS numeric_precision, "
+                 f"CASE WHEN t.name IN ('float', 'real') THEN NULL ELSE c.scale END AS numeric_scale, "
+                 f"NULL AS collation_name, c.length AS character_maximum_length "
+                 f"FROM {database}..sysobjects o "
+                 f"JOIN {database}..syscolumns c ON o.id = c.id "
+                 f"JOIN {database}..systypes t ON c.usertype = t.usertype "
+                 f"JOIN {database}..sysusers u ON o.uid = u.uid "
+                 f"WHERE o.name = '{name}' "
+                 f"AND u.name = '{schema}'"
+             )
+         elif self.dialect.sybase_driver_type.is_freetds:
+             ase_query = (
+                 f"SELECT c.name AS column_name, t.name AS data_type, "
+                 f"CASE WHEN c.type IN (61, 111) THEN c.prec ELSE NULL END AS datetime_precision, "
+                 f"CASE WHEN t.name IN ('float') THEN 15 WHEN t.name IN ('real') THEN 7 ELSE c.prec END AS numeric_precision, "
+                 f"CASE WHEN t.name IN ('float', 'real') THEN NULL ELSE c.scale END AS numeric_scale, "
+                 f"NULL AS collation_name, c.length AS character_maximum_length "
+                 f"FROM {database}..sysobjects o "
+                 f"JOIN {database}..syscolumns c ON o.id = c.id "
+                 f"JOIN {database}..systypes t ON c.usertype = t.usertype "
+                 f"JOIN {database}..sysusers u ON o.uid = u.uid "
+                 f"WHERE o.name = '{name}' "
+                 f"AND u.name = '{schema}'"
+             )
+             iq_query = (
+                 f"SELECT c.name AS column_name, t.name AS data_type, "
+                 f"CASE WHEN c.type IN (61, 111) THEN c.prec ELSE NULL END AS datetime_precision, "
+                 f"CASE WHEN t.name IN ('float') THEN 15 WHEN t.name IN ('real') THEN 7 ELSE c.prec END AS numeric_precision, "
+                 f"CASE WHEN t.name IN ('float', 'real') THEN NULL ELSE c.scale END AS numeric_scale, "
+                 f"NULL AS collation_name, c.length AS character_maximum_length "
+                 f"FROM {database}.dbo.sysobjects o "
+                 f"JOIN {database}.dbo.syscolumns c ON o.id = c.id "
+                 f"JOIN {database}.dbo.systypes t ON c.usertype = t.usertype "
+                 f"JOIN {database}.dbo.sysusers u ON o.uid = u.uid "
+                 f"WHERE o.name = '{name}' AND u.name = '{schema}'"
+             )
+             if self.dialect.query_config_for_free_tds["ase_query_chosen"]:
+                 return ase_query
+             elif self.dialect.query_config_for_free_tds["freetds_query_chosen"]:
+                 return iq_query
+             try:
+                 if self._query_cursor(self._conn.cursor(), ase_query, test_query=True):
+                     logger.info("Sybase ASE Detected")
+                     self.dialect.query_config_for_free_tds["ase_query_chosen"] = True
+                     return ase_query
+                 else:
+                     max_temp_space_usage_query = "SET TEMPORARY OPTION MAX_TEMP_SPACE_PER_CONNECTION = 5120"
+                     if self._query_cursor(self._conn.cursor(), max_temp_space_usage_query, test_query=True):
+                         logger.info("Max temporary space usage set successfully.")
+                     else:
+                         logger.warning("Failed to set max temporary space usage, continuing with default settings.")
+                     logger.info("Sybase IQ Detected")
+
+                     self.dialect.query_config_for_free_tds["freetds_query_chosen"] = True
+                     return iq_query
+             except Exception as e:
+                 logger.error(f"Failed to execute test query: {e}")
+                 raise QueryError(f"Failed to execute test query: {e}")
+         else:
+             raise ValueError(
+                 f"{self.name}: Unsupported driver type: {self._args['driver']}. Supported drivers: ASE, IQ, FreeTDS."
+             )
+
+     def _normalize_table_path(self, path: DbPath) -> DbPath:
+         if len(path) == 1:
+             return self.default_database, self.default_schema, path[0]
+         elif len(path) == 2:
+             return self.default_database, path[0], path[1]
+         elif len(path) == 3:
+             return path
+
+         raise ValueError(
+             f"{self.name}: Bad table path for {self}: '{'.'.join(path)}'. Expected format: table, schema.table, or database.schema.table"
+         )
+
+     def _query_cursor(self, c, sql_code, test_query: bool = False):
+         if test_query:
+             try:
+                 c.execute(sql_code)
+                 return True
+             except Exception as e:
+                 logger.warning(f"Test query failed: {sql_code}, error: {e}")
+                 return False
+         try:
+             c.execute(sql_code)
+             if sql_code.lower().startswith(("select", "explain", "show")):
+                 columns = c.description and [col[0] for col in c.description]
+                 return QueryResult(c.fetchall(), columns)
+             elif sql_code.lower().startswith(("create", "drop")):
+                 try:
+                     c.connection.commit()
+                 except AttributeError:
+                     ...
+         except Exception as _e:
+             try:
+                 c.connection.rollback()
+             except Exception as rollback_error:
+                 logger.error(f"Rollback failed: {rollback_error}")
+             raise
+
+     def close(self):
+         super().close()
+         if self._conn is not None:
+             self._conn.close()
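
Note on the checksum fallback in md5_as_int / md5_as_hex above: when no MD5 HASH() function is available (the FreeTDS / IQ path), the dialect emits a prime-weighted ASCII sum per column instead of a real MD5. The sketch below is a hypothetical Python mirror of that generated SQL, not part of dcs-sdk; the names pseudo_hash and PRIMES are illustrative, base_prime stands in for get_unique_prime_for_column() (the first column receives 101, the next 103, and so on), and it assumes single-byte characters so that Python's ord() agrees with SQL ASCII().

# Hypothetical mirror of the SQL pseudo-hash emitted by Dialect.md5_as_int
# on the FreeTDS/IQ path. For illustration only.
from typing import List, Optional


def generate_primes(limit: int) -> List[int]:
    # Same sieve as in sybase.py above
    sieve = [True] * (limit + 1)
    sieve[0:2] = [False, False]
    for i in range(2, int(limit**0.5) + 1):
        if sieve[i]:
            sieve[i * i : limit + 1 : i] = [False] * len(range(i * i, limit + 1, i))
    return [i for i, is_prime in enumerate(sieve) if is_prime]


PRIMES = generate_primes(1000)


def pseudo_hash(value: Optional[str], base_prime: int = 101) -> int:
    """Approximate the prime-weighted ASCII sum that md5_as_int emits as SQL."""
    s = value or ""  # COALESCE(s, '')
    total = len(s) * base_prime  # LENGTH(COALESCE(s, '')) * base_prime
    for i in range(15):  # first 15 characters, weighted by the first 15 primes
        total += (ord(s[i]) if i < len(s) else 0) * PRIMES[i]
    for j, pos in enumerate([20, 25, 30, 35, 40]):  # sampled 1-based positions
        if len(s) >= pos:
            total += ord(s[pos - 1]) * PRIMES[15 + j]
    if len(s) > 15:  # last character of longer strings
        total += ord(s[-1]) * PRIMES[20]
    return total % 2147483647


# Example: strings that differ only at positions outside the first 15 and the
# sampled offsets can collide; that is the trade-off the fallback accepts in
# exchange for running on servers without HASH().
print(pseudo_hash("hello"), pseudo_hash(""), pseudo_hash(None))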