dcs-sdk 1.7.0__py3-none-any.whl → 1.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dcs_core/core/datasource/file_datasource.py +98 -4
- dcs_core/integrations/databases/azure_blob.py +12 -123
- dcs_core/integrations/databases/duck_db.py +71 -2
- dcs_core/integrations/databases/mssql.py +8 -5
- dcs_core/integrations/databases/oracle.py +2 -4
- dcs_core/integrations/databases/postgres.py +0 -2
- dcs_sdk/__version__.py +1 -1
- {dcs_sdk-1.7.0.dist-info → dcs_sdk-1.7.2.dist-info}/METADATA +2 -2
- {dcs_sdk-1.7.0.dist-info → dcs_sdk-1.7.2.dist-info}/RECORD +11 -11
- {dcs_sdk-1.7.0.dist-info → dcs_sdk-1.7.2.dist-info}/WHEEL +1 -1
- {dcs_sdk-1.7.0.dist-info → dcs_sdk-1.7.2.dist-info}/entry_points.txt +0 -0
dcs_core/core/datasource/file_datasource.py CHANGED

@@ -12,19 +12,113 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

-
+ import os
+ import uuid
+ from abc import ABC, abstractmethod
+ from contextlib import contextmanager
+ from pathlib import Path
+ from typing import Dict, Iterator

+ import duckdb
+ from loguru import logger
+
+ from dcs_core.core.common.models.data_source_resource import RawColumnInfo
  from dcs_core.core.datasource.base import DataSource
+ from dcs_core.integrations.databases.duck_db import DuckDb


- class FileDataSource(DataSource):
+ class FileDataSource(DataSource, ABC):
      """
      Abstract class for File data sources
      """

      def __init__(self, data_source_name: str, data_connection: Dict):
          super().__init__(data_source_name, data_connection)
+         self.temp_dir_name = "tmp"
+
+     @contextmanager
+     def as_duckdb(self, table_name: str) -> Iterator["DuckDb"]:
+         """Returns a DuckDB instance for the given table name"""
+         duckdb_path = self.load_file_to_duckdb(table_name)
+         duck_db_ds = DuckDb(data_source_name=self.data_source_name, data_connection={"file_path": duckdb_path})
+         try:
+             duck_db_ds.connect()
+             yield duck_db_ds
+         finally:
+             duck_db_ds.close()
+
+     @abstractmethod
+     def query_get_table_names(self) -> dict:
+         """
+         Query to get table names
+         """
+         pass

-
-
+     @abstractmethod
+     def query_get_database_version(self) -> str:
+         """
+         Get the database version
+         :return: version string
+         """
          pass
+
+     @abstractmethod
+     def _download_to_path(self, table_name: str, path: str) -> None:
+         """Vendor-specific download"""
+         pass
+
+     def load_file_to_duckdb(self, table_name: str) -> str:
+         """Template method"""
+         os.makedirs(self.temp_dir_name, exist_ok=True)
+
+         ext = Path(table_name).suffix
+         if not ext:
+             raise ValueError(f"Invalid file name {table_name}")
+
+         temp_path = f"{self.temp_dir_name}/{uuid.uuid4()}{ext}"
+
+         try:
+             self._download_to_path(table_name, temp_path)
+             return self._load_path_to_duckdb(temp_path, table_name)
+         finally:
+             if os.path.exists(temp_path):
+                 os.remove(temp_path)
+                 logger.info(f"Cleaned up temp file {temp_path}")
+
+     def _load_path_to_duckdb(self, path: str, table_name: str) -> str:
+         """Shared DuckDB loading logic"""
+         tmp_dir = self.temp_dir_name
+         duckdb_path = f"{tmp_dir}/{uuid.uuid4()}.duckdb"
+         table_stem = Path(table_name).stem
+
+         logger.info(f"Loading {path} into DuckDB")
+
+         conn = None
+         try:
+             conn = duckdb.connect(database=duckdb_path, read_only=False)
+             conn.execute(
+                 f'CREATE TABLE "{table_stem}" AS SELECT * FROM read_csv_auto(?)',
+                 [path],
+             )
+             logger.info(f"Successfully loaded data into {duckdb_path}")
+             return duckdb_path
+         except Exception as e:
+             logger.warning(f"read_csv_auto failed: {e}. Trying with ALL_VARCHAR=TRUE")
+             try:
+                 if conn:
+                     conn.close()
+                 conn = duckdb.connect(database=duckdb_path, read_only=False)
+                 conn.execute(
+                     f'CREATE TABLE "{table_stem}" AS ' f"SELECT * FROM read_csv(?, ALL_VARCHAR=TRUE, SAMPLE_SIZE=-1)",
+                     [path],
+                 )
+                 logger.info(f"Successfully loaded data with ALL_VARCHAR into {duckdb_path}")
+                 return duckdb_path
+             except Exception as fallback_error:
+                 logger.error(f"Failed to load CSV into DuckDB: {fallback_error}")
+                 if os.path.exists(duckdb_path):
+                     os.remove(duckdb_path)
+                 raise
+         finally:
+             if conn:
+                 conn.close()
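The new FileDataSource base turns file ingestion into a template method: a vendor-specific subclass only implements _download_to_path (plus the two abstract query_* methods), and as_duckdb handles the download, the DuckDB load, and cleanup. Below is a minimal sketch of how a concrete subclass could look, assuming only the signatures visible in the diff above; the LocalCsvDataSource class, the base_dir connection key, and the file names are illustrative, not part of the SDK.

import shutil
from typing import Dict

from dcs_core.core.datasource.file_datasource import FileDataSource


class LocalCsvDataSource(FileDataSource):
    """Hypothetical FileDataSource that 'downloads' CSV files from a local directory."""

    def __init__(self, data_source_name: str, data_connection: Dict):
        super().__init__(data_source_name, data_connection)
        self.base_dir = data_connection.get("base_dir", ".")

    def query_get_table_names(self) -> dict:
        # A real implementation would enumerate the files it can serve.
        return {"tables": ["example.csv"]}

    def query_get_database_version(self) -> str:
        return "local-filesystem"

    def _download_to_path(self, table_name: str, path: str) -> None:
        # Copy the source file into the temp path managed by load_file_to_duckdb().
        shutil.copyfile(f"{self.base_dir}/{table_name}", path)


if __name__ == "__main__":
    ds = LocalCsvDataSource("local_csv", {"base_dir": "/data"})
    # as_duckdb() downloads the file, loads it into a temporary DuckDB database and
    # yields a connected DuckDb datasource; the .duckdb file is removed on close().
    with ds.as_duckdb("example.csv") as duck:
        print(duck.fetchall('SELECT COUNT(*) FROM "example"'))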
dcs_core/integrations/databases/azure_blob.py CHANGED

@@ -12,21 +12,12 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

- import io
- import os
- import uuid
- from concurrent.futures import ThreadPoolExecutor
- from pathlib import Path
- from queue import Empty, Queue
- from typing import Any, Dict, List, Optional
+ from typing import Any, Dict, Optional

- import duckdb
- import pandas as pd
  from azure.storage.blob import BlobServiceClient
  from loguru import logger

  from dcs_core.core.common.errors import (
-     DatachecksColumnFetchError,
      DataChecksDataSourcesConnectionError,
      DatachecksTableFetchError,
  )

@@ -38,6 +29,7 @@ class AzureBlobDataSource(FileDataSource):
          super().__init__(data_source_name, data_connection)
          self.allowed_file_extensions = [".csv"]
          self.blob_service_client: Optional[BlobServiceClient] = None
+         self.DEFAULT_NUMERIC_PRECISION = 16383
          self.connection = None

      def connect(self) -> Any:

@@ -90,28 +82,8 @@
          except Exception as e:
              raise DatachecksTableFetchError(f"Failed to list blobs: {e}")

-     def
-
-         Get column names for a table (CSV blob in this case).
-         """
-         if not self.is_connected():
-             raise DataChecksDataSourcesConnectionError("Not connected to Azure Blob Storage")
-
-         if not any(table.endswith(ext) for ext in self.allowed_file_extensions):
-             raise ValueError(f"Unsupported file type for {table}. Allowed: {self.allowed_file_extensions}")
-
-         try:
-             blob_client = self.connection.get_blob_client(blob=table)
-             download_stream = blob_client.download_blob()
-             data = download_stream.readall()
-             if table.endswith(".csv"):
-                 df = pd.read_csv(io.BytesIO(data))
-             else:
-                 raise ValueError(f"Unsupported file type for {table}. Allowed: {self.allowed_file_extensions}")
-
-             return [{"column_name": col, "column_type": "string"} for col in df.columns.tolist()]
-         except Exception as e:
-             raise DatachecksColumnFetchError(f"Failed to read columns from blob '{table}': {e}")
+     def safe_get(self, lst, idx, default=None):
+         return lst[idx] if 0 <= idx < len(lst) else default

      def query_get_database_version(self) -> str:
          """

@@ -121,97 +93,14 @@
          api_version = self.blob_service_client.api_version
          return api_version

-     def
-         """
-         df = pd.DataFrame()
-         try:
-             while True:
-                 try:
-                     data = queue.get(timeout=timeout)
-                 except Empty:
-                     continue
-
-                 if data is None:
-                     break
-
-                 try:
-                     chunk = pd.read_csv(io.BytesIO(data), dtype=str)
-                     df = pd.concat([df, chunk], ignore_index=True)
-                 except Exception as e:
-                     logger.error(f"[ERROR] Failed to read CSV chunk: {e}")
-                     continue
-
-         except Exception as e:
-             logger.error(f"[FATAL] Consumer crashed: {e}")
-
-         finally:
-             result_df.append(df)
-
-     def _load_blob_to_pandas(self, table_name: str):
+     def _download_to_path(self, table_name: str, path: str):
+         """Download blob to path"""
          blob_client = self.connection.get_blob_client(blob=table_name)
-
-         blob_size = blob_client.get_blob_properties().size
-         start = 0
-         queue = Queue()
-         result_df = []
-
-         with ThreadPoolExecutor(max_workers=1) as executor:
-             executor.submit(self._chunk_load_to_pandas, queue, result_df)
-
-             all_data = b""
-             while start < blob_size:
-                 end = min(start + CHUNK_SIZE - 1, blob_size - 1)
-                 data = blob_client.download_blob(offset=start, length=end - start + 1).readall()
-                 all_data += data
-                 queue.put(data)
-                 start += CHUNK_SIZE
-
-             queue.put(None)
-         if not result_df or len(result_df) == 0:
-             raise ValueError("No data downloaded from Azure Blob Storage")
-         return result_df[0]
-
-     def _load_pd_to_duckdb(self, df: pd.DataFrame, table_name: str):
-         dir_name = "tmp"
-         if not os.path.exists(dir_name):
-             os.makedirs(dir_name)
-
-         duck_db_file_name = f"{dir_name}/{uuid.uuid4()}.duckdb"
-         file_path = None
+         logger.info(f"Downloading {table_name} to {path}")
          try:
-
-
-
-
-             file_path = duck_db_file_name
-
-             conn.register("df_view", df)
-
-             conn.execute(
-                 f"""
-                 CREATE OR REPLACE TABLE "{table_name}" AS
-                 SELECT * FROM df_view;
-                 """
-             )
-             conn.unregister("df_view")
-             conn.close()
-
+             with open(path, "wb") as f:
+                 stream = blob_client.download_blob()
+                 for chunk in stream.chunks():
+                     f.write(chunk)
          except Exception as e:
-
-             raise
-
-         return file_path
-
-     def load_file_to_duckdb(self, table_name: str):
-         logger.info(f"Loading {table_name} to pandas")
-         df: pd.DataFrame = self._load_blob_to_pandas(table_name)
-
-         if df is None or df.empty:
-             raise ValueError("No data downloaded from Azure Blob Storage")
-
-         name_only = Path(table_name).stem
-
-         logger.info(f"Loading {table_name} to duckdb")
-         file_path = self._load_pd_to_duckdb(df, name_only)
-
-         return file_path
+             raise DataChecksDataSourcesConnectionError(f"Failed to download blob '{table_name}': {e}")
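The Azure integration drops the pandas/ThreadPoolExecutor pipeline entirely: instead of buffering blob chunks into a DataFrame and registering it with DuckDB, the new _download_to_path streams the blob straight to a local file and lets the shared FileDataSource template handle the DuckDB load. A standalone sketch of that streaming pattern with azure-storage-blob follows; the connection string, container, and blob names are placeholders.

from azure.storage.blob import BlobServiceClient

service = BlobServiceClient.from_connection_string("<connection-string>")
blob_client = service.get_blob_client(container="my-container", blob="example.csv")

# Write the blob to disk chunk by chunk instead of holding the whole payload in memory.
with open("/tmp/example.csv", "wb") as f:
    for chunk in blob_client.download_blob().chunks():
        f.write(chunk)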
dcs_core/integrations/databases/duck_db.py CHANGED

@@ -12,12 +12,14 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  import os
+ from pathlib import Path
  from typing import Any, Dict

  import duckdb
  from loguru import logger

  from dcs_core.core.common.errors import DataChecksDataSourcesConnectionError
+ from dcs_core.core.common.models.data_source_resource import RawColumnInfo
  from dcs_core.core.datasource.sql_datasource import SQLDataSource


@@ -26,6 +28,20 @@ class DuckDb(SQLDataSource):
          super().__init__(data_source_name, data_connection)
          self.connection = None
          self.use_sa_text_query = False
+         self.regex_patterns = {
+             "uuid": r"^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$",
+             "usa_phone": r"^(\+1[-.\s]?)?(\(?\d{3}\)?[-.\s]?)?\d{3}[-.\s]?\d{4}$",
+             "email": r"^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$",
+             "usa_zip_code": r"^[0-9]{5}(?:-[0-9]{4})?$",
+             "ssn": r"^[0-9]{3}-[0-9]{2}-[0-9]{4}$",
+             "sedol": r"^[B-DF-HJ-NP-TV-XZ0-9]{6}[0-9]$",
+             "lei": r"^[A-Z0-9]{18}[0-9]{2}$",
+             "cusip": r"^[0-9A-Z]{9}$",
+             "figi": r"^BBG[A-Z0-9]{9}$",
+             "isin": r"^[A-Z]{2}[A-Z0-9]{9}[0-9]$",
+             "perm_id": r"^\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{3}$",
+         }
+         self.DEFAULT_NUMERIC_PRECISION = 16383

      def connect(self) -> Any:
          """

@@ -49,9 +65,12 @@
          Close the connection
          """
          logger.info("Closing DuckDB connection")
-         self.connection
+         if self.connection:
+             self.connection.close()
          try:
-
+             fp = self.data_connection.get("file_path")
+             if fp and os.path.exists(fp):
+                 os.remove(fp)
          except Exception as e:
              logger.error(f"Failed to remove the file {self.data_connection.get('file_path')}: {e}")


@@ -70,3 +89,53 @@ class DuckDb(SQLDataSource):
          :return: quoted column name
          """
          return f'"{column}"'
+
+     def query_get_table_columns(
+         self,
+         table: str,
+         schema: str | None = None,
+     ) -> Dict[str, RawColumnInfo]:
+         """
+         Get the schema of a table.
+         :param table: table name
+         :return: Dictionary with column names and their types
+         """
+         schema = schema or self.schema_name
+         info_schema_path = ["information_schema", "columns"]
+         if self.database:
+             database = self.quote_database(self.database)
+             info_schema_path.insert(0, database)
+
+         query = f"""
+             SELECT
+                 column_name,
+                 data_type,
+                 CASE WHEN data_type IN ('TIMESTAMP', 'TIME') THEN datetime_precision ELSE NULL END AS datetime_precision,
+                 CASE WHEN data_type = 'DECIMAL' THEN COALESCE(numeric_precision, 131072 + {self.DEFAULT_NUMERIC_PRECISION})
+                      WHEN data_type IN ('DOUBLE', 'REAL', 'FLOAT') THEN numeric_precision
+                      ELSE numeric_precision END AS numeric_precision,
+                 CASE WHEN data_type = 'DECIMAL' THEN COALESCE(numeric_scale, {self.DEFAULT_NUMERIC_PRECISION}) ELSE numeric_scale END AS numeric_scale,
+                 NULL AS collation_name,
+                 CASE WHEN data_type = 'VARCHAR' THEN character_maximum_length ELSE NULL END AS character_maximum_length
+             FROM information_schema.columns
+             WHERE table_name = '{table}'
+             ORDER BY ordinal_position
+         """
+
+         rows = self.fetchall(query)
+         if not rows:
+             raise RuntimeError(f"{table}: Table, {schema}: Schema, does not exist, or has no columns")
+
+         column_info = {
+             r[0]: RawColumnInfo(
+                 column_name=self.safe_get(r, 0),
+                 data_type=self.safe_get(r, 1),
+                 datetime_precision=self.safe_get(r, 2),
+                 numeric_precision=self.safe_get(r, 3),
+                 numeric_scale=self.safe_get(r, 4),
+                 collation_name=self.safe_get(r, 5),
+                 character_maximum_length=self.safe_get(r, 6),
+             )
+             for r in rows
+         }
+         return column_info
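DuckDb now exposes query_get_table_columns, which reads information_schema.columns and returns RawColumnInfo records keyed by column name, so profiling code can inspect types and precision for tables created by load_file_to_duckdb. A rough usage sketch, assuming a DuckDB file already exists at the given path; the path and table name are placeholders.

from dcs_core.integrations.databases.duck_db import DuckDb

ds = DuckDb(data_source_name="duckdb_local", data_connection={"file_path": "/tmp/example.duckdb"})
ds.connect()
try:
    # Maps column name -> RawColumnInfo (data type, numeric precision/scale, max length, ...).
    columns = ds.query_get_table_columns("example")
    for name, info in columns.items():
        print(name, info.data_type)
finally:
    # Note: close() also removes the file referenced by data_connection["file_path"].
    ds.close()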
dcs_core/integrations/databases/mssql.py CHANGED

@@ -167,8 +167,6 @@ class MssqlDataSource(SQLDataSource):
          :return: Dictionary with index details
          """
          schema = schema or self.schema_name
-         table = table.upper()
-         schema = schema.upper()

          query = f"""
              SELECT

@@ -673,8 +671,6 @@ class MssqlDataSource(SQLDataSource):

              quoted_name = self.quote_column(name)

-             query_parts.append(f"COUNT(DISTINCT {quoted_name}) AS [{name}_distinct]")
-             query_parts.append(f"COUNT({quoted_name}) - COUNT(DISTINCT {quoted_name}) AS [{name}_duplicate]")
              query_parts.append(f"SUM(CASE WHEN {quoted_name} IS NULL THEN 1 ELSE 0 END) AS [{name}_is_null]")

              if dtype in (

@@ -690,13 +686,20 @@ class MssqlDataSource(SQLDataSource):
                  "money",
                  "smallmoney",
              ):
+                 query_parts.append(f"COUNT({quoted_name}) - COUNT(DISTINCT {quoted_name}) AS [{name}_duplicate]")
                  query_parts.append(f"MIN({quoted_name}) AS [{name}_min]")
                  query_parts.append(f"MAX({quoted_name}) AS [{name}_max]")
                  query_parts.append(f"AVG(CAST({quoted_name} AS FLOAT)) AS [{name}_average]")
+                 query_parts.append(f"COUNT(DISTINCT {quoted_name}) AS [{name}_distinct]")

-             elif dtype in ("varchar", "nvarchar", "char", "nchar"
+             elif dtype in ("varchar", "nvarchar", "char", "nchar"):
                  query_parts.append(f"MAX(LEN({quoted_name})) AS [{name}_max_character_length]")

+             elif dtype in ("text", "ntext", "xml"):
+                 query_parts.append(
+                     f"MAX(LEN(CAST({quoted_name} AS NVARCHAR(MAX)))) " f"AS [{name}_max_character_length]"
+                 )
+
              if additional_queries:
                  query_parts.extend(additional_queries)

dcs_core/integrations/databases/oracle.py CHANGED

@@ -143,8 +143,6 @@ class OracleDataSource(SQLDataSource):
          :return: Dictionary with index details
          """
          schema = schema or self.schema_name
-         table = table.upper()
-         schema = schema.upper()

          query = f"""
              SELECT

@@ -696,8 +694,8 @@
              AND r_ac.OWNER = r_acc.OWNER
              AND acc.POSITION = r_acc.POSITION
          WHERE ac.CONSTRAINT_TYPE = 'R'
-             AND ac.TABLE_NAME = '{table_name
-             AND ac.OWNER = '{schema
+             AND ac.TABLE_NAME = '{table_name}'
+             AND ac.OWNER = '{schema}';
          """

          try:
dcs_sdk/__version__.py CHANGED

{dcs_sdk-1.7.0.dist-info → dcs_sdk-1.7.2.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: dcs-sdk
- Version: 1.7.0
+ Version: 1.7.2
  Summary: SDK for DataChecks
  Author: Waterdip Labs
  Author-email: hello@waterdip.ai

@@ -86,7 +86,7 @@ Requires-Dist: vertica-python (>=1.4.0) ; extra == "vertica" or extra == "all-db
  Description-Content-Type: text/markdown

  <h1 align="center">
- DCS SDK v1.7.0
+ DCS SDK v1.7.2
  </h1>

  > SDK for DataChecks

{dcs_sdk-1.7.0.dist-info → dcs_sdk-1.7.2.dist-info}/RECORD CHANGED

@@ -64,7 +64,7 @@ dcs_core/core/configuration/configuration_parser.py,sha256=ue7tzWkOpamhXw_DJhr5Z
  dcs_core/core/configuration/configuration_parser_arc.py,sha256=TOoPf12pEXLdkjEGJEGV6rJOMR8yqLedla6T1x6g-Xw,14057
  dcs_core/core/datasource/__init__.py,sha256=RkfhRKLXEForLCs4rZkTf0qc_b0TokSggSAcKI4yfZg,610
  dcs_core/core/datasource/base.py,sha256=YD_UuGuoORFJNX30IQMk6aitiiTCHaiAddSNgUBmRtA,1935
- dcs_core/core/datasource/file_datasource.py,sha256=
+ dcs_core/core/datasource/file_datasource.py,sha256=_uwxnunv8bF9IzKF3oC-lHeaG1mmQBsbQbgRjPAn208,4349
  dcs_core/core/datasource/manager.py,sha256=cuh6XAOCxn2b9SQxYwYurgBb6WUD8ZS6KRIg3FAloYU,4824
  dcs_core/core/datasource/search_datasource.py,sha256=_conk1Q_kywJhKHYyEScoKlVt_yRd05zuAISvDmXqjw,15014
  dcs_core/core/datasource/sql_datasource.py,sha256=dlX-E--hadl2q8XpMNRyZmLGC35tltBsGDzlyZqzqtw,40730

@@ -100,17 +100,17 @@ dcs_core/core/validation/uniqueness_validation.py,sha256=a6zm0_omiULKbQcDit8J913
  dcs_core/core/validation/validity_validation.py,sha256=358oAGH112oVxyPhDnfT-ypVaMAkpZ8pM73qogtdh9w,35297
  dcs_core/integrations/__init__.py,sha256=RkfhRKLXEForLCs4rZkTf0qc_b0TokSggSAcKI4yfZg,610
  dcs_core/integrations/databases/__init__.py,sha256=RkfhRKLXEForLCs4rZkTf0qc_b0TokSggSAcKI4yfZg,610
- dcs_core/integrations/databases/azure_blob.py,sha256=
+ dcs_core/integrations/databases/azure_blob.py,sha256=XF-B790XA1sGyhgIWUPE-BRHR_-ctA31IjI66pNL6eM,4231
  dcs_core/integrations/databases/bigquery.py,sha256=26RuypLMmiARZIWkV_mxtnNL2yCs94YWerSGH5Nr10Q,7337
  dcs_core/integrations/databases/databricks.py,sha256=n4fm5m_mtRCdtjLGDvbNW18u7Ev234vDBjq_lxuOxns,1978
  dcs_core/integrations/databases/db2.py,sha256=hNGivvYCitp88ouZlCxp7iRQ-vnPiK1kL8x85NyGotk,26492
- dcs_core/integrations/databases/duck_db.py,sha256=
+ dcs_core/integrations/databases/duck_db.py,sha256=X4FRSsobOFCIi329cYofQsMd_fkRI4KxC8BIrtiDz4g,5531
  dcs_core/integrations/databases/elasticsearch.py,sha256=6CTGs1WGrfgdDRNVt9DpOB0_z_znT6YoVj10E1WY-wQ,2152
- dcs_core/integrations/databases/mssql.py,sha256=
+ dcs_core/integrations/databases/mssql.py,sha256=9yBWwXLZeRUZtReVGE42ku4bTOGvTmy2jRYl1t9afak,39053
  dcs_core/integrations/databases/mysql.py,sha256=mUFLIGdbF_ktIlA19P7kq7holp5ZkRezGgN6TL_uiJ4,15815
  dcs_core/integrations/databases/opensearch.py,sha256=XeDaHRLLym3wFeA_N6RzQEHmQCI3DjD8A86Y9UKwFEM,2190
- dcs_core/integrations/databases/oracle.py,sha256=
- dcs_core/integrations/databases/postgres.py,sha256=
+ dcs_core/integrations/databases/oracle.py,sha256=IIUeoBlwFtu6XYJh9Pr5bqZz_YQJJN6SE74HRC8zGxs,29116
+ dcs_core/integrations/databases/postgres.py,sha256=j3za4dcOkoG0HhwNo4C2Ack2DfpphUfFKponPR69zqY,21264
  dcs_core/integrations/databases/redshift.py,sha256=R9eYxpD1Ve3ChZb-gyClJ6suSljG53O6Wez2GzUW0k0,2043
  dcs_core/integrations/databases/snowflake.py,sha256=NI6sgL9iakyCbIxtj0DiqeOpF5F9ybuhtG_IwvT86Ws,1942
  dcs_core/integrations/databases/spark_df.py,sha256=pO9hSENLdrRaPvPa66yCrKS2iv5JWJBsU9XB13BBasY,3659

@@ -134,7 +134,7 @@ dcs_core/report/static/index.js,sha256=p4wvku-zlXi0y4gWeSzV1amY0s4mjtUq2QsezARLV
  dcs_core/report/static/index.js.LICENSE.txt,sha256=bBDZBJVEDrqjCi7sfoF8CchjFn3hdcbNkP7ub7kbcXQ,201041
  dcs_sdk/__init__.py,sha256=RkfhRKLXEForLCs4rZkTf0qc_b0TokSggSAcKI4yfZg,610
  dcs_sdk/__main__.py,sha256=Qn8stIaQGrdLjHQ-H7xO0T-brtq5RWZoWU9QvqoarV8,683
- dcs_sdk/__version__.py,sha256=
+ dcs_sdk/__version__.py,sha256=lY21yR6SjBLmx4p_ke5vm0njUHCIUOS4nF7jJjG9wD0,633
  dcs_sdk/cli/__init__.py,sha256=RkfhRKLXEForLCs4rZkTf0qc_b0TokSggSAcKI4yfZg,610
  dcs_sdk/cli/cli.py,sha256=jaO52UrMWLafcF_yhqllPkmYSTuO2sksFi30fYFdAB4,4406
  dcs_sdk/sdk/__init__.py,sha256=skrZcgWWJBL6NXTUERywJ3qRJRemgpDXyW7lPg1FJk8,2107

@@ -156,7 +156,7 @@ dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py,sha256=puAWP
  dcs_sdk/sdk/utils/table.py,sha256=X8HxdYTWyx_oVrBWPsXlmA-xJKXXDBW9RrhlWNqA1As,18224
  dcs_sdk/sdk/utils/themes.py,sha256=Meo2Yldv4uyPpEqI7qdA28Aa6sxtwUU1dLKKm4QavjM,1403
  dcs_sdk/sdk/utils/utils.py,sha256=a9QGEVL8L7asbJm_VBwgKvJQknsvuqWS0uTUaHsDPiY,16463
- dcs_sdk-1.7.
- dcs_sdk-1.7.
- dcs_sdk-1.7.
- dcs_sdk-1.7.
+ dcs_sdk-1.7.2.dist-info/METADATA,sha256=wLWP2WELM_fo2dGYJK9Uwea76M1rg1S2sexM7sVQ8cE,7652
+ dcs_sdk-1.7.2.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
+ dcs_sdk-1.7.2.dist-info/entry_points.txt,sha256=XhODNz7UccgPOyklXgp7pIfTTXArd6-V0mImjhnhwto,80
+ dcs_sdk-1.7.2.dist-info/RECORD,,
{dcs_sdk-1.7.0.dist-info → dcs_sdk-1.7.2.dist-info}/entry_points.txt: File without changes