dcs-sdk 1.6.7__py3-none-any.whl → 1.6.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dcs_sdk/__version__.py +1 -1
- dcs_sdk/sdk/data_diff/data_differ.py +8 -2
- dcs_sdk/sdk/utils/utils.py +20 -19
- {dcs_sdk-1.6.7.dist-info → dcs_sdk-1.6.9.dist-info}/METADATA +12 -20
- {dcs_sdk-1.6.7.dist-info → dcs_sdk-1.6.9.dist-info}/RECORD +7 -7
- {dcs_sdk-1.6.7.dist-info → dcs_sdk-1.6.9.dist-info}/WHEEL +0 -0
- {dcs_sdk-1.6.7.dist-info → dcs_sdk-1.6.9.dist-info}/entry_points.txt +0 -0
dcs_sdk/__version__.py
CHANGED
|
@@ -267,6 +267,10 @@ class DBTableDiffer:
|
|
|
267
267
|
) from e
|
|
268
268
|
|
|
269
269
|
except Exception as e:
|
|
270
|
+
self.cleanup_duckdb(
|
|
271
|
+
src=self.config.source.filepath,
|
|
272
|
+
target=self.config.target.filepath,
|
|
273
|
+
)
|
|
270
274
|
raise RuntimeError(f"process_duckdb failed for {'source' if is_source else 'target'}: {e}") from e
|
|
271
275
|
|
|
272
276
|
def _prepare_source_table(self) -> Optional[str]:
|
|
@@ -785,8 +789,10 @@ class DBTableDiffer:
|
|
|
785
789
|
with suppress(Exception):
|
|
786
790
|
db_connection.close()
|
|
787
791
|
|
|
788
|
-
|
|
789
|
-
|
|
792
|
+
if hasattr(self.table1, "database"):
|
|
793
|
+
safe_close(self.table1.database)
|
|
794
|
+
if hasattr(self.table2, "database"):
|
|
795
|
+
safe_close(self.table2.database)
|
|
790
796
|
|
|
791
797
|
if self.source_db:
|
|
792
798
|
self.source_db.drop_view_from_db(
|
dcs_sdk/sdk/utils/utils.py
CHANGED
|
@@ -158,7 +158,8 @@ def chunk_load_to_pandas(queue: Queue, result_df: list, timeout: float = 2.0):
|
|
|
158
158
|
break
|
|
159
159
|
|
|
160
160
|
try:
|
|
161
|
-
|
|
161
|
+
chunk = pd.read_csv(io.BytesIO(data), dtype=str)
|
|
162
|
+
df = pd.concat([df, chunk], ignore_index=True)
|
|
162
163
|
except Exception as e:
|
|
163
164
|
logger.error(f"[ERROR] Failed to read CSV chunk: {e}")
|
|
164
165
|
continue
|
|
@@ -232,21 +233,25 @@ def duck_db_load_pd_to_table(config: Comparison, is_source: bool = False, df: pd
|
|
|
232
233
|
table_name = config.source.table if is_source else config.target.table
|
|
233
234
|
|
|
234
235
|
conn = duckdb.connect(database=duck_db_file_name, read_only=False)
|
|
236
|
+
if is_source:
|
|
237
|
+
config.source.filepath = duck_db_file_name
|
|
238
|
+
else:
|
|
239
|
+
config.target.filepath = duck_db_file_name
|
|
235
240
|
|
|
236
241
|
conn.register("df_view", df)
|
|
237
242
|
|
|
238
243
|
conn.execute(
|
|
239
244
|
f"""
|
|
240
|
-
CREATE OR REPLACE TABLE {table_name} AS
|
|
245
|
+
CREATE OR REPLACE TABLE "{table_name}" AS
|
|
241
246
|
SELECT * FROM df_view;
|
|
242
247
|
"""
|
|
243
248
|
)
|
|
244
249
|
|
|
245
250
|
if pk_cols and len(pk_cols) > 0:
|
|
246
|
-
|
|
251
|
+
quoted_pk_cols = ", ".join([f'"{col}"' for col in pk_cols])
|
|
247
252
|
conn.execute(
|
|
248
253
|
f"""
|
|
249
|
-
CREATE INDEX idx_{table_name} ON {table_name} ({
|
|
254
|
+
CREATE INDEX "idx_{table_name}" ON "{table_name}" ({quoted_pk_cols});
|
|
250
255
|
"""
|
|
251
256
|
)
|
|
252
257
|
|
|
@@ -254,7 +259,7 @@ def duck_db_load_pd_to_table(config: Comparison, is_source: bool = False, df: pd
|
|
|
254
259
|
view_name = f"{table_name}_query"
|
|
255
260
|
conn.execute(
|
|
256
261
|
f"""
|
|
257
|
-
CREATE VIEW {view_name} AS {query};
|
|
262
|
+
CREATE VIEW "{view_name}" AS {query};
|
|
258
263
|
"""
|
|
259
264
|
)
|
|
260
265
|
|
|
@@ -265,10 +270,6 @@ def duck_db_load_pd_to_table(config: Comparison, is_source: bool = False, df: pd
|
|
|
265
270
|
logger.error(f"Error in loading CSV to DuckDB: {e}")
|
|
266
271
|
return False
|
|
267
272
|
|
|
268
|
-
if is_source:
|
|
269
|
-
config.source.filepath = duck_db_file_name
|
|
270
|
-
else:
|
|
271
|
-
config.target.filepath = duck_db_file_name
|
|
272
273
|
return True
|
|
273
274
|
|
|
274
275
|
|
|
@@ -298,6 +299,12 @@ def duck_db_load_csv_to_table(config: Comparison, path, is_source: bool = False)
|
|
|
298
299
|
try:
|
|
299
300
|
table_name = generate_table_name(csv_file)
|
|
300
301
|
conn = duckdb.connect(database=duck_db_file_name, read_only=False)
|
|
302
|
+
if is_source:
|
|
303
|
+
config.source.filepath = duck_db_file_name
|
|
304
|
+
config.source.table = table_name
|
|
305
|
+
else:
|
|
306
|
+
config.target.filepath = duck_db_file_name
|
|
307
|
+
config.target.table = table_name
|
|
301
308
|
conn.execute(
|
|
302
309
|
"""
|
|
303
310
|
CREATE OR REPLACE TABLE {} AS SELECT * FROM read_csv('{}',HEADER=True, UNION_BY_NAME=True, nullstr='NULL', all_varchar=True, IGNORE_ERRORS=TRUE);
|
|
@@ -307,14 +314,14 @@ def duck_db_load_csv_to_table(config: Comparison, path, is_source: bool = False)
|
|
|
307
314
|
)
|
|
308
315
|
|
|
309
316
|
if pk_cols and len(pk_cols) > 0:
|
|
310
|
-
|
|
317
|
+
quoted_pk_cols = ", ".join(['"{}"'.format(col) for col in pk_cols])
|
|
311
318
|
conn.execute(
|
|
312
319
|
"""
|
|
313
|
-
CREATE INDEX idx_{} ON {} ({});
|
|
320
|
+
CREATE INDEX "idx_{}" ON "{}" ({});
|
|
314
321
|
""".format(
|
|
315
322
|
table_name,
|
|
316
323
|
table_name,
|
|
317
|
-
|
|
324
|
+
quoted_pk_cols,
|
|
318
325
|
)
|
|
319
326
|
)
|
|
320
327
|
|
|
@@ -322,7 +329,7 @@ def duck_db_load_csv_to_table(config: Comparison, path, is_source: bool = False)
|
|
|
322
329
|
table_name = f"{table_name}_query"
|
|
323
330
|
conn.execute(
|
|
324
331
|
"""
|
|
325
|
-
CREATE VIEW {} AS {};
|
|
332
|
+
CREATE VIEW "{}" AS {};
|
|
326
333
|
""".format(
|
|
327
334
|
table_name, query
|
|
328
335
|
)
|
|
@@ -332,12 +339,6 @@ def duck_db_load_csv_to_table(config: Comparison, path, is_source: bool = False)
|
|
|
332
339
|
logger.error(f"Error in loading CSV to DuckDB: {e}")
|
|
333
340
|
return False
|
|
334
341
|
|
|
335
|
-
if is_source:
|
|
336
|
-
config.source.filepath = duck_db_file_name
|
|
337
|
-
config.source.table = table_name
|
|
338
|
-
else:
|
|
339
|
-
config.target.filepath = duck_db_file_name
|
|
340
|
-
config.target.table = table_name
|
|
341
342
|
return True
|
|
342
343
|
|
|
343
344
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dcs-sdk
|
|
3
|
-
Version: 1.6.7
|
|
3
|
+
Version: 1.6.9
|
|
4
4
|
Summary: SDK for DataChecks
|
|
5
5
|
Author: Waterdip Labs
|
|
6
6
|
Author-email: hello@waterdip.ai
|
|
@@ -86,12 +86,11 @@ Requires-Dist: vertica-python (>=1.4.0) ; extra == "vertica" or extra == "all-db
|
|
|
86
86
|
Description-Content-Type: text/markdown
|
|
87
87
|
|
|
88
88
|
<h1 align="center">
|
|
89
|
-
DCS SDK v1.6.7
|
|
89
|
+
DCS SDK v1.6.9
|
|
90
90
|
</h1>
|
|
91
91
|
|
|
92
92
|
> SDK for DataChecks
|
|
93
93
|
|
|
94
|
-
|
|
95
94
|
## Installation
|
|
96
95
|
|
|
97
96
|
> Python version `>=3.10,<3.13`
|
|
@@ -108,24 +107,19 @@ $ pip install dcs-sdk[all-dbs]
|
|
|
108
107
|
|
|
109
108
|
| Database | Code Name | Supported |
|
|
110
109
|
| ----------------- | ------------ | --------- |
|
|
111
|
-
| PostgreSQL | `postgres` | ✅
|
|
112
|
-
| Snowflake | `snowflake` | ✅
|
|
113
|
-
| Trino | `trino` | ✅
|
|
114
|
-
| Databricks | `databricks` | ✅
|
|
115
|
-
| Oracle | `oracle` | ✅
|
|
116
|
-
| MSSQL | `mssql` | ✅
|
|
117
|
-
| MySQL | `mysql` | ✅
|
|
118
|
-
| SAP Sybase IQ/ASE | `sybase` | ✅
|
|
119
|
-
| File | `file` | ✅
|
|
120
|
-
| BigQuery | `bigquery` | ✅
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
110
|
+
| PostgreSQL | `postgres` | ✅ |
|
|
111
|
+
| Snowflake | `snowflake` | ✅ |
|
|
112
|
+
| Trino | `trino` | ✅ |
|
|
113
|
+
| Databricks | `databricks` | ✅ |
|
|
114
|
+
| Oracle | `oracle` | ✅ |
|
|
115
|
+
| MSSQL | `mssql` | ✅ |
|
|
116
|
+
| MySQL | `mysql` | ✅ |
|
|
117
|
+
| SAP Sybase IQ/ASE | `sybase` | ✅ |
|
|
118
|
+
| File | `file` | ✅ |
|
|
119
|
+
| BigQuery | `bigquery` | ✅ |
|
|
124
120
|
|
|
125
121
|
## Available Commands
|
|
126
122
|
|
|
127
|
-
|
|
128
|
-
|
|
129
123
|
| Option | Short Option | Required | Default | Description | Example |
|
|
130
124
|
| :-----------: | :----------: | :------: | :-------------: | :------------------------------------------------: | :------------------------------------------------------------------------------------------------------: |
|
|
131
125
|
| --config-path | -C | **Yes** | None | Specify the file path for the configuration | dcs-sdk run --config-path config.yaml --compare comp_name |
|
|
@@ -138,8 +132,6 @@ $ pip install dcs-sdk[all-dbs]
|
|
|
138
132
|
| --report-path | | No | dcs_report.html | Specify the file path for HTML report | dcs-sdk run --config-path config.yaml --compare comp_name --html-report --report-path table.html |
|
|
139
133
|
| --table | | No | False | Display Comparison in table format | dcs-sdk run --config-path config.yaml --compare comp_name --html-report --report-path table.html --table |
|
|
140
134
|
|
|
141
|
-
|
|
142
|
-
|
|
143
135
|
### Example Command [CLI]
|
|
144
136
|
|
|
145
137
|
```sh
|
|
@@ -133,14 +133,14 @@ dcs_core/report/static/index.js,sha256=p4wvku-zlXi0y4gWeSzV1amY0s4mjtUq2QsezARLV
|
|
|
133
133
|
dcs_core/report/static/index.js.LICENSE.txt,sha256=bBDZBJVEDrqjCi7sfoF8CchjFn3hdcbNkP7ub7kbcXQ,201041
|
|
134
134
|
dcs_sdk/__init__.py,sha256=RkfhRKLXEForLCs4rZkTf0qc_b0TokSggSAcKI4yfZg,610
|
|
135
135
|
dcs_sdk/__main__.py,sha256=Qn8stIaQGrdLjHQ-H7xO0T-brtq5RWZoWU9QvqoarV8,683
|
|
136
|
-
dcs_sdk/__version__.py,sha256=
|
|
136
|
+
dcs_sdk/__version__.py,sha256=iSCeuxA5501nrospfk7ajv7gQWKLpPSH4npFVobI-gY,633
|
|
137
137
|
dcs_sdk/cli/__init__.py,sha256=RkfhRKLXEForLCs4rZkTf0qc_b0TokSggSAcKI4yfZg,610
|
|
138
138
|
dcs_sdk/cli/cli.py,sha256=jaO52UrMWLafcF_yhqllPkmYSTuO2sksFi30fYFdAB4,4406
|
|
139
139
|
dcs_sdk/sdk/__init__.py,sha256=skrZcgWWJBL6NXTUERywJ3qRJRemgpDXyW7lPg1FJk8,2107
|
|
140
140
|
dcs_sdk/sdk/config/__init__.py,sha256=RkfhRKLXEForLCs4rZkTf0qc_b0TokSggSAcKI4yfZg,610
|
|
141
141
|
dcs_sdk/sdk/config/config_loader.py,sha256=ZbSGQ56LsHv4_mxNhYrf6eoegO2R4PaqAs8iAghU73M,22435
|
|
142
142
|
dcs_sdk/sdk/data_diff/__init__.py,sha256=RkfhRKLXEForLCs4rZkTf0qc_b0TokSggSAcKI4yfZg,610
|
|
143
|
-
dcs_sdk/sdk/data_diff/data_differ.py,sha256=
|
|
143
|
+
dcs_sdk/sdk/data_diff/data_differ.py,sha256=3uef0ED5TzMmXcJYx3OZtmS-K4tj_yjlsvkD9aoIFOc,39296
|
|
144
144
|
dcs_sdk/sdk/rules/__init__.py,sha256=_BkKcE_jfdDQI_ECdOamJaefMKEXrKpYjPpnBQXl_Xs,657
|
|
145
145
|
dcs_sdk/sdk/rules/rules_mappping.py,sha256=fxakVkf7B2cVkYSO946LTim_HmMsl6lBDBqZjTTsSPI,1292
|
|
146
146
|
dcs_sdk/sdk/rules/rules_repository.py,sha256=x0Rli-wdnHAmXm5526go_qC3P-eFRt-4L7fs4hNqC-g,7564
|
|
@@ -154,8 +154,8 @@ dcs_sdk/sdk/utils/similarity_score/jaccard_provider.py,sha256=Jd0TvIGOULNTsiCL_F
|
|
|
154
154
|
dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py,sha256=puAWPnoWfNo4BN4-kXIUHrtrt5jLv3Vkw_NfHvjYrn4,1185
|
|
155
155
|
dcs_sdk/sdk/utils/table.py,sha256=X8HxdYTWyx_oVrBWPsXlmA-xJKXXDBW9RrhlWNqA1As,18224
|
|
156
156
|
dcs_sdk/sdk/utils/themes.py,sha256=Meo2Yldv4uyPpEqI7qdA28Aa6sxtwUU1dLKKm4QavjM,1403
|
|
157
|
-
dcs_sdk/sdk/utils/utils.py,sha256=
|
|
158
|
-
dcs_sdk-1.6.
|
|
159
|
-
dcs_sdk-1.6.
|
|
160
|
-
dcs_sdk-1.6.
|
|
161
|
-
dcs_sdk-1.6.
|
|
157
|
+
dcs_sdk/sdk/utils/utils.py,sha256=a9QGEVL8L7asbJm_VBwgKvJQknsvuqWS0uTUaHsDPiY,16463
|
|
158
|
+
dcs_sdk-1.6.9.dist-info/METADATA,sha256=Od43VgAhrNLKIICnGJ3nSzmuUYJzHl7miH6OSysVE9U,7652
|
|
159
|
+
dcs_sdk-1.6.9.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
160
|
+
dcs_sdk-1.6.9.dist-info/entry_points.txt,sha256=XhODNz7UccgPOyklXgp7pIfTTXArd6-V0mImjhnhwto,80
|
|
161
|
+
dcs_sdk-1.6.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|