dcs-sdk 1.6.9__tar.gz → 1.7.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/PKG-INFO +2 -2
  2. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/README.md +1 -1
  3. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/common/models/configuration.py +3 -0
  4. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/configuration/configuration_parser.py +1 -0
  5. dcs_sdk-1.7.1/dcs_core/core/datasource/file_datasource.py +124 -0
  6. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/datasource/manager.py +1 -0
  7. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/inspect.py +0 -1
  8. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/integrations/databases/azure_blob.py +22 -25
  9. dcs_sdk-1.7.1/dcs_core/integrations/databases/duck_db.py +141 -0
  10. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_sdk/__version__.py +1 -1
  11. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/pyproject.toml +1 -1
  12. dcs_sdk-1.6.9/dcs_core/core/datasource/file_datasource.py +0 -26
  13. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/__init__.py +0 -0
  14. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/__main__.py +0 -0
  15. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/abcs/__init__.py +0 -0
  16. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/abcs/compiler.py +0 -0
  17. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/abcs/database_types.py +0 -0
  18. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/config.py +0 -0
  19. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/databases/__init__.py +0 -0
  20. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/databases/_connect.py +0 -0
  21. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/databases/base.py +0 -0
  22. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/databases/bigquery.py +0 -0
  23. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/databases/clickhouse.py +0 -0
  24. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/databases/databricks.py +0 -0
  25. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/databases/duckdb.py +0 -0
  26. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/databases/mssql.py +0 -0
  27. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/databases/mysql.py +0 -0
  28. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/databases/oracle.py +0 -0
  29. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/databases/postgresql.py +0 -0
  30. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/databases/presto.py +0 -0
  31. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/databases/redis.py +0 -0
  32. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/databases/redshift.py +0 -0
  33. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/databases/snowflake.py +0 -0
  34. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/databases/sybase.py +0 -0
  35. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/databases/trino.py +0 -0
  36. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/databases/vertica.py +0 -0
  37. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/diff_tables.py +0 -0
  38. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/errors.py +0 -0
  39. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/format.py +0 -0
  40. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/hashdiff_tables.py +0 -0
  41. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/info_tree.py +0 -0
  42. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/joindiff_tables.py +0 -0
  43. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/lexicographic_space.py +0 -0
  44. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/parse_time.py +0 -0
  45. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/py.typed +0 -0
  46. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/queries/__init__.py +0 -0
  47. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/queries/api.py +0 -0
  48. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/queries/ast_classes.py +0 -0
  49. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/queries/base.py +0 -0
  50. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/queries/extras.py +0 -0
  51. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/query_utils.py +0 -0
  52. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/schema.py +0 -0
  53. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/table_segment.py +0 -0
  54. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/thread_utils.py +0 -0
  55. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/utils.py +0 -0
  56. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/data_diff/version.py +0 -0
  57. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/__init__.py +0 -0
  58. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/__main__.py +0 -0
  59. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/__version__.py +0 -0
  60. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/cli/__init__.py +0 -0
  61. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/cli/cli.py +0 -0
  62. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/__init__.py +0 -0
  63. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/common/__init__.py +0 -0
  64. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/common/errors.py +0 -0
  65. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/common/models/__init__.py +0 -0
  66. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/common/models/dashboard.py +0 -0
  67. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/common/models/data_source_resource.py +0 -0
  68. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/common/models/metric.py +0 -0
  69. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/common/models/profile.py +0 -0
  70. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/common/models/validation.py +0 -0
  71. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/common/models/widget.py +0 -0
  72. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/configuration/__init__.py +0 -0
  73. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/configuration/config_loader.py +0 -0
  74. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/configuration/configuration_parser_arc.py +0 -0
  75. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/datasource/__init__.py +0 -0
  76. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/datasource/base.py +0 -0
  77. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/datasource/search_datasource.py +0 -0
  78. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/datasource/sql_datasource.py +0 -0
  79. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/logger/__init__.py +0 -0
  80. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/logger/base.py +0 -0
  81. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/logger/default_logger.py +0 -0
  82. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/metric/__init__.py +0 -0
  83. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/metric/base.py +0 -0
  84. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/metric/combined_metric.py +0 -0
  85. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/metric/custom_metric.py +0 -0
  86. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/metric/manager.py +0 -0
  87. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/metric/numeric_metric.py +0 -0
  88. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/metric/reliability_metric.py +0 -0
  89. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/profiling/__init__.py +0 -0
  90. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/profiling/datasource_profiling.py +0 -0
  91. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/profiling/numeric_field_profiling.py +0 -0
  92. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/profiling/text_field_profiling.py +0 -0
  93. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/repository/__init__.py +0 -0
  94. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/repository/metric_repository.py +0 -0
  95. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/utils/__init__.py +0 -0
  96. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/utils/log.py +0 -0
  97. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/utils/tracking.py +0 -0
  98. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/utils/utils.py +0 -0
  99. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/validation/__init__.py +0 -0
  100. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/validation/base.py +0 -0
  101. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/validation/completeness_validation.py +0 -0
  102. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/validation/custom_query_validation.py +0 -0
  103. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/validation/manager.py +0 -0
  104. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/validation/numeric_validation.py +0 -0
  105. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/validation/reliability_validation.py +0 -0
  106. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/validation/uniqueness_validation.py +0 -0
  107. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/core/validation/validity_validation.py +0 -0
  108. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/integrations/__init__.py +0 -0
  109. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/integrations/databases/__init__.py +0 -0
  110. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/integrations/databases/bigquery.py +0 -0
  111. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/integrations/databases/databricks.py +0 -0
  112. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/integrations/databases/db2.py +0 -0
  113. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/integrations/databases/elasticsearch.py +0 -0
  114. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/integrations/databases/mssql.py +0 -0
  115. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/integrations/databases/mysql.py +0 -0
  116. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/integrations/databases/opensearch.py +0 -0
  117. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/integrations/databases/oracle.py +0 -0
  118. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/integrations/databases/postgres.py +0 -0
  119. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/integrations/databases/redshift.py +0 -0
  120. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/integrations/databases/snowflake.py +0 -0
  121. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/integrations/databases/spark_df.py +0 -0
  122. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/integrations/databases/sybase.py +0 -0
  123. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/integrations/storage/__init__.py +0 -0
  124. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/integrations/storage/local_file.py +0 -0
  125. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/integrations/utils/__init__.py +0 -0
  126. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/integrations/utils/utils.py +0 -0
  127. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/report/__init__.py +0 -0
  128. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/report/dashboard.py +0 -0
  129. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/report/models.py +0 -0
  130. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
  131. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
  132. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
  133. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
  134. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/report/static/assets/images/docs.svg +0 -0
  135. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/report/static/assets/images/github.svg +0 -0
  136. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/report/static/assets/images/logo.svg +0 -0
  137. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/report/static/assets/images/slack.svg +0 -0
  138. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/report/static/index.js +0 -0
  139. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_core/report/static/index.js.LICENSE.txt +0 -0
  140. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_sdk/__init__.py +0 -0
  141. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_sdk/__main__.py +0 -0
  142. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_sdk/cli/__init__.py +0 -0
  143. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_sdk/cli/cli.py +0 -0
  144. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_sdk/sdk/__init__.py +0 -0
  145. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_sdk/sdk/config/__init__.py +0 -0
  146. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_sdk/sdk/config/config_loader.py +0 -0
  147. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_sdk/sdk/data_diff/__init__.py +0 -0
  148. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_sdk/sdk/data_diff/data_differ.py +0 -0
  149. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_sdk/sdk/rules/__init__.py +0 -0
  150. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_sdk/sdk/rules/rules_mappping.py +0 -0
  151. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_sdk/sdk/rules/rules_repository.py +0 -0
  152. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_sdk/sdk/rules/schema_rules.py +0 -0
  153. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_sdk/sdk/utils/__init__.py +0 -0
  154. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_sdk/sdk/utils/serializer.py +0 -0
  155. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_sdk/sdk/utils/similarity_score/__init__.py +0 -0
  156. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_sdk/sdk/utils/similarity_score/base_provider.py +0 -0
  157. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_sdk/sdk/utils/similarity_score/cosine_similarity_provider.py +0 -0
  158. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_sdk/sdk/utils/similarity_score/jaccard_provider.py +0 -0
  159. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py +0 -0
  160. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_sdk/sdk/utils/table.py +0 -0
  161. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_sdk/sdk/utils/themes.py +0 -0
  162. {dcs_sdk-1.6.9 → dcs_sdk-1.7.1}/dcs_sdk/sdk/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dcs-sdk
3
- Version: 1.6.9
3
+ Version: 1.7.1
4
4
  Summary: SDK for DataChecks
5
5
  Author: Waterdip Labs
6
6
  Author-email: hello@waterdip.ai
@@ -86,7 +86,7 @@ Requires-Dist: vertica-python (>=1.4.0) ; extra == "vertica" or extra == "all-db
86
86
  Description-Content-Type: text/markdown
87
87
 
88
88
  <h1 align="center">
89
- DCS SDK v1.6.9
89
+ DCS SDK v1.7.1
90
90
  </h1>
91
91
 
92
92
  > SDK for DataChecks
@@ -1,5 +1,5 @@
1
1
  <h1 align="center">
2
- DCS SDK v1.6.9
2
+ DCS SDK v1.7.1
3
3
  </h1>
4
4
 
5
5
  > SDK for DataChecks
@@ -44,6 +44,7 @@ class DataSourceType(str, Enum):
44
44
  DB2 = "db2"
45
45
  SYBASE = "sybase"
46
46
  AZURE_BLOB = "azure_blob"
47
+ DUCK_DB = "duck_db"
47
48
 
48
49
 
49
50
  class DataSourceLanguageSupport(str, Enum):
@@ -92,6 +93,8 @@ class DataSourceConnectionConfiguration:
92
93
  endpoint_suffix: Optional[str] = None
93
94
  subfolder_path: Optional[str] = None
94
95
 
96
+ file_path: Optional[str] = None
97
+
95
98
 
96
99
  @dataclass
97
100
  class DataSourceConfiguration:
@@ -81,6 +81,7 @@ class DataSourceConfigParser(ConfigParser):
81
81
  protocol=config["connection"].get("protocol"),
82
82
  driver=config["connection"].get("driver"),
83
83
  server=config["connection"].get("server"),
84
+ file_path=config["connection"].get("file_path"),
84
85
  )
85
86
  return connection_config
86
87
 
@@ -0,0 +1,124 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+ import uuid
17
+ from abc import ABC, abstractmethod
18
+ from contextlib import contextmanager
19
+ from pathlib import Path
20
+ from typing import Dict, Iterator
21
+
22
+ import duckdb
23
+ from loguru import logger
24
+
25
+ from dcs_core.core.common.models.data_source_resource import RawColumnInfo
26
+ from dcs_core.core.datasource.base import DataSource
27
+ from dcs_core.integrations.databases.duck_db import DuckDb
28
+
29
+
30
+ class FileDataSource(DataSource, ABC):
31
+ """
32
+ Abstract class for File data sources
33
+ """
34
+
35
+ def __init__(self, data_source_name: str, data_connection: Dict):
36
+ super().__init__(data_source_name, data_connection)
37
+ self.temp_dir_name = "tmp"
38
+
39
+ @contextmanager
40
+ def as_duckdb(self, table_name: str) -> Iterator["DuckDb"]:
41
+ """Returns a DuckDB instance for the given table name"""
42
+ duckdb_path = self.load_file_to_duckdb(table_name)
43
+ duck_db_ds = DuckDb(data_source_name=self.data_source_name, data_connection={"file_path": duckdb_path})
44
+ try:
45
+ duck_db_ds.connect()
46
+ yield duck_db_ds
47
+ finally:
48
+ duck_db_ds.close()
49
+
50
+ @abstractmethod
51
+ def query_get_table_names(self) -> dict:
52
+ """
53
+ Query to get table names
54
+ """
55
+ pass
56
+
57
+ @abstractmethod
58
+ def query_get_database_version(self) -> str:
59
+ """
60
+ Get the database version
61
+ :return: version string
62
+ """
63
+ pass
64
+
65
+ @abstractmethod
66
+ def _download_to_path(self, table_name: str, path: str) -> None:
67
+ """Vendor-specific download"""
68
+ pass
69
+
70
+ def load_file_to_duckdb(self, table_name: str) -> str:
71
+ """Template method"""
72
+ os.makedirs(self.temp_dir_name, exist_ok=True)
73
+
74
+ ext = Path(table_name).suffix
75
+ if not ext:
76
+ raise ValueError(f"Invalid file name {table_name}")
77
+
78
+ temp_path = f"{self.temp_dir_name}/{uuid.uuid4()}{ext}"
79
+
80
+ try:
81
+ self._download_to_path(table_name, temp_path)
82
+ return self._load_path_to_duckdb(temp_path, table_name)
83
+ finally:
84
+ if os.path.exists(temp_path):
85
+ os.remove(temp_path)
86
+ logger.info(f"Cleaned up temp file {temp_path}")
87
+
88
+ def _load_path_to_duckdb(self, path: str, table_name: str) -> str:
89
+ """Shared DuckDB loading logic"""
90
+ tmp_dir = self.temp_dir_name
91
+ duckdb_path = f"{tmp_dir}/{uuid.uuid4()}.duckdb"
92
+ table_stem = Path(table_name).stem
93
+
94
+ logger.info(f"Loading {path} into DuckDB")
95
+
96
+ conn = None
97
+ try:
98
+ conn = duckdb.connect(database=duckdb_path, read_only=False)
99
+ conn.execute(
100
+ f'CREATE TABLE "{table_stem}" AS SELECT * FROM read_csv_auto(?)',
101
+ [path],
102
+ )
103
+ logger.info(f"Successfully loaded data into {duckdb_path}")
104
+ return duckdb_path
105
+ except Exception as e:
106
+ logger.warning(f"read_csv_auto failed: {e}. Trying with ALL_VARCHAR=TRUE")
107
+ try:
108
+ if conn:
109
+ conn.close()
110
+ conn = duckdb.connect(database=duckdb_path, read_only=False)
111
+ conn.execute(
112
+ f'CREATE TABLE "{table_stem}" AS ' f"SELECT * FROM read_csv(?, ALL_VARCHAR=TRUE, SAMPLE_SIZE=-1)",
113
+ [path],
114
+ )
115
+ logger.info(f"Successfully loaded data with ALL_VARCHAR into {duckdb_path}")
116
+ return duckdb_path
117
+ except Exception as fallback_error:
118
+ logger.error(f"Failed to load CSV into DuckDB: {fallback_error}")
119
+ if os.path.exists(duckdb_path):
120
+ os.remove(duckdb_path)
121
+ raise
122
+ finally:
123
+ if conn:
124
+ conn.close()
@@ -58,6 +58,7 @@ class DataSourceManager:
58
58
  "db2": "DB2DataSource",
59
59
  "sybase": "SybaseDataSource",
60
60
  "azure_blob": "AzureBlobDataSource",
61
+ "duck_db": "DuckDb",
61
62
  }
62
63
 
63
64
  def __init__(self, config: Configuration):
@@ -130,7 +130,6 @@ class Inspect:
130
130
  try:
131
131
  self.data_source_manager.connect()
132
132
  self.validation_manager.build_validations()
133
-
134
133
  validation_infos: Dict[str, ValidationInfo] = {}
135
134
 
136
135
  for datasource, _ in self.validation_manager.get_validations.items():
@@ -12,17 +12,21 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import io
16
- from typing import Any, Dict, List, Optional
15
+ import os
16
+ import uuid
17
+ from pathlib import Path
18
+ from typing import Any, Dict, Optional
17
19
 
18
- import pandas as pd
20
+ import duckdb
19
21
  from azure.storage.blob import BlobServiceClient
22
+ from loguru import logger
20
23
 
21
24
  from dcs_core.core.common.errors import (
22
25
  DatachecksColumnFetchError,
23
26
  DataChecksDataSourcesConnectionError,
24
27
  DatachecksTableFetchError,
25
28
  )
29
+ from dcs_core.core.common.models.data_source_resource import RawColumnInfo
26
30
  from dcs_core.core.datasource.file_datasource import FileDataSource
27
31
 
28
32
 
@@ -31,6 +35,7 @@ class AzureBlobDataSource(FileDataSource):
31
35
  super().__init__(data_source_name, data_connection)
32
36
  self.allowed_file_extensions = [".csv"]
33
37
  self.blob_service_client: Optional[BlobServiceClient] = None
38
+ self.DEFAULT_NUMERIC_PRECISION = 16383
34
39
  self.connection = None
35
40
 
36
41
  def connect(self) -> Any:
@@ -83,28 +88,8 @@ class AzureBlobDataSource(FileDataSource):
83
88
  except Exception as e:
84
89
  raise DatachecksTableFetchError(f"Failed to list blobs: {e}")
85
90
 
86
- def query_get_table_columns(self, table: str) -> List[dict]:
87
- """
88
- Get column names for a table (CSV blob in this case).
89
- """
90
- if not self.is_connected():
91
- raise DataChecksDataSourcesConnectionError("Not connected to Azure Blob Storage")
92
-
93
- if not any(table.endswith(ext) for ext in self.allowed_file_extensions):
94
- raise ValueError(f"Unsupported file type for {table}. Allowed: {self.allowed_file_extensions}")
95
-
96
- try:
97
- blob_client = self.connection.get_blob_client(blob=table)
98
- download_stream = blob_client.download_blob()
99
- data = download_stream.readall()
100
- if table.endswith(".csv"):
101
- df = pd.read_csv(io.BytesIO(data))
102
- else:
103
- raise ValueError(f"Unsupported file type for {table}. Allowed: {self.allowed_file_extensions}")
104
-
105
- return [{"column_name": col, "column_type": "string"} for col in df.columns.tolist()]
106
- except Exception as e:
107
- raise DatachecksColumnFetchError(f"Failed to read columns from blob '{table}': {e}")
91
+ def safe_get(self, lst, idx, default=None):
92
+ return lst[idx] if 0 <= idx < len(lst) else default
108
93
 
109
94
  def query_get_database_version(self) -> str:
110
95
  """
@@ -113,3 +98,15 @@ class AzureBlobDataSource(FileDataSource):
113
98
  """
114
99
  api_version = self.blob_service_client.api_version
115
100
  return api_version
101
+
102
+ def _download_to_path(self, table_name: str, path: str):
103
+ """Download blob to path"""
104
+ blob_client = self.connection.get_blob_client(blob=table_name)
105
+ logger.info(f"Downloading {table_name} to {path}")
106
+ try:
107
+ with open(path, "wb") as f:
108
+ stream = blob_client.download_blob()
109
+ for chunk in stream.chunks():
110
+ f.write(chunk)
111
+ except Exception as e:
112
+ raise DataChecksDataSourcesConnectionError(f"Failed to download blob '{table_name}': {e}")
@@ -0,0 +1,141 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import os
15
+ from pathlib import Path
16
+ from typing import Any, Dict
17
+
18
+ import duckdb
19
+ from loguru import logger
20
+
21
+ from dcs_core.core.common.errors import DataChecksDataSourcesConnectionError
22
+ from dcs_core.core.common.models.data_source_resource import RawColumnInfo
23
+ from dcs_core.core.datasource.sql_datasource import SQLDataSource
24
+
25
+
26
+ class DuckDb(SQLDataSource):
27
+ def __init__(self, data_source_name: str, data_connection: Dict):
28
+ super().__init__(data_source_name, data_connection)
29
+ self.connection = None
30
+ self.use_sa_text_query = False
31
+ self.regex_patterns = {
32
+ "uuid": r"^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$",
33
+ "usa_phone": r"^(\+1[-.\s]?)?(\(?\d{3}\)?[-.\s]?)?\d{3}[-.\s]?\d{4}$",
34
+ "email": r"^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$",
35
+ "usa_zip_code": r"^[0-9]{5}(?:-[0-9]{4})?$",
36
+ "ssn": r"^[0-9]{3}-[0-9]{2}-[0-9]{4}$",
37
+ "sedol": r"^[B-DF-HJ-NP-TV-XZ0-9]{6}[0-9]$",
38
+ "lei": r"^[A-Z0-9]{18}[0-9]{2}$",
39
+ "cusip": r"^[0-9A-Z]{9}$",
40
+ "figi": r"^BBG[A-Z0-9]{9}$",
41
+ "isin": r"^[A-Z]{2}[A-Z0-9]{9}[0-9]$",
42
+ "perm_id": r"^\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{3}$",
43
+ }
44
+ self.DEFAULT_NUMERIC_PRECISION = 16383
45
+
46
+ def connect(self) -> Any:
47
+ """
48
+ Connect to the file data source
49
+ """
50
+ try:
51
+ file_path = self.data_connection.get("file_path")
52
+ self.connection = duckdb.connect(database=file_path)
53
+ return self.connection
54
+ except Exception as e:
55
+ raise DataChecksDataSourcesConnectionError(f"Failed to connect to DuckDB: {e}")
56
+
57
+ def is_connected(self) -> bool:
58
+ """
59
+ Check if the file data source is connected
60
+ """
61
+ return self.connection is not None
62
+
63
+ def close(self):
64
+ """
65
+ Close the connection
66
+ """
67
+ logger.info("Closing DuckDB connection")
68
+ if self.connection:
69
+ self.connection.close()
70
+ try:
71
+ fp = self.data_connection.get("file_path")
72
+ if fp and os.path.exists(fp):
73
+ os.remove(fp)
74
+ except Exception as e:
75
+ logger.error(f"Failed to remove the file {self.data_connection.get('file_path')}: {e}")
76
+
77
+ def qualified_table_name(self, table_name: str) -> str:
78
+ """
79
+ Get the qualified table name
80
+ :param table_name: name of the table
81
+ :return: qualified table name
82
+ """
83
+ return f'"{table_name}"'
84
+
85
+ def quote_column(self, column: str) -> str:
86
+ """
87
+ Quote the column name
88
+ :param column: name of the column
89
+ :return: quoted column name
90
+ """
91
+ return f'"{column}"'
92
+
93
+ def query_get_table_columns(
94
+ self,
95
+ table: str,
96
+ schema: str | None = None,
97
+ ) -> Dict[str, RawColumnInfo]:
98
+ """
99
+ Get the schema of a table.
100
+ :param table: table name
101
+ :return: Dictionary with column names and their types
102
+ """
103
+ schema = schema or self.schema_name
104
+ info_schema_path = ["information_schema", "columns"]
105
+ if self.database:
106
+ database = self.quote_database(self.database)
107
+ info_schema_path.insert(0, database)
108
+
109
+ query = f"""
110
+ SELECT
111
+ column_name,
112
+ data_type,
113
+ CASE WHEN data_type IN ('TIMESTAMP', 'TIME') THEN datetime_precision ELSE NULL END AS datetime_precision,
114
+ CASE WHEN data_type = 'DECIMAL' THEN COALESCE(numeric_precision, 131072 + {self.DEFAULT_NUMERIC_PRECISION})
115
+ WHEN data_type IN ('DOUBLE', 'REAL', 'FLOAT') THEN numeric_precision
116
+ ELSE numeric_precision END AS numeric_precision,
117
+ CASE WHEN data_type = 'DECIMAL' THEN COALESCE(numeric_scale, {self.DEFAULT_NUMERIC_PRECISION}) ELSE numeric_scale END AS numeric_scale,
118
+ NULL AS collation_name,
119
+ CASE WHEN data_type = 'VARCHAR' THEN character_maximum_length ELSE NULL END AS character_maximum_length
120
+ FROM information_schema.columns
121
+ WHERE table_name = '{table}'
122
+ ORDER BY ordinal_position
123
+ """
124
+
125
+ rows = self.fetchall(query)
126
+ if not rows:
127
+ raise RuntimeError(f"{table}: Table, {schema}: Schema, does not exist, or has no columns")
128
+
129
+ column_info = {
130
+ r[0]: RawColumnInfo(
131
+ column_name=self.safe_get(r, 0),
132
+ data_type=self.safe_get(r, 1),
133
+ datetime_precision=self.safe_get(r, 2),
134
+ numeric_precision=self.safe_get(r, 3),
135
+ numeric_scale=self.safe_get(r, 4),
136
+ collation_name=self.safe_get(r, 5),
137
+ character_maximum_length=self.safe_get(r, 6),
138
+ )
139
+ for r in rows
140
+ }
141
+ return column_info
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.6.9"
15
+ __version__ = "1.7.1"
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "dcs-sdk"
3
- version = "1.6.9"
3
+ version = "1.7.1"
4
4
  description = "SDK for DataChecks"
5
5
  authors = ["Waterdip Labs <hello@waterdip.ai>"]
6
6
  readme = "README.md"
@@ -1,26 +0,0 @@
1
- # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- from typing import Dict
16
-
17
- from dcs_core.core.datasource.base import DataSource
18
-
19
-
20
- class FileDataSource(DataSource):
21
- """
22
- Abstract class for File data sources
23
- """
24
-
25
- def __init__(self, data_source_name: str, data_connection: Dict):
26
- super().__init__(data_source_name, data_connection)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes