dcs-sdk 1.6.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159)
  1. data_diff/__init__.py +221 -0
  2. data_diff/__main__.py +517 -0
  3. data_diff/abcs/__init__.py +13 -0
  4. data_diff/abcs/compiler.py +27 -0
  5. data_diff/abcs/database_types.py +402 -0
  6. data_diff/config.py +141 -0
  7. data_diff/databases/__init__.py +38 -0
  8. data_diff/databases/_connect.py +323 -0
  9. data_diff/databases/base.py +1417 -0
  10. data_diff/databases/bigquery.py +376 -0
  11. data_diff/databases/clickhouse.py +217 -0
  12. data_diff/databases/databricks.py +262 -0
  13. data_diff/databases/duckdb.py +207 -0
  14. data_diff/databases/mssql.py +343 -0
  15. data_diff/databases/mysql.py +189 -0
  16. data_diff/databases/oracle.py +238 -0
  17. data_diff/databases/postgresql.py +293 -0
  18. data_diff/databases/presto.py +222 -0
  19. data_diff/databases/redis.py +93 -0
  20. data_diff/databases/redshift.py +233 -0
  21. data_diff/databases/snowflake.py +222 -0
  22. data_diff/databases/sybase.py +720 -0
  23. data_diff/databases/trino.py +73 -0
  24. data_diff/databases/vertica.py +174 -0
  25. data_diff/diff_tables.py +489 -0
  26. data_diff/errors.py +17 -0
  27. data_diff/format.py +369 -0
  28. data_diff/hashdiff_tables.py +1026 -0
  29. data_diff/info_tree.py +76 -0
  30. data_diff/joindiff_tables.py +434 -0
  31. data_diff/lexicographic_space.py +253 -0
  32. data_diff/parse_time.py +88 -0
  33. data_diff/py.typed +0 -0
  34. data_diff/queries/__init__.py +13 -0
  35. data_diff/queries/api.py +213 -0
  36. data_diff/queries/ast_classes.py +811 -0
  37. data_diff/queries/base.py +38 -0
  38. data_diff/queries/extras.py +43 -0
  39. data_diff/query_utils.py +70 -0
  40. data_diff/schema.py +67 -0
  41. data_diff/table_segment.py +583 -0
  42. data_diff/thread_utils.py +112 -0
  43. data_diff/utils.py +1022 -0
  44. data_diff/version.py +15 -0
  45. dcs_core/__init__.py +13 -0
  46. dcs_core/__main__.py +17 -0
  47. dcs_core/__version__.py +15 -0
  48. dcs_core/cli/__init__.py +13 -0
  49. dcs_core/cli/cli.py +165 -0
  50. dcs_core/core/__init__.py +19 -0
  51. dcs_core/core/common/__init__.py +13 -0
  52. dcs_core/core/common/errors.py +50 -0
  53. dcs_core/core/common/models/__init__.py +13 -0
  54. dcs_core/core/common/models/configuration.py +284 -0
  55. dcs_core/core/common/models/dashboard.py +24 -0
  56. dcs_core/core/common/models/data_source_resource.py +75 -0
  57. dcs_core/core/common/models/metric.py +160 -0
  58. dcs_core/core/common/models/profile.py +75 -0
  59. dcs_core/core/common/models/validation.py +216 -0
  60. dcs_core/core/common/models/widget.py +44 -0
  61. dcs_core/core/configuration/__init__.py +13 -0
  62. dcs_core/core/configuration/config_loader.py +139 -0
  63. dcs_core/core/configuration/configuration_parser.py +262 -0
  64. dcs_core/core/configuration/configuration_parser_arc.py +328 -0
  65. dcs_core/core/datasource/__init__.py +13 -0
  66. dcs_core/core/datasource/base.py +62 -0
  67. dcs_core/core/datasource/manager.py +112 -0
  68. dcs_core/core/datasource/search_datasource.py +421 -0
  69. dcs_core/core/datasource/sql_datasource.py +1094 -0
  70. dcs_core/core/inspect.py +163 -0
  71. dcs_core/core/logger/__init__.py +13 -0
  72. dcs_core/core/logger/base.py +32 -0
  73. dcs_core/core/logger/default_logger.py +94 -0
  74. dcs_core/core/metric/__init__.py +13 -0
  75. dcs_core/core/metric/base.py +220 -0
  76. dcs_core/core/metric/combined_metric.py +98 -0
  77. dcs_core/core/metric/custom_metric.py +34 -0
  78. dcs_core/core/metric/manager.py +137 -0
  79. dcs_core/core/metric/numeric_metric.py +403 -0
  80. dcs_core/core/metric/reliability_metric.py +90 -0
  81. dcs_core/core/profiling/__init__.py +13 -0
  82. dcs_core/core/profiling/datasource_profiling.py +136 -0
  83. dcs_core/core/profiling/numeric_field_profiling.py +72 -0
  84. dcs_core/core/profiling/text_field_profiling.py +67 -0
  85. dcs_core/core/repository/__init__.py +13 -0
  86. dcs_core/core/repository/metric_repository.py +77 -0
  87. dcs_core/core/utils/__init__.py +13 -0
  88. dcs_core/core/utils/log.py +29 -0
  89. dcs_core/core/utils/tracking.py +105 -0
  90. dcs_core/core/utils/utils.py +44 -0
  91. dcs_core/core/validation/__init__.py +13 -0
  92. dcs_core/core/validation/base.py +230 -0
  93. dcs_core/core/validation/completeness_validation.py +153 -0
  94. dcs_core/core/validation/custom_query_validation.py +24 -0
  95. dcs_core/core/validation/manager.py +282 -0
  96. dcs_core/core/validation/numeric_validation.py +276 -0
  97. dcs_core/core/validation/reliability_validation.py +91 -0
  98. dcs_core/core/validation/uniqueness_validation.py +61 -0
  99. dcs_core/core/validation/validity_validation.py +738 -0
  100. dcs_core/integrations/__init__.py +13 -0
  101. dcs_core/integrations/databases/__init__.py +13 -0
  102. dcs_core/integrations/databases/bigquery.py +187 -0
  103. dcs_core/integrations/databases/databricks.py +51 -0
  104. dcs_core/integrations/databases/db2.py +652 -0
  105. dcs_core/integrations/databases/elasticsearch.py +61 -0
  106. dcs_core/integrations/databases/mssql.py +829 -0
  107. dcs_core/integrations/databases/mysql.py +409 -0
  108. dcs_core/integrations/databases/opensearch.py +64 -0
  109. dcs_core/integrations/databases/oracle.py +719 -0
  110. dcs_core/integrations/databases/postgres.py +482 -0
  111. dcs_core/integrations/databases/redshift.py +53 -0
  112. dcs_core/integrations/databases/snowflake.py +48 -0
  113. dcs_core/integrations/databases/spark_df.py +111 -0
  114. dcs_core/integrations/databases/sybase.py +1069 -0
  115. dcs_core/integrations/storage/__init__.py +13 -0
  116. dcs_core/integrations/storage/local_file.py +149 -0
  117. dcs_core/integrations/utils/__init__.py +13 -0
  118. dcs_core/integrations/utils/utils.py +36 -0
  119. dcs_core/report/__init__.py +13 -0
  120. dcs_core/report/dashboard.py +211 -0
  121. dcs_core/report/models.py +88 -0
  122. dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
  123. dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
  124. dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
  125. dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
  126. dcs_core/report/static/assets/images/docs.svg +6 -0
  127. dcs_core/report/static/assets/images/github.svg +4 -0
  128. dcs_core/report/static/assets/images/logo.svg +7 -0
  129. dcs_core/report/static/assets/images/slack.svg +13 -0
  130. dcs_core/report/static/index.js +2 -0
  131. dcs_core/report/static/index.js.LICENSE.txt +3971 -0
  132. dcs_sdk/__init__.py +13 -0
  133. dcs_sdk/__main__.py +18 -0
  134. dcs_sdk/__version__.py +15 -0
  135. dcs_sdk/cli/__init__.py +13 -0
  136. dcs_sdk/cli/cli.py +163 -0
  137. dcs_sdk/sdk/__init__.py +58 -0
  138. dcs_sdk/sdk/config/__init__.py +13 -0
  139. dcs_sdk/sdk/config/config_loader.py +491 -0
  140. dcs_sdk/sdk/data_diff/__init__.py +13 -0
  141. dcs_sdk/sdk/data_diff/data_differ.py +821 -0
  142. dcs_sdk/sdk/rules/__init__.py +15 -0
  143. dcs_sdk/sdk/rules/rules_mappping.py +31 -0
  144. dcs_sdk/sdk/rules/rules_repository.py +214 -0
  145. dcs_sdk/sdk/rules/schema_rules.py +65 -0
  146. dcs_sdk/sdk/utils/__init__.py +13 -0
  147. dcs_sdk/sdk/utils/serializer.py +25 -0
  148. dcs_sdk/sdk/utils/similarity_score/__init__.py +13 -0
  149. dcs_sdk/sdk/utils/similarity_score/base_provider.py +153 -0
  150. dcs_sdk/sdk/utils/similarity_score/cosine_similarity_provider.py +39 -0
  151. dcs_sdk/sdk/utils/similarity_score/jaccard_provider.py +24 -0
  152. dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py +31 -0
  153. dcs_sdk/sdk/utils/table.py +475 -0
  154. dcs_sdk/sdk/utils/themes.py +40 -0
  155. dcs_sdk/sdk/utils/utils.py +349 -0
  156. dcs_sdk-1.6.5.dist-info/METADATA +150 -0
  157. dcs_sdk-1.6.5.dist-info/RECORD +159 -0
  158. dcs_sdk-1.6.5.dist-info/WHEEL +4 -0
  159. dcs_sdk-1.6.5.dist-info/entry_points.txt +4 -0
dcs_sdk/__init__.py ADDED
@@ -0,0 +1,13 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
dcs_sdk/__main__.py ADDED
@@ -0,0 +1,18 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from dcs_sdk.cli.cli import main
16
+
17
# Executed when the package is run as a script (`python -m dcs_sdk`):
# hand control to the CLI entry point imported from dcs_sdk.cli.cli.
if __name__ == "__main__":
    main()
dcs_sdk/__version__.py ADDED
@@ -0,0 +1,15 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
# Must match the published distribution version (this wheel is dcs-sdk 1.6.5);
# the CLI's --version output is taken from this constant.
__version__ = "1.6.5"
dcs_sdk/cli/__init__.py ADDED
@@ -0,0 +1,13 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
dcs_sdk/cli/cli.py ADDED
@@ -0,0 +1,163 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import json
16
+ from typing import List, Union
17
+
18
+ import click
19
+
20
+ from dcs_core.cli.cli import inspect as inspect_command
21
+ from dcs_sdk.__version__ import __version__
22
+ from dcs_sdk.sdk.config.config_loader import Comparison, data_diff_config_loader
23
+ from dcs_sdk.sdk.data_diff.data_differ import diff_db_tables
24
+ from dcs_sdk.sdk.utils.utils import post_comparison_results
25
+
26
+
27
@click.version_option(
    version=__version__,
    package_name="dcs-sdk",
    prog_name="DCS SDK",
)
@click.group(help=f"DCS SDK version {__version__}")
def main():
    """Root click group of the DCS SDK command line; it performs no work itself."""
31
+
32
+
33
@main.command(short_help="Starts DCS SDK")
@click.option("-C", "--config-path", required=True, default=None, help="Specify the file path for configuration")
@click.option("--save-json", "-j", is_flag=True, help="Save data into JSON file")
@click.option(
    "--json-path",
    "-jp",
    required=False,
    default="dcs_report.json",
    help="Specify the file path for JSON file",
)
@click.option("--compare", required=True, help="Run only specific comparison using comparison name")
@click.option("--stats", is_flag=True, help="Print stats about data diff")
@click.option("--url", required=False, help="Specify the server URL to send data")
@click.option("--html-report", is_flag=True, help="Save table as HTML")
@click.option(
    "--report-path",
    required=False,
    default="dcs_report.html",
    help="Specify the file path for HTML report",
)
@click.option("--table", "display_table", is_flag=True, help="Display Comparison in table format")
def run(
    config_path: Union[str, None],
    save_json: bool = False,
    json_path: str = "dcs_report.json",
    compare: str = None,
    stats: bool = False,
    url: str = None,
    html_report: bool = False,
    report_path: str = "dcs_report.html",
    display_table: bool = False,
):
    # NOTE: deliberately documented with comments rather than a docstring --
    # click falls back to the callback's docstring for the command help text
    # when no explicit `help=` is given, so a docstring would alter `--help`.
    #
    # The command is a thin adapter: it renames `--stats` to `show_stats`,
    # marks the call as coming from the CLI, and delegates everything else.
    forwarded = {
        "config_path": config_path,
        "save_json": save_json,
        "json_path": json_path,
        "compare": compare,
        "url": url,
        "is_cli": True,
        "show_stats": stats,
        "html_report": html_report,
        "report_path": report_path,
        "display_table": display_table,
    }
    data_diff_cli(**forwarded)
111
+
112
+
113
def data_diff_cli(
    config_path,
    save_json: bool,
    json_path: str,
    report_path: str,
    compare: str,
    url: str,
    is_cli: bool = True,
    show_stats: bool = False,
    html_report: bool = False,
    display_table: bool = False,
):
    """Run the comparison named ``compare`` and optionally export its result.

    The configuration is loaded before the ``try`` block, so loader errors
    propagate to the caller. Everything afterwards is best-effort: any
    exception (including the "comparison not found" ``ValueError``) is
    reported on stdout as ``Error: ...`` instead of being raised.

    Args:
        config_path: Passed to ``data_diff_config_loader`` to obtain the
            list of configured comparisons.
        save_json: When true and a result exists, write it to ``json_path``.
        json_path: Destination file for the JSON dump of the result.
        report_path: Forwarded to ``diff_db_tables`` as ``html_path``.
        compare: Name of the comparison to run. Every configured comparison
            with this name is executed; the last result is the one exported.
        url: When set and a result exists, the result is handed to
            ``post_comparison_results`` together with this URL.
        is_cli: Forwarded to ``diff_db_tables`` and ``post_comparison_results``.
        show_stats: Forwarded to ``diff_db_tables``.
        html_report: Forwarded to ``diff_db_tables`` as ``save_html``.
        display_table: Forwarded to ``diff_db_tables``.

    Returns:
        None. Output is produced via stdout, the optional POST and the
        optional JSON file.
    """
    comparisons: List[Comparison] = data_diff_config_loader(config_path)
    comp_name_found = False
    result = None
    try:
        for comparison in comparisons:
            if comparison.comparison_name != compare:
                continue
            result = diff_db_tables(
                config=comparison,
                is_cli=is_cli,
                show_stats=show_stats,
                save_html=html_report,
                html_path=report_path,
                display_table=display_table,
            )
            total_seconds = result.get("meta", {}).get("seconds", 0)
            # Pick the unit from the value as displayed: only "1.00" is
            # singular (the previous `> 1` test printed e.g. "0.50 second").
            elapsed = f"{total_seconds:.2f}"
            unit = "second" if elapsed == "1.00" else "seconds"
            print(f"Time took: {elapsed} {unit}")
            comp_name_found = True

        if not comp_name_found:
            raise ValueError(f"Comparison name {compare} not found in the config file")
        if result and url:
            post_comparison_results(
                comparison_data=result,
                url=url,
                is_cli=is_cli,
            )
        if save_json and result:
            # Serialize before opening the file so that a serialization
            # failure cannot truncate an already existing report.
            payload = json.dumps(result)
            with open(json_path, "w", encoding="utf-8") as f:
                f.write(payload)

    except Exception as e:
        # Deliberate best-effort boundary: report the failure, do not raise.
        print(f"Error: {e}")
158
+
159
+
160
# Mount the `inspect` command imported from dcs_core.cli.cli on this CLI group
# under the name "inspect".
main.add_command(inspect_command, name="inspect")

# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
dcs_sdk/sdk/__init__.py ADDED
@@ -0,0 +1,58 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Any, Dict, List, Optional
16
+
17
+ from dcs_sdk.sdk.config.config_loader import Comparison, data_diff_config_loader
18
+ from dcs_sdk.sdk.data_diff.data_differ import diff_db_tables
19
+
20
+
21
class DcsSdk:
    """Programmatic entry point that runs a single named comparison.

    The configuration may be supplied as a file path, a YAML string or an
    already parsed mapping; all three are handed to
    ``data_diff_config_loader`` unchanged.
    """

    def __init__(
        self,
        comparison_name: str,
        config_path: Optional[str] = None,
        config_yaml: Optional[str] = None,
        config_json: Optional[Dict] = None,
        api_key: str = "ABC",
    ):
        """Store the settings and reject any API key other than the default.

        Raises:
            ValueError: If ``api_key`` is empty or differs from the built-in
                default key.
        """
        self.default_api_key = "ABC"
        self.comparison_name = comparison_name
        self.config_path = config_path
        self.config_yaml = config_yaml
        self.config_json = config_json
        self.api_key = api_key
        self.__validate_api_key()

    def __validate_api_key(self):
        """Raise ``ValueError`` unless the key is non-empty and equals the default."""
        key_is_valid = bool(self.api_key) and self.api_key == self.default_api_key
        if key_is_valid:
            return
        raise ValueError("Invalid API key provided.")

    def run(self):
        """Execute the configured comparison and return its result."""
        return self.__run_data_diff()

    def __run_data_diff(self) -> Any:
        """Load the comparisons and diff the first one whose name matches.

        Raises:
            ValueError: If no loaded comparison is named ``comparison_name``.
        """
        loaded: List[Comparison] = data_diff_config_loader(
            config_path=self.config_path,
            config_yaml=self.config_yaml,
            config_json=self.config_json,
        )
        wanted = self.comparison_name
        match = next((item for item in loaded if item.comparison_name == wanted), None)
        if match is None:
            raise ValueError(f"Comparison name {self.comparison_name} not found in the config file")
        return diff_db_tables(match)
dcs_sdk/sdk/config/__init__.py ADDED
@@ -0,0 +1,13 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.