dcs-sdk 1.6.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159)
  1. data_diff/__init__.py +221 -0
  2. data_diff/__main__.py +517 -0
  3. data_diff/abcs/__init__.py +13 -0
  4. data_diff/abcs/compiler.py +27 -0
  5. data_diff/abcs/database_types.py +402 -0
  6. data_diff/config.py +141 -0
  7. data_diff/databases/__init__.py +38 -0
  8. data_diff/databases/_connect.py +323 -0
  9. data_diff/databases/base.py +1417 -0
  10. data_diff/databases/bigquery.py +376 -0
  11. data_diff/databases/clickhouse.py +217 -0
  12. data_diff/databases/databricks.py +262 -0
  13. data_diff/databases/duckdb.py +207 -0
  14. data_diff/databases/mssql.py +343 -0
  15. data_diff/databases/mysql.py +189 -0
  16. data_diff/databases/oracle.py +238 -0
  17. data_diff/databases/postgresql.py +293 -0
  18. data_diff/databases/presto.py +222 -0
  19. data_diff/databases/redis.py +93 -0
  20. data_diff/databases/redshift.py +233 -0
  21. data_diff/databases/snowflake.py +222 -0
  22. data_diff/databases/sybase.py +720 -0
  23. data_diff/databases/trino.py +73 -0
  24. data_diff/databases/vertica.py +174 -0
  25. data_diff/diff_tables.py +489 -0
  26. data_diff/errors.py +17 -0
  27. data_diff/format.py +369 -0
  28. data_diff/hashdiff_tables.py +1026 -0
  29. data_diff/info_tree.py +76 -0
  30. data_diff/joindiff_tables.py +434 -0
  31. data_diff/lexicographic_space.py +253 -0
  32. data_diff/parse_time.py +88 -0
  33. data_diff/py.typed +0 -0
  34. data_diff/queries/__init__.py +13 -0
  35. data_diff/queries/api.py +213 -0
  36. data_diff/queries/ast_classes.py +811 -0
  37. data_diff/queries/base.py +38 -0
  38. data_diff/queries/extras.py +43 -0
  39. data_diff/query_utils.py +70 -0
  40. data_diff/schema.py +67 -0
  41. data_diff/table_segment.py +583 -0
  42. data_diff/thread_utils.py +112 -0
  43. data_diff/utils.py +1022 -0
  44. data_diff/version.py +15 -0
  45. dcs_core/__init__.py +13 -0
  46. dcs_core/__main__.py +17 -0
  47. dcs_core/__version__.py +15 -0
  48. dcs_core/cli/__init__.py +13 -0
  49. dcs_core/cli/cli.py +165 -0
  50. dcs_core/core/__init__.py +19 -0
  51. dcs_core/core/common/__init__.py +13 -0
  52. dcs_core/core/common/errors.py +50 -0
  53. dcs_core/core/common/models/__init__.py +13 -0
  54. dcs_core/core/common/models/configuration.py +284 -0
  55. dcs_core/core/common/models/dashboard.py +24 -0
  56. dcs_core/core/common/models/data_source_resource.py +75 -0
  57. dcs_core/core/common/models/metric.py +160 -0
  58. dcs_core/core/common/models/profile.py +75 -0
  59. dcs_core/core/common/models/validation.py +216 -0
  60. dcs_core/core/common/models/widget.py +44 -0
  61. dcs_core/core/configuration/__init__.py +13 -0
  62. dcs_core/core/configuration/config_loader.py +139 -0
  63. dcs_core/core/configuration/configuration_parser.py +262 -0
  64. dcs_core/core/configuration/configuration_parser_arc.py +328 -0
  65. dcs_core/core/datasource/__init__.py +13 -0
  66. dcs_core/core/datasource/base.py +62 -0
  67. dcs_core/core/datasource/manager.py +112 -0
  68. dcs_core/core/datasource/search_datasource.py +421 -0
  69. dcs_core/core/datasource/sql_datasource.py +1094 -0
  70. dcs_core/core/inspect.py +163 -0
  71. dcs_core/core/logger/__init__.py +13 -0
  72. dcs_core/core/logger/base.py +32 -0
  73. dcs_core/core/logger/default_logger.py +94 -0
  74. dcs_core/core/metric/__init__.py +13 -0
  75. dcs_core/core/metric/base.py +220 -0
  76. dcs_core/core/metric/combined_metric.py +98 -0
  77. dcs_core/core/metric/custom_metric.py +34 -0
  78. dcs_core/core/metric/manager.py +137 -0
  79. dcs_core/core/metric/numeric_metric.py +403 -0
  80. dcs_core/core/metric/reliability_metric.py +90 -0
  81. dcs_core/core/profiling/__init__.py +13 -0
  82. dcs_core/core/profiling/datasource_profiling.py +136 -0
  83. dcs_core/core/profiling/numeric_field_profiling.py +72 -0
  84. dcs_core/core/profiling/text_field_profiling.py +67 -0
  85. dcs_core/core/repository/__init__.py +13 -0
  86. dcs_core/core/repository/metric_repository.py +77 -0
  87. dcs_core/core/utils/__init__.py +13 -0
  88. dcs_core/core/utils/log.py +29 -0
  89. dcs_core/core/utils/tracking.py +105 -0
  90. dcs_core/core/utils/utils.py +44 -0
  91. dcs_core/core/validation/__init__.py +13 -0
  92. dcs_core/core/validation/base.py +230 -0
  93. dcs_core/core/validation/completeness_validation.py +153 -0
  94. dcs_core/core/validation/custom_query_validation.py +24 -0
  95. dcs_core/core/validation/manager.py +282 -0
  96. dcs_core/core/validation/numeric_validation.py +276 -0
  97. dcs_core/core/validation/reliability_validation.py +91 -0
  98. dcs_core/core/validation/uniqueness_validation.py +61 -0
  99. dcs_core/core/validation/validity_validation.py +738 -0
  100. dcs_core/integrations/__init__.py +13 -0
  101. dcs_core/integrations/databases/__init__.py +13 -0
  102. dcs_core/integrations/databases/bigquery.py +187 -0
  103. dcs_core/integrations/databases/databricks.py +51 -0
  104. dcs_core/integrations/databases/db2.py +652 -0
  105. dcs_core/integrations/databases/elasticsearch.py +61 -0
  106. dcs_core/integrations/databases/mssql.py +829 -0
  107. dcs_core/integrations/databases/mysql.py +409 -0
  108. dcs_core/integrations/databases/opensearch.py +64 -0
  109. dcs_core/integrations/databases/oracle.py +719 -0
  110. dcs_core/integrations/databases/postgres.py +482 -0
  111. dcs_core/integrations/databases/redshift.py +53 -0
  112. dcs_core/integrations/databases/snowflake.py +48 -0
  113. dcs_core/integrations/databases/spark_df.py +111 -0
  114. dcs_core/integrations/databases/sybase.py +1069 -0
  115. dcs_core/integrations/storage/__init__.py +13 -0
  116. dcs_core/integrations/storage/local_file.py +149 -0
  117. dcs_core/integrations/utils/__init__.py +13 -0
  118. dcs_core/integrations/utils/utils.py +36 -0
  119. dcs_core/report/__init__.py +13 -0
  120. dcs_core/report/dashboard.py +211 -0
  121. dcs_core/report/models.py +88 -0
  122. dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
  123. dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
  124. dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
  125. dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
  126. dcs_core/report/static/assets/images/docs.svg +6 -0
  127. dcs_core/report/static/assets/images/github.svg +4 -0
  128. dcs_core/report/static/assets/images/logo.svg +7 -0
  129. dcs_core/report/static/assets/images/slack.svg +13 -0
  130. dcs_core/report/static/index.js +2 -0
  131. dcs_core/report/static/index.js.LICENSE.txt +3971 -0
  132. dcs_sdk/__init__.py +13 -0
  133. dcs_sdk/__main__.py +18 -0
  134. dcs_sdk/__version__.py +15 -0
  135. dcs_sdk/cli/__init__.py +13 -0
  136. dcs_sdk/cli/cli.py +163 -0
  137. dcs_sdk/sdk/__init__.py +58 -0
  138. dcs_sdk/sdk/config/__init__.py +13 -0
  139. dcs_sdk/sdk/config/config_loader.py +491 -0
  140. dcs_sdk/sdk/data_diff/__init__.py +13 -0
  141. dcs_sdk/sdk/data_diff/data_differ.py +821 -0
  142. dcs_sdk/sdk/rules/__init__.py +15 -0
  143. dcs_sdk/sdk/rules/rules_mappping.py +31 -0
  144. dcs_sdk/sdk/rules/rules_repository.py +214 -0
  145. dcs_sdk/sdk/rules/schema_rules.py +65 -0
  146. dcs_sdk/sdk/utils/__init__.py +13 -0
  147. dcs_sdk/sdk/utils/serializer.py +25 -0
  148. dcs_sdk/sdk/utils/similarity_score/__init__.py +13 -0
  149. dcs_sdk/sdk/utils/similarity_score/base_provider.py +153 -0
  150. dcs_sdk/sdk/utils/similarity_score/cosine_similarity_provider.py +39 -0
  151. dcs_sdk/sdk/utils/similarity_score/jaccard_provider.py +24 -0
  152. dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py +31 -0
  153. dcs_sdk/sdk/utils/table.py +475 -0
  154. dcs_sdk/sdk/utils/themes.py +40 -0
  155. dcs_sdk/sdk/utils/utils.py +349 -0
  156. dcs_sdk-1.6.5.dist-info/METADATA +150 -0
  157. dcs_sdk-1.6.5.dist-info/RECORD +159 -0
  158. dcs_sdk-1.6.5.dist-info/WHEEL +4 -0
  159. dcs_sdk-1.6.5.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,136 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ from datetime import datetime, timezone
15
+ from typing import Dict, List, Union
16
+
17
+ from dcs_core.core.common.models.metric import (
18
+ IndexMetrics,
19
+ MetricsType,
20
+ MetricValue,
21
+ TableMetrics,
22
+ )
23
+ from dcs_core.core.datasource.base import DataSource
24
+ from dcs_core.core.datasource.sql_datasource import SQLDataSource
25
+ from dcs_core.core.metric.base import MetricIdentity
26
+ from dcs_core.core.profiling.numeric_field_profiling import NumericSQLFieldProfiler
27
+ from dcs_core.core.profiling.text_field_profiling import TextSQLFieldProfiler
28
+
29
+
30
class DataSourceProfiling:
    """
    Generate table-level and field-level profiles for a data source.

    Only ``SQLDataSource`` instances are supported; ``generate`` raises
    ``NotImplementedError`` for every other kind of data source.
    """

    def __init__(self, data_source: DataSource):
        """
        :param data_source: The data source for which field profiles are to be generated.
        """
        self._datasource = data_source
        # Define both attributes unconditionally so that a non-SQL data source
        # never triggers an AttributeError; they simply stay empty in that case.
        self._tables: List[str] = []
        self._field_meta_data: Dict[str, Dict[str, str]] = {}
        if isinstance(data_source, SQLDataSource):
            self._tables = data_source.query_get_table_metadata()
            for table in self._tables:
                self._field_meta_data[table] = data_source.query_get_column_metadata(table_name=table)

    def _generate_sql_data_source_profiles(self) -> List[TableMetrics]:
        """
        Build one ``TableMetrics`` entry per table of the SQL data source.

        Each entry contains the numeric and text field profiles of the table
        plus its row count, keyed by metric identity.
        """
        list_of_metric = []
        for table, fields in self._field_meta_data.items():
            table_metrics: List[MetricValue] = []

            for field, data_type in fields.items():
                # profile for numeric fields if the data type is numeric
                if data_type in DataSource.NUMERIC_PYTHON_TYPES_FOR_PROFILING:
                    metrics = self._generate_numeric_field_profile(table=table, field=field, data_type=data_type)
                    table_metrics.extend(metrics)

                # profile for text fields if the data type is text
                elif data_type in DataSource.TEXT_PYTHON_TYPES_FOR_PROFILING:
                    metrics = self._generate_text_field_profile(table=table, field=field, data_type=data_type)
                    table_metrics.extend(metrics)

                # fields of any other data type are not profiled

            # add row count metrics
            table_metrics.append(self._generate_sql_table_row_count(table=table))
            # create a table metric list for a table
            list_of_metric.append(
                TableMetrics(
                    table_name=table,
                    metrics={metric.identity: metric for metric in table_metrics},
                    data_source=self._datasource.data_source_name,
                )
            )

        return list_of_metric

    def _generate_sql_table_row_count(self, table: str) -> MetricValue:
        """
        Query the row count of ``table`` and wrap it in a ``MetricValue``.

        :param table: Name of the table to count.
        :raises NotImplementedError: if the data source is not a SQL data source.
        """
        # Fail loudly instead of silently returning None, which would only
        # surface later as an AttributeError on ``metric.identity``.
        if not isinstance(self._datasource, SQLDataSource):
            raise NotImplementedError(f"Profiling for {self._datasource.data_source_name} is not implemented.")

        table_row_count = self._datasource.query_get_row_count(table=table)
        return MetricValue(
            identity=MetricIdentity.generate_identity(
                metric_name="",
                metric_type=MetricsType.ROW_COUNT,
                data_source=self._datasource,
                table_name=table,
            ),
            value=table_row_count,
            data_source=self._datasource.data_source_name,
            metric_type=MetricsType.ROW_COUNT,
            table_name=table,
            timestamp=datetime.now(timezone.utc),
        )

    def _generate_numeric_field_profile(self, table: str, field: str, data_type: str) -> List[MetricValue]:
        """
        This method generates a numeric field profile for a given field.

        Returns an empty list when the data source is not a SQL data source.
        """
        profiles = []
        if isinstance(self._datasource, SQLDataSource):
            profiler = NumericSQLFieldProfiler(
                data_source=self._datasource,
                table_name=table,
                field_name=field,
                data_type=data_type,
            )
            generate = profiler.generate()
            profiles = generate.get_metric_values
        return profiles

    def _generate_text_field_profile(self, table: str, field: str, data_type: str) -> List[MetricValue]:
        """
        This method generates a text field profile for a given field.

        Returns an empty list when the data source is not a SQL data source.
        """
        profiles = []

        if isinstance(self._datasource, SQLDataSource):
            profiler = TextSQLFieldProfiler(
                data_source=self._datasource,
                table_name=table,
                field_name=field,
                data_type=data_type,
            )
            generate = profiler.generate()
            profiles = generate.get_metric_values
        return profiles

    def generate(self) -> List[Union[TableMetrics, IndexMetrics]]:
        """
        This method generates field profiles for a given data source.

        :raises NotImplementedError: if the data source is not a SQL data source.
        """
        if isinstance(self._datasource, SQLDataSource):
            return self._generate_sql_data_source_profiles()
        else:
            raise NotImplementedError(f"Profiling for {self._datasource.data_source_name} is not implemented.")
@@ -0,0 +1,72 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from datetime import datetime, timezone
16
+ from typing import Dict
17
+
18
+ from dcs_core.core.common.models.metric import MetricsType, MetricValue
19
+ from dcs_core.core.common.models.profile import NumericFieldProfile
20
+ from dcs_core.core.datasource.sql_datasource import SQLDataSource
21
+ from dcs_core.core.metric.base import MetricIdentity
22
+
23
+
24
class NumericSQLFieldProfiler:
    """
    NumericSQLFieldProfiler builds the numeric field profile of a single field of a SQL data source.
    """

    def __init__(
        self,
        data_source: SQLDataSource,
        table_name: str,
        field_name: str,
        data_type: str,
    ):
        """
        :param data_source: SQL data source that is queried for the aggregates.
        :param table_name: Table that contains the field.
        :param field_name: Field to profile.
        :param data_type: Data type of the field, stored on the resulting profile.
        """
        self._data_source = data_source
        self._table_name = table_name
        self._field_name = field_name
        self._data_type = data_type

    def generate(self) -> NumericFieldProfile:
        """
        Generate a numeric field profile.
        """
        data: Dict = self._data_source.profiling_sql_aggregates_numeric(self._table_name, self._field_name)
        return self._generate_field_profile(data)

    def _generate_field_profile(self, data: Dict) -> NumericFieldProfile:
        """
        Generate a numeric field profile from the data provided.

        Every key of ``data`` is converted with ``MetricsType(key)`` and is also
        used as the attribute name under which the metric is stored on the profile.
        """
        profile = NumericFieldProfile(
            field_name=self._field_name,
            data_type=self._data_type,
        )
        # One timestamp for the whole profile so all its metrics line up.
        timestamp = datetime.now(timezone.utc)
        for key, value in data.items():
            metric_type = MetricsType(key)
            metric_value = MetricValue(
                value=value,
                identity=MetricIdentity.generate_identity(
                    metric_name="",
                    metric_type=metric_type,
                    data_source=self._data_source,
                    table_name=self._table_name,
                    field_name=self._field_name,
                ),
                metric_type=metric_type,
                timestamp=timestamp,
                data_source=self._data_source.data_source_name,
                # Carry the table and field on the value itself, as the text
                # profiler does, so the metric is self-describing.
                table_name=self._table_name,
                field_name=self._field_name,
            )
            setattr(profile, key, metric_value)
        return profile
@@ -0,0 +1,67 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from datetime import datetime, timezone
16
+ from typing import Dict
17
+
18
+ from dcs_core.core.common.models.metric import MetricsType, MetricValue
19
+ from dcs_core.core.common.models.profile import TextFieldProfile
20
+ from dcs_core.core.datasource.sql_datasource import SQLDataSource
21
+ from dcs_core.core.metric.base import MetricIdentity
22
+
23
+
24
class TextSQLFieldProfiler:
    """
    TextSQLFieldProfiler builds the text field profile of a single field of a SQL data source.
    """

    def __init__(
        self,
        data_source: SQLDataSource,
        table_name: str,
        field_name: str,
        data_type: str,
    ):
        self._data_source, self._table_name = data_source, table_name
        self._field_name, self._data_type = field_name, data_type

    def generate(self) -> TextFieldProfile:
        """
        Query the string aggregates of the field and turn them into a profile.
        """
        aggregates: Dict = self._data_source.profiling_sql_aggregates_string(self._table_name, self._field_name)
        return self._generate_field_profile(aggregates)

    def _generate_field_profile(self, data: Dict) -> TextFieldProfile:
        """
        Generate a text field profile from the data provided.

        Every key of ``data`` is converted with ``MetricsType(key)`` and is also
        used as the attribute name under which the metric is stored on the profile.
        """
        text_profile = TextFieldProfile(field_name=self._field_name, data_type=self._data_type)
        created_at = datetime.now(timezone.utc)
        source = self._data_source
        for name, raw_value in data.items():
            identity = MetricIdentity.generate_identity(
                metric_name="",
                metric_type=MetricsType(name),
                data_source=source,
                table_name=self._table_name,
                field_name=self._field_name,
            )
            setattr(
                text_profile,
                name,
                MetricValue(
                    value=raw_value,
                    identity=identity,
                    metric_type=MetricsType(name),
                    timestamp=created_at,
                    data_source=source.data_source_name,
                    table_name=self._table_name,
                    field_name=self._field_name,
                ),
            )
        return text_profile
@@ -0,0 +1,13 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
@@ -0,0 +1,77 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import abc
16
+ from datetime import datetime
17
+ from typing import Dict, List, Tuple
18
+
19
+ from dcs_core.core.common.models.metric import MetricValue
20
+
21
+
22
class MetricRepository(metaclass=abc.ABCMeta):
    """
    Abstract storage interface for metric values.

    Metric values are kept in two time granularities, daily and hourly.
    """

    @abc.abstractmethod
    def save_metric(self, metric_id, metric_value: MetricValue) -> int:
        """
        Save ``metric_value`` under ``metric_id`` in both granularities.

        - daily: the stored time is the start of the day, e.g. a value produced at
          2020-01-01 10:30am is stored as 2020-01-01 00:00:00, so the value of a given
          day can be queried. When a metric is generated several times a day the latest
          value overwrites the earlier one.
        - hourly: the stored time is the start of the hour, e.g. a value produced at
          2020-01-01 10:30am is stored as 2020-01-01 10:00:00, so the value of a given
          hour can be queried. When a metric is generated several times an hour the
          latest value overwrites the earlier one.
        """

    @abc.abstractmethod
    def save_all_metrics(self, metrics: List[MetricValue]) -> int:
        """
        Save every metric of the given list, each in both the daily and the hourly
        granularity.
        """

    @abc.abstractmethod
    def get_metric_by_id(
        self, metric_id, start_date: datetime = None, end_date: datetime = None
    ) -> Tuple[List[MetricValue], List[MetricValue]]:
        """
        Return the metrics stored for ``metric_id`` as a tuple of two lists: the daily
        metrics first, the hourly metrics second.

        return value:
        (
            [daily_metric_1, daily_metric_2],
            [hourly_metric_1, hourly_metric_2]
        )
        """

    @abc.abstractmethod
    def get_all_metrics(
        self, start_date: datetime = None, end_date: datetime = None
    ) -> Dict[str, Tuple[List[MetricValue], List[MetricValue]]]:
        """
        Return all metrics stored in the repository as a dictionary keyed by metric_id.
        Each value is a tuple of two lists: the daily metrics first, the hourly metrics
        second.

        return value:
        {
            "metric_id_1": ([daily_metric_1, daily_metric_2], [hourly_metric_1, hourly_metric_2]),
            "metric_id_2": ([daily_metric_1, daily_metric_2], [hourly_metric_1, hourly_metric_2])
        }
        """
@@ -0,0 +1,13 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
@@ -0,0 +1,29 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from loguru import Logger
16
+
17
+
18
class LogLevel:
    """Placeholder type for log levels; it defines no members yet."""
20
+
21
+
22
class Log:
    """
    Holder for a loguru logger, a list of collected ``Log`` entries and a verbosity flag.
    """

    # NOTE(review): loguru documents ``Logger`` as a type-hint-only name that is
    # not importable at runtime - confirm that this module's
    # ``from loguru import Logger`` import actually succeeds.

    def __init__(self, logger: Logger):
        self.verbose: bool = False
        self.logs: list[Log] = []
        self.logger: Logger = logger

    def log(self, level, message, location, doc, exception):
        """Accept the parts of a log record; currently a no-op."""
@@ -0,0 +1,105 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ Module for anonymous telemetry.
17
+
18
+ No credentials, data, personal information or anything private is collected (and will never be).
19
+ """
20
+ import json
21
+ import os
22
+ import platform
23
+ from time import time
24
+ from typing import Optional
25
+ from uuid import uuid4
26
+
27
+ import requests
28
+ from loguru import logger
29
+
30
+ from dcs_core.core.utils.utils import truncate_error
31
+
32
# Raw value of the DISABLE_DCS_ANONYMOUS_TELEMETRY environment variable, or
# False when it is unset. Any non-empty string therefore disables telemetry.
TRACKING_DISABLED = os.environ.get("DISABLE_DCS_ANONYMOUS_TELEMETRY", False)
# Endpoint the telemetry events are posted to.
TRACK_URL = "https://hosted.rudderlabs.com/v1/track"
# Token placed in the "properties" of every event payload.
TOKEN = "2U4Bsait5XpEyHnbFtJSjig7KH8"
# NOTE(review): presumably a request timeout in seconds - confirm where it is applied.
TIMEOUT = 8

# Cached anonymous id; filled in by the first call to get_anonymous_id().
dcs_anonymous_id = None


def is_tracking_enabled():
    """Return True unless telemetry was disabled through the environment."""
    tracking_enabled = not TRACKING_DISABLED
    return tracking_enabled


def get_anonymous_id():
    """Return the cached anonymous id, generating a random UUID4 string on first use."""
    global dcs_anonymous_id
    if dcs_anonymous_id is not None:
        return dcs_anonymous_id
    dcs_anonymous_id = str(uuid4())
    return dcs_anonymous_id
49
+
50
+
51
def create_error_event(
    exception: Exception,
):
    """
    Build the "dcs_error" telemetry event for an exception.

    The exception's ``repr`` is passed through ``truncate_error`` before it is
    attached to the event, together with the anonymous id, the token, the
    current time and basic OS / Python version information.
    """
    masked_error = truncate_error(repr(exception))
    properties = {
        "distinct_id": get_anonymous_id(),
        "token": TOKEN,
        "time": time(),
        "os_type": os.name,
        "os_version": platform.platform(),
        "python_version": f"{platform.python_version()}/{platform.python_implementation()}",
        "error": masked_error,
    }
    return {"event": "dcs_error", "properties": properties}
67
+
68
+
69
def create_inspect_event_json(
    runtime_seconds: float,
    inspect_info: Optional[dict] = None,
    error: Optional[str] = None,
):
    """
    Build the "dcs_inspect_end" telemetry event.

    :param runtime_seconds: Duration of the inspect run.
    :param inspect_info: Optional counters; the keys "metrics_count", "datasource_count",
        "table_count" and "index_count" are read and default to 0 when missing.
    :param error: Optional error text attached to the event as-is.
    """
    # A missing or empty info mapping makes every counter fall back to 0.
    counters = inspect_info if inspect_info else {}
    properties = {
        "distinct_id": get_anonymous_id(),
        "token": TOKEN,
        "time": time(),
        "runtime_seconds": runtime_seconds,
        "os_type": os.name,
        "os_version": platform.platform(),
        "python_version": f"{platform.python_version()}/{platform.python_implementation()}",
        "count_metrics": counters.get("metrics_count", 0),
        "count_datasource": counters.get("datasource_count", 0),
        "count_tables": counters.get("table_count", 0),
        "count_index": counters.get("index_count", 0),
        "error": error,
    }
    return {"event": "dcs_inspect_end", "properties": properties}
91
+
92
+
93
def send_event_json(event_json):
    """
    Post a telemetry event to ``TRACK_URL``; do nothing when tracking is disabled.

    Sending is best effort: serialisation errors, network errors, timeouts and
    non-200 responses are logged at debug level and never propagated.

    :param event_json: JSON-serialisable event payload.
    """
    if not is_tracking_enabled():
        return

    headers = {
        "Content-Type": "application/json",
        "Authorization": "Basic MlU0QnNhaXQ1WHBFeUhuYkZ0SlNqaWc3S0g4Og==",
    }
    try:
        data = json.dumps(event_json).encode()
        # requests applies no timeout by default, so bound the call with TIMEOUT
        # to keep an unreachable endpoint from blocking the caller indefinitely.
        response = requests.post(TRACK_URL, data=data, headers=headers, timeout=TIMEOUT)
        if response.status_code != 200:
            raise RuntimeError(response)
    except Exception as e:
        logger.debug(f"Failed to post to Rudderstack: {e}")
@@ -0,0 +1,44 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import json
15
+ import re
16
+ from datetime import datetime
17
+ from pathlib import Path
18
+
19
+
20
def truncate_error(error: str):
    """
    Keep only the first line of ``error`` and mask every single-quoted
    substring in it as ``'***'``.
    """
    head, _, _ = error.partition("\n")
    masked = re.sub("'(.*?)'", "'***'", head)
    return masked
23
+
24
+
25
def ensure_directory_exists(dir_path: str, create_if_not_exists=True):
    """
    Check that ``dir_path`` is an existing directory, optionally creating it.

    :param dir_path: Path of the directory.
    :param create_if_not_exists: Create the directory (including parents) when it is missing.
    :return: True when the directory exists on return, False when it is missing
        and creation was not requested.
    :raises FileExistsError: if creation is requested but the path exists and is
        not a directory.
    """
    path = Path(dir_path)
    if path.is_dir():
        return True
    if not create_if_not_exists:
        return False
    # exist_ok=True tolerates the directory being created by someone else
    # between the check above and this call; a non-directory still raises.
    path.mkdir(parents=True, exist_ok=True)
    return True
33
+
34
+
35
def write_to_file(file_path: str, data: str, encoding: str = "utf-8"):
    """
    Write ``data`` to ``file_path``, replacing any existing content.

    :param file_path: Path of the file to write.
    :param data: Text to write.
    :param encoding: Text encoding of the file. Defaults to UTF-8 so the output
        does not depend on the platform's locale encoding.
    """
    with open(file_path, "w", encoding=encoding) as file:
        file.write(data)
38
+
39
+
40
class EnhancedJSONEncoder(json.JSONEncoder):
    """JSON encoder that serialises ``datetime`` objects as ISO 8601 strings."""

    def default(self, o):
        """Encode datetimes via ``isoformat``; defer everything else to the base class."""
        if not isinstance(o, datetime):
            return super().default(o)
        return o.isoformat()
@@ -0,0 +1,13 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.