icsDataValidation-1.0.358-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. icsDataValidation/configuration.py +19 -0
  2. icsDataValidation/connection_setups/__init__.py +0 -0
  3. icsDataValidation/connection_setups/azure_connection_setup.py +19 -0
  4. icsDataValidation/connection_setups/databricks_connection_setup.py +28 -0
  5. icsDataValidation/connection_setups/exasol_connection_setup.py +17 -0
  6. icsDataValidation/connection_setups/oracle_connection_setup.py +26 -0
  7. icsDataValidation/connection_setups/snowflake_connection_setup.py +35 -0
  8. icsDataValidation/connection_setups/teradata_connection_setup.py +18 -0
  9. icsDataValidation/core/__init__.py +0 -0
  10. icsDataValidation/core/database_objects.py +18 -0
  11. icsDataValidation/core/object_comparison.py +239 -0
  12. icsDataValidation/input_parameters/__init__.py +0 -0
  13. icsDataValidation/input_parameters/testing_tool_params.py +81 -0
  14. icsDataValidation/main.py +250 -0
  15. icsDataValidation/output_parameters/__init__.py +0 -0
  16. icsDataValidation/output_parameters/result_params.py +94 -0
  17. icsDataValidation/services/__init__.py +0 -0
  18. icsDataValidation/services/comparison_service.py +582 -0
  19. icsDataValidation/services/database_services/__init__.py +0 -0
  20. icsDataValidation/services/database_services/azure_service.py +320 -0
  21. icsDataValidation/services/database_services/databricks_hive_metastore_service.py +1694 -0
  22. icsDataValidation/services/database_services/databricks_unity_catalog_service.py +1379 -0
  23. icsDataValidation/services/database_services/exasol_service.py +261 -0
  24. icsDataValidation/services/database_services/oracle_service.py +713 -0
  25. icsDataValidation/services/database_services/snowflake_service.py +1100 -0
  26. icsDataValidation/services/database_services/teradata_service.py +665 -0
  27. icsDataValidation/services/initialization_service.py +103 -0
  28. icsDataValidation/services/result_service.py +573 -0
  29. icsDataValidation/services/system_service.py +61 -0
  30. icsDataValidation/services/testset_service.py +257 -0
  31. icsDataValidation/utils/__init__.py +0 -0
  32. icsDataValidation/utils/file_util.py +96 -0
  33. icsDataValidation/utils/logger_util.py +96 -0
  34. icsDataValidation/utils/pandas_util.py +159 -0
  35. icsDataValidation/utils/parallelization_util.py +52 -0
  36. icsDataValidation/utils/sql_util.py +14 -0
  37. icsDataValidation-1.0.358.dist-info/METADATA +21 -0
  38. icsDataValidation-1.0.358.dist-info/RECORD +40 -0
  39. icsDataValidation-1.0.358.dist-info/WHEEL +5 -0
  40. icsDataValidation-1.0.358.dist-info/top_level.txt +1 -0
icsDataValidation/services/system_service.py
@@ -0,0 +1,61 @@
+ from icsDataValidation.connection_setups.snowflake_connection_setup import load_snowflake_credentials
+ from icsDataValidation.connection_setups.exasol_connection_setup import load_exasol_credentials
+ from icsDataValidation.connection_setups.azure_connection_setup import load_azure_credentials
+ from icsDataValidation.connection_setups.teradata_connection_setup import load_teradata_credentials
+ from icsDataValidation.connection_setups.oracle_connection_setup import load_oracle_credentials
+ from icsDataValidation.connection_setups.databricks_connection_setup import load_databricks_credentials
+ from icsDataValidation.services.database_services.snowflake_service import SnowflakeService
+ from icsDataValidation.services.database_services.teradata_service import TeradataService
+ from icsDataValidation.services.database_services.exasol_service import ExasolService
+ from icsDataValidation.services.database_services.azure_service import AzureService
+ from icsDataValidation.services.database_services.oracle_service import OracleService
+ from icsDataValidation.services.database_services.databricks_hive_metastore_service import DatabricksHiveMetastoreService
+ from icsDataValidation.services.database_services.databricks_unity_catalog_service import DatabricksUnityCatalogService
+
+ #########################################################################################
+ #########################################################################################
+
+ class SystemService:
+     """
+     Class to initialize database services depending on the system selection.
+     """
+
+     def __init__(self, system_selection: str, database_config: dict):
+         self.database_config = database_config
+         self.system_selection = system_selection
+         self.system_type = database_config[system_selection]["DATABASE_TYPE"].upper()
+
+     def get_connection_params(self):
+         """
+         Get the connection parameters depending on the system type.
+         """
+         credentials_function_mapping = {
+             "SNOWFLAKE": load_snowflake_credentials,
+             "EXASOL": load_exasol_credentials,
+             "AZURE": load_azure_credentials,
+             "TERADATA": load_teradata_credentials,
+             "ORACLE": load_oracle_credentials,
+             "DATABRICKS_HIVE_METASTORE": load_databricks_credentials,
+             "DATABRICKS_UNITY_CATALOG": load_databricks_credentials,
+         }
+
+         connection_params = credentials_function_mapping[self.system_type](
+             self.database_config, self.system_selection
+         )
+         return connection_params
+
+     def initialize_database_service(self, connection_params: dict):
+         """
+         Initialize the database service depending on the system type.
+         """
+         database_service_mapping = {
+             "SNOWFLAKE": SnowflakeService,
+             "EXASOL": ExasolService,
+             "AZURE": AzureService,
+             "TERADATA": TeradataService,
+             "ORACLE": OracleService,
+             "DATABRICKS_HIVE_METASTORE": DatabricksHiveMetastoreService,
+             "DATABRICKS_UNITY_CATALOG": DatabricksUnityCatalogService,
+         }
+         database_service = database_service_mapping[self.system_type](connection_params)
+         return database_service
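
A minimal usage sketch for the SystemService above. The database_config contents and the "MY_SNOWFLAKE" system name are invented for illustration; the real keys are whatever the corresponding load_*_credentials function expects:

    # Hypothetical configuration; the real entries come from the project's database config.
    database_config = {
        "MY_SNOWFLAKE": {
            "DATABASE_TYPE": "snowflake",  # upper-cased internally to match the mapping keys
            # ... credential fields expected by load_snowflake_credentials ...
        }
    }

    system_service = SystemService("MY_SNOWFLAKE", database_config)
    connection_params = system_service.get_connection_params()
    database_service = system_service.initialize_database_service(connection_params)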
icsDataValidation/services/testset_service.py
@@ -0,0 +1,257 @@
+ import logging
+
+ from icsDataValidation.utils.logger_util import configure_dev_ops_logger
+
+ #########################################################################################
+ # Configure Dev Ops Logger
+
+ logger = logging.getLogger('TestsetService')
+ logger.setLevel(logging.INFO)
+ configure_dev_ops_logger(logger)
+
+ #########################################################################################
+ #########################################################################################
+
+ class TestsetService:
+     """
+     Class to prepare the set of objects for the comparison.
+     Maps schemas and objects between source and target.
+     Handles blacklists and whitelists.
+     """
+
+     def __init__(self, testset_mapping: dict, testset_blacklist: dict, testset_whitelist: dict = None):
+         self.testset_mapping = testset_mapping
+         self.testset_blacklist = testset_blacklist
+         self.testset_whitelist = testset_whitelist
+
+     def handle_database_mapping(self, source_database_name: str = None) -> str:
+         """
+         Map the source database to the target database.
+         Note: case-insensitive; returns the upper-case target database name.
+         """
+         target_database_name = source_database_name.upper()
+
+         if self.testset_mapping and "DATABASE_MAPPING" in self.testset_mapping:
+             for database_mapping in self.testset_mapping["DATABASE_MAPPING"]:
+                 if source_database_name.upper() == database_mapping["src_database_name"].upper():
+                     target_database_name = database_mapping["trgt_database_name"].upper()
+
+         return target_database_name
+
+     def handle_schema_mapping(self, source_schema_name: str = None, source_database_name: str = None) -> tuple:
+         """
+         Map the source schema to the target schema.
+         Note: case-insensitive; returns the upper-case target schema name and a flag whether a mapping was found.
+         """
+         target_schema_name = source_schema_name.upper()
+         found_schema_mapping = False
+
+         if self.testset_mapping and "SCHEMA_MAPPING" in self.testset_mapping:
+             for schema_mapping in self.testset_mapping["SCHEMA_MAPPING"]:
+
+                 if f"{source_database_name.upper()}.{source_schema_name.upper()}" == schema_mapping["src_schema_identifier"].upper():
+                     target_schema_name = schema_mapping["trgt_schema_name"].upper()
+                     found_schema_mapping = True
+
+         return target_schema_name, found_schema_mapping
+
+     def handle_schema_replace_mapping(self, source_schema_name: str = None) -> str:
+         """
+         Map the source schema to the target schema by replacing a substring of the schema name.
+         Note: case-insensitive; returns the upper-case target schema name.
+         """
+
+         if self.testset_mapping and "SCHEMA_REPLACE_MAPPING" in self.testset_mapping:
+             replace_mapping = self.testset_mapping["SCHEMA_REPLACE_MAPPING"]
+             for replace_object in replace_mapping:
+                 target_schema_name = source_schema_name.upper().replace(
+                     replace_object["src_replace_value"].upper(),
+                     replace_object["trgt_replace_value"].upper(),
+                 )
+         else:
+             target_schema_name = source_schema_name.upper()
+
+         return target_schema_name
+
+     def handle_blacklist(self, database_objects: list, src_trgt: str) -> list:
+         """
+         Handle the blacklist from the migration_config to restrict database objects.
+         Use src_trgt="SRC" for source and src_trgt="TRGT" for target.
+         """
+         blacklist_objects = [object_blacklisted.upper() for object_blacklisted in self.testset_blacklist[f"BLACKLIST_OBJECTS_{src_trgt}"]]
+         blacklist_schemas = [schema_blacklisted.upper() for schema_blacklisted in self.testset_blacklist[f"BLACKLIST_SCHEMAS_{src_trgt}"]]
+         blacklist_databases = [database_blacklisted.upper() for database_blacklisted in self.testset_blacklist[f"BLACKLIST_DATABASES_{src_trgt}"]]
+
+         database_objects_ = database_objects.copy()
+
+         for db_object in database_objects_:
+             database_name = db_object["object_identifier"].split(".", 1)[0]
+             schema_identifier = ".".join(db_object["object_identifier"].split(".", 2)[:2])
+             if database_name in blacklist_databases:
+                 database_objects.remove(db_object)
+             elif schema_identifier in blacklist_schemas:
+                 database_objects.remove(db_object)
+             elif db_object["object_identifier"] in blacklist_objects:
+                 database_objects.remove(db_object)
+
+         return database_objects
+
+     def handle_whitelist(self, database_objects: list, src_trgt: str) -> list:
+         """
+         Handle the whitelist which is defined as a testset to restrict database objects.
+         Use src_trgt="SRC" for source and src_trgt="TRGT" for target.
+         """
+         whitelist_objects = [object_whitelisted.upper() for object_whitelisted in self.testset_whitelist[f"WHITELIST_OBJECTS_{src_trgt}"]]
+         whitelist_schemas = [schema_whitelisted.upper() for schema_whitelisted in self.testset_whitelist[f"WHITELIST_SCHEMAS_{src_trgt}"]]
+         whitelist_databases = [database_whitelisted.upper() for database_whitelisted in self.testset_whitelist[f"WHITELIST_DATABASES_{src_trgt}"]]
+
+         database_objects_ = database_objects.copy()
+
+         for db_object in database_objects_:
+             database_name = db_object["object_identifier"].split(".", 1)[0]
+             schema_identifier = ".".join(db_object["object_identifier"].split(".", 2)[:2])
+             if db_object["object_identifier"].upper() not in whitelist_objects and schema_identifier.upper() not in whitelist_schemas and database_name.upper() not in whitelist_databases:
+                 database_objects.remove(db_object)
+
+         return database_objects
+
+     def map_objects(self, database_objects_src: list, database_objects_trgt: list):
+         """
+         Maps objects between source and target using the mapping defined in the migration_config.json.
+         Handles object "1:1"-mapping and object "replace"-mapping.
+         Returns remaining_mapping_objects, which differ between source and target and cannot be mapped,
+         and a flag all_objects_matching that indicates whether such remaining objects exist.
+         """
+         intersection_objects_mapped_trgt_src = []
+         remaining_mapping_objects = []
+         src_objects_minus_trgt_objects = [object for object in database_objects_src if object not in database_objects_trgt]
+         trgt_objects_minus_src_objects = [object for object in database_objects_trgt if object not in database_objects_src]
+
+         trgt_objects_minus_src_table_identifiers = [object["object_identifier"] for object in database_objects_trgt if object not in database_objects_src and object["object_type"] == 'table']
+         trgt_objects_minus_src_view_identifiers = [object["object_identifier"] for object in database_objects_trgt if object not in database_objects_src and object["object_type"] == 'view']
+
+         if database_objects_src != database_objects_trgt and self.testset_mapping:
+
+             src_objects_minus_trgt_objects_ = src_objects_minus_trgt_objects.copy()
+
+             trgt_objects_minus_src_object_identifiers = [object["object_identifier"] for object in trgt_objects_minus_src_objects]
+
+             for n_db_object, db_object in enumerate(src_objects_minus_trgt_objects_):
+                 logger.info(f"Object {n_db_object+1} of {len(src_objects_minus_trgt_objects_)}: {db_object}")
+                 continue_flag = True
+
+                 #########################################################################################
+                 # Object-Mapping
+                 for mapping in self.testset_mapping["OBJECT_MAPPING"]:
+
+                     if (
+                         db_object["object_identifier"] == mapping["src_object_identifier"].upper()
+                         and db_object["object_type"] == mapping["src_object_type"]
+                         and mapping['trgt_object_identifier'].upper() in trgt_objects_minus_src_object_identifiers
+                     ):
+                         logger.info(f" -> mapping object found: {mapping}")
+                         intersection_objects_mapped_trgt_src.append({"src_object_identifier": db_object["object_identifier"], "src_object_type": db_object["object_type"], "trgt_object_identifier": mapping["trgt_object_identifier"], "trgt_object_type": mapping["trgt_object_type"]})
+                         src_objects_minus_trgt_objects.remove(db_object)
+
+                         for trgt_object in trgt_objects_minus_src_objects:
+                             if trgt_object["object_identifier"] == mapping["trgt_object_identifier"].upper():
+                                 trgt_objects_minus_src_objects.remove(trgt_object)
+                                 logger.info(" -> added by 1:1 mapping")
+
+                         # set continue_flag to False because this object has been covered by the mapping
+                         continue_flag = False
+                         break
+
+                 ##########################################################################################
+                 # Database-Mapping and Schema-Mapping
+
+                 if continue_flag:
+
+                     src_database_name = db_object["object_identifier"].split(".", 1)[0]
+                     src_schema_name = db_object["object_identifier"].split(".", 2)[1]
+                     src_object_name = db_object["object_identifier"].split(".", 2)[2]
+
+                     trgt_database_name = self.handle_database_mapping(src_database_name)
+                     trgt_schema_name, _ = self.handle_schema_mapping(src_schema_name, src_database_name)
+
+                     trgt_object_identifier = f"{trgt_database_name}.{trgt_schema_name}.{src_object_name}".upper()
+
+                     if (db_object["object_type"] == 'table' and trgt_object_identifier in trgt_objects_minus_src_table_identifiers) or (db_object["object_type"] == 'view' and trgt_object_identifier in trgt_objects_minus_src_view_identifiers):
+                         intersection_objects_mapped_trgt_src.append({"src_object_identifier": db_object["object_identifier"], "src_object_type": db_object["object_type"], "trgt_object_identifier": trgt_object_identifier, "trgt_object_type": db_object["object_type"]})
+                         src_objects_minus_trgt_objects.remove(db_object)
+
+                         for trgt_object in trgt_objects_minus_src_objects:
+                             if trgt_object["object_identifier"] == trgt_object_identifier:
+                                 trgt_objects_minus_src_objects.remove(trgt_object)
+
+                         logger.info(" -> added by database/schema-mapping")
+
+                         # set continue_flag to False because this object has been covered by the database/schema-mapping
+                         continue_flag = False
+
+                 ##########################################################################################
+                 # Replace-Mapping
+
+                 if continue_flag:
+
+                     src_database_name = db_object["object_identifier"].split(".", 1)[0]
+                     src_schema_name = db_object["object_identifier"].split(".", 2)[1]
+                     src_object_name = db_object["object_identifier"].split(".", 2)[2]
+
+                     #TODO rework!!!!
+
+                     ## replace the values from the migration_config.json to create a potential_match which can be looked up in the trgt_objects_minus_src_objects list
+                     #potential_match = db_object["object_identifier"].upper().replace(f'{substitute["src_replace_value"].upper()}',f'{substitute["trgt_replace_value"].upper()}')
+                     #
+                     ## the potential_match is contained within the trgt_objects_minus_src_objects list
+                     #if potential_match in trgt_objects_minus_src_object_identifiers:
+                     #    logger.info(f" -> replace mapping found: {substitute}")
+                     #    intersection_objects_mapped_trgt_src.append({"src_object_identifier": db_object["object_identifier"],"src_object_type": db_object["object_type"], "trgt_object_identifier": potential_match,"trgt_object_type": db_object["object_type"]})
+                     #    src_objects_minus_trgt_objects.remove(db_object)
+                     #
+                     #    for trgt_object in trgt_objects_minus_src_objects:
+                     #        if trgt_object["object_identifier"] == potential_match:
+                     #            trgt_objects_minus_src_objects.remove(trgt_object)
+                     #            logger.info(" -> added by replace mapping")
+                     #
+                     #    # set continue_flag to False because this object has been covered by the replacements
+                     #    continue_flag = False
+                     #    break
+
+                 #####################################################################
+                 # Remaining objects
+                 if continue_flag:
+                     remaining_mapping_objects.append({"src_object_identifier": db_object["object_identifier"], "trgt_object_identifier": '', "src_object_type": db_object["object_type"], "trgt_object_type": ''})
+                     logger.info(" -> no mapping found -> added to remaining_mapping_objects")
+
+         object_identifiers_src_minus_trgt = [object["object_identifier"] for object in src_objects_minus_trgt_objects]
+         object_identifiers_trgt_minus_src = [object["object_identifier"] for object in trgt_objects_minus_src_objects]
+
+         if src_objects_minus_trgt_objects:
+             logger.warning('There are database objects in the source db that are not in the target db and for which no mapping exists:')
+             logger.warning(f"{object_identifiers_src_minus_trgt}")
+         if trgt_objects_minus_src_objects:
+             logger.warning('There are database objects in the target db that are not in the source db and for which no mapping exists:')
+             logger.warning(f"{object_identifiers_trgt_minus_src}")
+
+         if not (src_objects_minus_trgt_objects and trgt_objects_minus_src_objects):
+             all_objects_matching = True
+         else:
+             all_objects_matching = False
+
+         return intersection_objects_mapped_trgt_src, object_identifiers_src_minus_trgt, object_identifiers_trgt_minus_src, remaining_mapping_objects, all_objects_matching
+
+     @staticmethod
+     def get_intersection_objects_trgt_src(database_objects_src: list, database_objects_trgt: list, intersection_objects_mapped_trgt_src: list):
+         """
+         Get the intersection of all database objects from the source db and the target db - including mapped objects.
+         """
+
+         intersection_objects_trgt_src_without_mapping = [{"src_object_identifier": object["object_identifier"], "src_object_type": object["object_type"], "trgt_object_identifier": object["object_identifier"], "trgt_object_type": object["object_type"]} for object in database_objects_src if object in database_objects_trgt]
+
+         intersection_objects_trgt_src = intersection_objects_trgt_src_without_mapping + intersection_objects_mapped_trgt_src
+
+         return intersection_objects_trgt_src
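
A small sketch of how the mapping helpers above resolve names. The testset_mapping and testset_blacklist contents are invented and only mirror the keys read by handle_database_mapping, handle_schema_mapping, and handle_blacklist:

    testset_mapping = {
        "DATABASE_MAPPING": [
            {"src_database_name": "src_db", "trgt_database_name": "trgt_db"},
        ],
        "SCHEMA_MAPPING": [
            {"src_schema_identifier": "SRC_DB.RAW", "trgt_schema_name": "STAGE"},
        ],
        "OBJECT_MAPPING": [],
    }
    # empty blacklists for both sides, just to satisfy the expected key layout
    testset_blacklist = {
        f"BLACKLIST_{kind}_{side}": []
        for kind in ("OBJECTS", "SCHEMAS", "DATABASES")
        for side in ("SRC", "TRGT")
    }

    testset_service = TestsetService(testset_mapping, testset_blacklist)
    testset_service.handle_database_mapping("src_db")       # -> "TRGT_DB"
    testset_service.handle_schema_mapping("raw", "src_db")  # -> ("STAGE", True)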
icsDataValidation/utils/__init__.py (file without changes)
icsDataValidation/utils/file_util.py
@@ -0,0 +1,96 @@
+ import errno
+ import os
+ from typing import Union
+ import json
+ import decimal
+ import numpy as np
+
+ class CustomJSONEncoder(json.JSONEncoder):
+     def default(self, o):
+         if isinstance(o, decimal.Decimal):
+             return str(o)
+         if isinstance(o, np.integer):
+             return int(o)
+         if isinstance(o, np.floating):
+             return float(o)
+         if isinstance(o, np.ndarray):
+             return o.tolist()
+         try:
+             return super(CustomJSONEncoder, self).default(o)
+         except TypeError:
+             return str(o)
+
+ def load_file(file_path: str, encoding: str = "utf-8-sig") -> str:
+     """
+     Reads and returns the file content of the given path.
+     Encodings are tried in the following order:
+         utf-8-sig - default
+         utf-8
+         utf-16-le
+         utf-16
+         cp1252
+     Args:
+         file_path: absolute file path to a file
+         encoding: specific code page name
+     Raises:
+         EnvironmentError - if the file could not be read with the stated encodings
+     Returns:
+         File content as string representation
+     """
+     if not os.path.exists(file_path):
+         raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), file_path)
+     else:
+         try:
+             with open(file_path, "r", encoding=encoding) as file:
+                 return file.read()
+         except:
+             try:
+                 encoding = "utf-8"
+                 with open(file_path, "r", encoding=encoding) as file:
+                     return file.read()
+             except:
+                 try:
+                     encoding = "utf-16-le"
+                     with open(file_path, "r", encoding=encoding) as file:
+                         return file.read()
+                 except:
+                     try:
+                         encoding = "utf-16"
+                         with open(file_path, "r", encoding=encoding) as file:
+                             return file.read()
+                     except:
+                         try:
+                             encoding = "cp1252"
+                             with open(file_path, "r", encoding=encoding) as file:
+                                 return file.read()
+                         except:
+                             raise EnvironmentError(
+                                 f"Cannot read file {file_path}. Tried utf-8-sig (BOM), utf-8, utf-16-le, utf-16 and cp1252."
+                             )
+
+ def load_json(file_path: str, encoding: str = "utf-8-sig") -> Union[dict, None]:
+     """
+     Reads and returns a given JSON file. Content must be valid JSON.
+     Valid JSON should not have any trailing commas.
+     Encodings are tried in the following order:
+         utf-8-sig - default
+         utf-8
+         utf-16-le
+         utf-16
+         cp1252
+     Args:
+         file_path: absolute file path to a file
+         encoding: specific code page name
+     Raises:
+         EnvironmentError - if the file could not be read with the stated encodings
+         FileNotFoundError - if the file path does not exist
+     Returns:
+         File content as dictionary.
+     """
+     return json.loads(load_file(file_path, encoding))
+
+
+ def write_json_to_file(json_object: dict, file_path: str) -> None:
+     with open(file_path, 'w') as f:
+         json.dump(json_object, f, indent=4, cls=CustomJSONEncoder)
1
+ import logging
2
+ import re
3
+
4
+ class DevOpsFormatter(logging.Formatter):
5
+ error_format = "##vso[task.logissue type=error][%(name)s] %(message)s"
6
+ warning_format = "##vso[task.logissue type=warning][%(name)s] %(message)s"
7
+ dbg_fmt = "DBG: %(module)s: %(lineno)d: %(msg)s"
8
+ info_format = "%(message)s"
9
+ section_format = "%(message)s"
10
+
11
+ def __init__(self, fmt="%(levelno)s: %(msg)s", section_info = False):
12
+ super().__init__(fmt=fmt)
13
+ self._section_info = section_info
14
+
15
+ def parse_progress(self, message:str) -> str:
16
+ """
17
+ Parses Method for progress information
18
+ """
19
+ progress_pattern = re.compile(r".*PROGRESS\s\[\s*'(?P<x>\d+)\/(?P<y>\d+)'\s*\].*", re.IGNORECASE | re.MULTILINE)
20
+
21
+ if (progress_pattern.match(message)):
22
+ progress_match = progress_pattern.search(message)
23
+ x = int(progress_match.group('x'))
24
+ y = int(progress_match.group('y'))
25
+ progress_value = round((x / y)*100)
26
+ return f"##vso[task.setprogress value={progress_value};]script progress\n"
27
+
28
+ return ""
29
+
30
+ def parse_group_start(self, message:str) -> str:
31
+ """
32
+ Parses if its a group start and prepends a command string to the message
33
+ """
34
+ start_pattern = re.compile(r".*START\s(?P<gname>.+?\s\[\s*'.*'\s*\]).*", re.IGNORECASE | re.MULTILINE)
35
+
36
+ if (start_pattern.match(message)):
37
+ start_match = start_pattern.search(message)
38
+ return f"##[group]{start_match.group('gname')}\n"
39
+
40
+ return ""
41
+
42
+ def parse_group_end(self, message:str) -> str:
43
+ """
44
+ Parses if its a group end and appends a command string to the message
45
+ """
46
+ end_pattern = re.compile(r".*FINISHED\s.+?\s\[\s*'.*'\s*\].*", re.IGNORECASE | re.MULTILINE)
47
+
48
+ if (end_pattern.match(message)):
49
+ return "\n##[endgroup]"
50
+
51
+ return ""
52
+
53
+ def format(self, record):
54
+
55
+ # Save the original format configured by the user
56
+ # when the logger formatter was instantiated
57
+ format_orig = self._style._fmt
58
+
59
+ if (record.levelno == logging.INFO):
60
+ record_message = f"{record.msg}"
61
+
62
+ return f"{self.parse_progress(record.msg)}{self.parse_group_start(record.msg)}{record_message}{self.parse_group_end(record.msg)}"
63
+
64
+ # Replace the original format with one customized by logging level
65
+ if record.levelno == logging.DEBUG:
66
+ self._fmt = DevOpsFormatter.dbg_fmt
67
+ elif record.levelno == logging.INFO and not self._section_info:
68
+ self._style._fmt = DevOpsFormatter.info_format
69
+
70
+ elif record.levelno == logging.INFO and self._section_info:
71
+ self._style._fmt = DevOpsFormatter.section_format
72
+
73
+ elif record.levelno == logging.ERROR:
74
+ self._style._fmt = DevOpsFormatter.error_format
75
+
76
+ elif record.levelno == logging.WARNING:
77
+ self._style._fmt = DevOpsFormatter.warning_format
78
+
79
+ # Call the original formatter class to do the grunt work
80
+ result = logging.Formatter.format(self, record)
81
+
82
+ # Restore the original format configured by the user
83
+ self._style._fmt = format_orig
84
+
85
+ return result
86
+
87
+ def configure_dev_ops_logger(logger: logging.Logger) -> None:
88
+ """
89
+ Configure logging for azure devops
90
+ """
91
+ snf_logger = logging.getLogger('snowflake.connector.ocsp_snowflake')
92
+ snf_logger.disabled = True
93
+ section_formatter = DevOpsFormatter(section_info=True)
94
+ section_handler = logging.StreamHandler()
95
+ section_handler.setFormatter(section_formatter)
96
+ logger.addHandler(section_handler)
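
How the formatter is typically wired up, mirroring the pattern used in testset_service.py above. The logger name and messages are invented; the START/PROGRESS/FINISHED phrases simply follow the regex patterns parsed by the formatter:

    import logging

    logger = logging.getLogger("MyPipelineStep")
    logger.setLevel(logging.INFO)
    configure_dev_ops_logger(logger)

    logger.info("START comparison ['MY_DB.MY_SCHEMA.MY_TABLE']")    # rendered with a leading ##[group]... command
    logger.info("PROGRESS ['3/10'] objects compared")               # rendered with ##vso[task.setprogress value=30;]...
    logger.warning("row counts differ")                             # rendered as ##vso[task.logissue type=warning]...
    logger.info("FINISHED comparison ['MY_DB.MY_SCHEMA.MY_TABLE']") # rendered with a trailing ##[endgroup]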
icsDataValidation/utils/pandas_util.py
@@ -0,0 +1,159 @@
+ import numpy as np
+ from decimal import Decimal
+
+
+ def get_diff_dataframes(df_1, df_2, key_columns_1, key_columns_2):
+     """
+     Get the difference between two Pandas dataframes by sorting over specific key columns.
+     Returns the two dataframes containing only the rows with differences from the input dataframes, and the sorted dataframes.
+     """
+     df_1_sorted = df_1.sort_values(by=key_columns_1).reset_index(drop=True)
+     df_2_sorted = df_2.sort_values(by=key_columns_2).reset_index(drop=True)
+
+     diff_1 = df_1_sorted[~df_1_sorted.apply(tuple, 1).isin(df_2_sorted.apply(tuple, 1))]
+     diff_2 = df_2_sorted[~df_2_sorted.apply(tuple, 1).isin(df_1_sorted.apply(tuple, 1))]
+
+     diff_1 = diff_1.reset_index(drop=True)
+     diff_2 = diff_2.reset_index(drop=True)
+
+     return diff_1, diff_2, df_1_sorted, df_2_sorted
+
+
+ def get_diff_dict_from_diff_dataframes(diff_1, diff_2, key_columns_1, key_columns_2, key_column_values_with_mismatches, numeric_scale):
+     """
+     Get a dictionary of the column-level differences between the two diff dataframes,
+     keyed by the mismatching key-column values.
+     """
+     diff_dict = {}
+
+     #TODO support a list of key_columns_1 (and key_columns_2) and a dictionary of key_column_values_with_mismatches
+     key_column_1 = key_columns_1[0]
+     key_column_2 = key_columns_2[0]
+     key_column_values_with_mismatches = key_column_values_with_mismatches[key_column_1]
+
+     for value_with_mismatch in key_column_values_with_mismatches:
+
+         if value_with_mismatch is None:
+             row_1_with_mismatch = diff_1.loc[diff_1[key_column_1].isnull()]
+             row_2_with_mismatch = diff_2.loc[diff_2[key_column_2].isnull()]
+             value_with_mismatch = 'NULL'
+         else:
+             row_1_with_mismatch = diff_1.loc[diff_1[key_column_1] == value_with_mismatch]
+             row_2_with_mismatch = diff_2.loc[diff_2[key_column_2] == value_with_mismatch]
+             value_with_mismatch = str(value_with_mismatch)
+
+         diff_dict[value_with_mismatch] = {}
+
+         for column in row_1_with_mismatch:
+             if column == 'group_by_column' or column not in row_2_with_mismatch or column in key_columns_1:
+                 continue
+
+             if row_1_with_mismatch[column].values.size > 0:
+                 src_value = row_1_with_mismatch[column].values[0]
+             elif column == 'COUNT_OF_GROUP_BY_VALUE':
+                 src_value = 0
+             else:
+                 src_value = None
+
+             if row_2_with_mismatch[column].values.size > 0:
+                 trgt_value = row_2_with_mismatch[column].values[0]
+             elif column == 'COUNT_OF_GROUP_BY_VALUE':
+                 trgt_value = 0
+             else:
+                 trgt_value = None
+
+             try:
+                 src_value = src_value.item()
+             except Exception:
+                 pass
+
+             try:
+                 trgt_value = trgt_value.item()
+             except Exception:
+                 pass
+
+             if src_value != trgt_value:
+                 if src_value is None:
+                     diff_trgt_minus_src = trgt_value
+                 elif trgt_value is None:
+                     if isinstance(src_value, str) or isinstance(trgt_value, str):
+                         diff_trgt_minus_src = f"{-int(src_value.split('_',1)[0])}_{-int(src_value.split('_',1)[1])}"
+                     else:
+                         diff_trgt_minus_src = -round(float(src_value), numeric_scale)
+                 else:
+                     if isinstance(src_value, str) or isinstance(trgt_value, str):
+                         diff_trgt_minus_src = f"{int(trgt_value.split('_',1)[0])-int(src_value.split('_',1)[0])}_{int(trgt_value.split('_',1)[1])-int(src_value.split('_',1)[1])}"
+                     else:
+                         diff_trgt_minus_src = round(float(trgt_value)-float(src_value), numeric_scale)
+
+                 if diff_trgt_minus_src:
+                     diff_dict[value_with_mismatch][column] = {
+                         "SRC_VALUE": src_value,
+                         "TRGT_VALUE": trgt_value,
+                         "DIFF_TRGT_MINUS_SRC": diff_trgt_minus_src
+                     }
+         if not diff_dict[value_with_mismatch]:
+             diff_dict.pop(value_with_mismatch)
+
+     if not diff_dict:
+         diff_dict = None
+
+     return diff_dict
+
+
+ #########################################################################################################
+ #TODO write as pytest
+ # Test Space
+ import pandas as pd
+
+ # *** TEST 1 ***
+ #df1 = pd.DataFrame({'group_by_column': [1, 2,3,6], 'A': [1, 1,9,2], 'B': [1, 3,9,2], 'C': [1, 4,9,2],'D': ['1_1', '1_1','8_3','1_1']})
+ #df2 = pd.DataFrame({'group_by_column': [2, 1,3,5], 'A': [2, 1,9,1], 'B': [3, 1,5,1], 'C': [5, 1,9,1],'D': ['1_1', '1_1','5_5','1_1']})
+ #key_column_values_with_mismatches={'group_by_column':[None,3,5,6]}
+ #numeric_scale=2
+
+ #########
+
+ # *** TEST 2 ***
+ #df1 = pd.DataFrame({'group_by_column': [1, None,3,6], 'A': [1, 1,9,2], 'B': [1, 3,9,2], 'C': [1, 4,9,2],'D': ['1_1', '1_1','8_3','1_1']})
+ #df2 = pd.DataFrame({'group_by_column': [None, 1,3,5], 'A': [2, 1,9,1], 'B': [3, 1,5,1], 'C': [5, 1,9,1],'D': ['1_1', '1_1','5_5','1_1']})
+ #key_column_values_with_mismatches={'group_by_column':[None,3,5,6]}
+ #numeric_scale=2
+
+ #########
+
+ # *** TEST 3 ***
+ #df1 = pd.DataFrame({'group_by_column': [1, 2,3,4], 'A': [1, 1,9,2.001], 'B': [1, 3,9,2.0004], 'C': [1, 4,9,2.00000000001],'D': ['1_1', '1_1','8_3','1_1']})
+ #df2 = pd.DataFrame({'group_by_column': [1, 2,3,4], 'A': [2, 1,9,Decimal(2.001)], 'B': [3, 3,9,2.0005], 'C': [1, 4,9,2.00000000004],'D': ['1_1', '1_1','8_3','1_1']})
+ #key_column_values_with_mismatches={'group_by_column':[1,2,3,4]}
+ #numeric_scale=7
+ #
+ ##########
+ #
+ #diff_1, diff_2, df_1_sorted, df_2_sorted =get_diff_dataframes(df1, df2, ['group_by_column'], ['group_by_column'])
+ #diff_dict = get_diff_dict_from_diff_dataframes(df1, df2, ['group_by_column'], ['group_by_column'], key_column_values_with_mismatches, numeric_scale)
+ #import json
+ #import decimal
+ #import numpy as np
+ #
+ #class CustomJSONEncoder(json.JSONEncoder):
+ #    def default(self, o):
+ #        if isinstance(o, decimal.Decimal):
+ #            return str(o)
+ #        if isinstance(o, np.integer):
+ #            return int(o)
+ #        if isinstance(o, np.floating):
+ #            return float(o)
+ #        if isinstance(o, np.ndarray):
+ #            return o.tolist()
+ #        try:
+ #            super(CustomJSONEncoder, self).default(o)
+ #        except:
+ #            return str(o)
+ #
+ #        return super(CustomJSONEncoder, self).default(o)
+ #
+ #diff_json = json.dumps(diff_dict, indent=4, cls=CustomJSONEncoder)
+ #
+ #print(diff_json)
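
An executable mini example for the two functions above, in the spirit of the commented-out tests; the dataframe values are invented:

    import pandas as pd

    df1 = pd.DataFrame({"group_by_column": [1, 2], "A": [10, 20]})
    df2 = pd.DataFrame({"group_by_column": [2, 1], "A": [10, 25]})

    diff_1, diff_2, df_1_sorted, df_2_sorted = get_diff_dataframes(
        df1, df2, ["group_by_column"], ["group_by_column"]
    )
    diff_dict = get_diff_dict_from_diff_dataframes(
        diff_1, diff_2, ["group_by_column"], ["group_by_column"],
        {"group_by_column": [1, 2]}, numeric_scale=2
    )
    # diff_dict -> {'1': {'A': {'SRC_VALUE': 10, 'TRGT_VALUE': 25, 'DIFF_TRGT_MINUS_SRC': 15.0}},
    #               '2': {'A': {'SRC_VALUE': 20, 'TRGT_VALUE': 10, 'DIFF_TRGT_MINUS_SRC': -10.0}}}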