PyPI - icsDataValidation - Versions diffs - 1.0.415__py3-none-any.whl → 1.0.421__py3-none-any.whl - Mend

icsDataValidation 1.0.415-py3-none-any.whl → 1.0.421-py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

icsDataValidation/core/object_comparison.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import logging
 import time
-from typing import Union, List, Dict
+from typing import Union, List, Dict
 from threading import current_thread
 from pathlib import PurePath
@@ -51,7 +51,7 @@ def get_additional_configuration(src_object: DatabaseObject, testing_tool_params
                 exclude_columns = additional_configuration[f"{src_object.database}.{src_object.schema}.{src_object.name}"]["EXCLUDE_COLUMNS"]
                 exclude_columns = [excluded_column.upper() for excluded_column in exclude_columns]
                 logger.info(f"EXCLUDE_COLUMNS: {exclude_columns} ")
     return src_filter, trgt_filter, exclude_columns
 def compare_objects(testing_tool_params: TestingToolParams, objects_to_compare: List[Dict]) -> List[Dict]:
@@ -86,7 +86,7 @@ def compare_objects(testing_tool_params: TestingToolParams, objects_to_compare:
             src_filter, trgt_filter, exclude_columns = get_additional_configuration(src_object, testing_tool_params)
             comparison_service=ComparisonService(src_object, trgt_object, db_service_src, db_service_trgt, src_filter, trgt_filter, exclude_columns, comp_id)
             #####################################################################
             # execute comparison
@@ -99,7 +99,7 @@ def compare_objects(testing_tool_params: TestingToolParams, objects_to_compare:
             comparison_service.pandas_dataframe_comparison()
             #####################################################################
-            # TODO as function - check if the object was changed during comparison
+            # TODO as function - check if the object was changed during comparison
             ### structure of output needs to be adjusted to enable comparison using > in the if statements
             ### florian said the feature is not too important for now, so it's being skipped for now
@@ -108,7 +108,7 @@ def compare_objects(testing_tool_params: TestingToolParams, objects_to_compare:
             # last_altered_trgt = db_service_trgt.get_last_altered_timestamp_from_object(trgt_object)
             # if comparison_service.result_params.last_altered_src>start_time_utc:
-            #     comparison_service.result_params.not_altered_during_comparison_src = False
+            #     comparison_service.result_params.not_altered_during_comparison_src = False
             # if last_altered_trgt>start_time_utc:
             #     comparison_service.result_params.not_altered_during_comparison_trgt = False
@@ -129,7 +129,7 @@ def compare_objects(testing_tool_params: TestingToolParams, objects_to_compare:
                 if column_level_comparison_result["DATATYPE_EQUAL"] is False:
                     comparison_service.result_params.datatypes_equal = False
                 column_level_comparison_results.append(column_level_comparison_result)
             #####################################################################
@@ -151,15 +151,15 @@ def compare_objects(testing_tool_params: TestingToolParams, objects_to_compare:
             # prepare and upload live result of the current object
             live_object_level_comparison_result = ResultService.prepare_object_level_live_result(
-                    object_level_comparison_result,
-                    testing_tool_params,
+                    object_level_comparison_result,
+                    testing_tool_params,
                 )
             # TODO write as function
             if testing_tool_params.upload_result_to_result_database and not (testing_tool_params.upload_result_to_result_database =='upload_result_to_result_database env variable not found' or testing_tool_params.upload_result_to_result_database =='False'):
                 stage_name = f'{testing_tool_params.result_database_name}.{testing_tool_params.stage_schema}."STG_LIVE_{src_object.schema}_{src_object.name}_{testing_tool_params.run_guid}"'
                 result_file_name = f"{src_object.schema}_{src_object.name}.json"
                 result_file_path = testing_tool_params.live_result_folder_path.joinpath(PurePath(result_file_name))
@@ -188,7 +188,7 @@ def compare_objects(testing_tool_params: TestingToolParams, objects_to_compare:
             else:
                 logger.info(f"[{comp_id}] --- Row counts --------> NOT equal")
                 logger.info(f"[{comp_id}]                          Source row count: {comparison_service.result_params.src_row_count}. Target row count: {comparison_service.result_params.trgt_row_count}")
             if len(comparison_service.result_params.src_columns_upper) != len(set(comparison_service.result_params.src_columns_upper)):
                 logger.info(f"[{comp_id}] --- Duplicates in the source column names -> The source system seems to be case sensitive.")
@@ -200,13 +200,15 @@ def compare_objects(testing_tool_params: TestingToolParams, objects_to_compare:
             else:
                 logger.info(f"[{comp_id}] --- Column names ------> NOT equal")
                 logger.info(f"[{comp_id}]                          src_minus_trgt {comparison_service.result_params.src_columns_minus_trgt_columns} and trgt_minus_src {comparison_service.result_params.trgt_columns_minus_src_columns}")
             if comparison_service.result_params.datatypes_equal:
                 logger.info(f"[{comp_id}] --- Data Types --------> EQUAL")
             else:
                 logger.info(f"[{comp_id}] --- Data Types --------> NOT equal")
-            if comparison_service.result_params.aggregations_equal:
+            if not comparison_service.result_params.aggregations_compared:
+                logger.info(f"[{comp_id}] --- Aggregations ------> NOT compared")
+            elif comparison_service.result_params.aggregations_equal:
                 logger.info(f"[{comp_id}] --- Aggregations ------> EQUAL")
             else:
                 logger.info(f"[{comp_id}] --- Aggregations ------> NOT equal")
@@ -231,9 +233,9 @@ def compare_objects(testing_tool_params: TestingToolParams, objects_to_compare:
                 logger.info(f"[{comp_id}]                       -> src_row_count: {comparison_service.result_params.src_row_count} trgt_row_count:{comparison_service.result_params.trgt_row_count} max_row_number {testing_tool_params.max_row_number}")
             elif comparison_service.result_params.pandas_df_is_equal:
                 logger.info(f"[{comp_id}] --- Pandas Dataframes -> EQUAL")
             else:
                 logger.info(f"[{comp_id}] --- Pandas Dataframes -> NOT equal")
             logger.info('****************************************************')
-    return object_level_comparison_results
+    return object_level_comparison_results

icsDataValidation/main.py CHANGED Viewed

@@ -52,7 +52,7 @@ def execute():
     config_file_path, migration_config_file_path = initialization_service.get_config_file_paths()
     #########################################################################################
-    logger.info(f"++++++++++++++++ LOAD config.json")
+    logger.info(f"++++++++++++++++ LOAD setup_config.json")
     for configs_key, configs_value in load_json(config_file_path).items():
         setattr(TestingToolParams, configs_key, configs_value)
@@ -81,38 +81,12 @@ def execute():
         logger.info(f"##vso[task.complete result=SucceededWithIssues ;]DONE")
         TestingToolParams.migration_config=None
-    #########################################################################################
-    logger.info(f"++++++++++++++++ LOAD testset/whitelist")
-    if testset_file_paths:
-        #TODO Error Handling
-        try:
-            TestingToolParams.testset_whitelist={
-                "WHITELIST_OBJECTS_SRC":[],
-                "WHITELIST_SCHEMAS_SRC":[],
-                "WHITELIST_DATABASES_SRC":[],
-                "WHITELIST_OBJECTS_TRGT":[],
-                "WHITELIST_SCHEMAS_TRGT":[],
-                "WHITELIST_DATABASES_TRGT":[]
-            }
-            for testset_file_path in testset_file_paths:
-                testset_=load_json(testset_file_path)
-                for key, value in testset_.items():
-                    TestingToolParams.testset_whitelist[key]= list(set(TestingToolParams.testset_whitelist[key]) | set(value))
-        except error as e:
-            logger.info(f"Not able to load testset from {testset_file_path}.")
-            TestingToolParams.testset_whitelist=None
-    else:
-        TestingToolParams.testset_whitelist=None
     #########################################################################################
     logger.info(f"++++++++++++++++ INITIALIZE TestsetService")
     if TestingToolParams.migration_config:
         try:
-            testset_service=TestsetService(TestingToolParams.migration_config["MAPPING"],TestingToolParams.migration_config["BLACKLIST"],TestingToolParams.testset_whitelist)
+            testset_service=TestsetService(TestingToolParams.migration_config["MAPPING"],TestingToolParams.migration_config["BLACKLIST"],testset_file_paths)
         except KeyError as error:
             raise ValueError(f"TestsetService could not be initialized. Check wether the migration_config contains the 'MAPPING' key and the 'BLACKLIST' key. {error}")
     else:

icsDataValidation/output_parameters/result_params.py CHANGED Viewed

@@ -44,6 +44,7 @@ class ResultParams():
         trgt_columns_aggregate = None
         src_aggregations_error = None
         trgt_aggregations_error = None
+        aggregations_compared = None
         aggregation_differences_trgt_minus_src = None
         # aggregation-comparison (to save)
@@ -57,7 +58,7 @@ class ResultParams():
         # group-by-comparison (to save)
         src_group_by_query = None
         trgt_group_by_query = None
-        src_group_by_error = None
+        src_group_by_error = None
         trgt_group_by_error = None
         object_group_by_columns = None
         group_by_equal = None
@@ -68,13 +69,13 @@ class ResultParams():
         # sample-check (to save)
         src_sample_query = None
         trgt_sample_query = None
-        src_sample_dict = None
+        src_sample_dict = None
         trgt_sample_dict = None
         src_sample_error_dict = None
         trgt_sample_error_dict = None
         samples_compared = None
-        samples_equal = None
-        trgt_key_filters = None
+        samples_equal = None
+        trgt_key_filters = None
         # pandas-dataframe-comparison (for further calculation)
         pandas_df_mismatch = None
@@ -86,9 +87,8 @@ class ResultParams():
         pandas_df_compared = None
         pandas_df_is_equal = None
-        # not part of result class:
-        # global_iflter
+        # not part of result class:
+        # global_iflter
         # exclude_columns
         # trgt_key_filters= None
         # additional_configuration_per_table = None

icsDataValidation/services/comparison_service.py CHANGED Viewed

@@ -4,7 +4,7 @@ import datetime
 import numpy as np
 from pandas._testing import assert_frame_equal
-from decimal import Decimal
+from decimal import Decimal, InvalidOperation, getcontext
 from icsDataValidation.utils.logger_util import configure_dev_ops_logger
 from icsDataValidation.utils.pandas_util import get_diff_dataframes, get_diff_dict_from_diff_dataframes
@@ -44,8 +44,8 @@ class ComparisonService(TestingToolParams):
         object_group_by_column=None
         for object_group_by_column in group_by_column_candidates:
-            src_group_by_column_count_distinct=next(item["COUNT_DISTINCT"] for item in src_column_count_distincts if item["COLUMN_NAME"].upper() == object_group_by_column)
-            trgt_group_by_column_count_distinct=next(item["COUNT_DISTINCT"] for item in trgt_column_count_distincts if item["COLUMN_NAME"].upper() == object_group_by_column)
+            src_group_by_column_count_distinct=next((item["COUNT_DISTINCT"] for item in src_column_count_distincts if item["COLUMN_NAME"].upper() == object_group_by_column), None)
+            trgt_group_by_column_count_distinct=next((item["COUNT_DISTINCT"] for item in trgt_column_count_distincts if item["COLUMN_NAME"].upper() == object_group_by_column), None)
             if (trgt_group_by_column_count_distinct<=1 or src_group_by_column_count_distinct<=1):
                 logger.info(f"[{self.comp_id}] The GROUP_BY_COLUMN {object_group_by_column} does not satisfy the necessary criteria.")
@@ -166,16 +166,31 @@ class ComparisonService(TestingToolParams):
         del trgt_columns_aggregate['TESTATM_ERRORS']
         if self.result_params.src_row_count != 0 and self.result_params.trgt_row_count != 0:
-            aggregation_differences_trgt_minus_src_not_boolean = {
-                                                                    k:  round(Decimal(trgt_columns_aggregate[k][1])
-                                                                        - Decimal(src_columns_aggregate[k][1]), self.numeric_scale)
-                                                                    for k in src_columns_aggregate.keys()
-                                                                        if k in trgt_columns_aggregate
-                                                                        and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
-                                                                        and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
-                                                                        and src_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
-                                                                        and trgt_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
-                                                                }
+            try:
+                aggregation_differences_trgt_minus_src_not_boolean = {
+                                                                        k:  round(Decimal(trgt_columns_aggregate[k][1])
+                                                                            - Decimal(src_columns_aggregate[k][1]), self.numeric_scale)
+                                                                        for k in src_columns_aggregate.keys()
+                                                                            if k in trgt_columns_aggregate
+                                                                            and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
+                                                                            and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
+                                                                            and src_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
+                                                                            and trgt_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
+                                                                    }
+            except InvalidOperation as e:
+                getcontext().prec = 100 # sets the precision of Decimal to a higher value - due to the limitations of the decimal module when handling such large numbers with high precision
+                aggregation_differences_trgt_minus_src_not_boolean = {
+                                                                        k:  round(Decimal(trgt_columns_aggregate[k][1])
+                                                                            - Decimal(src_columns_aggregate[k][1]), self.numeric_scale)
+                                                                        for k in src_columns_aggregate.keys()
+                                                                            if k in trgt_columns_aggregate
+                                                                            and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
+                                                                            and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
+                                                                            and src_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
+                                                                            and trgt_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
+                                                                    }
             aggregation_differences_trgt_minus_src_boolean = {
                                                                 k:  str(
                                                                         int(trgt_columns_aggregate[k][1].split('_',1)[0])
@@ -233,9 +248,11 @@ class ComparisonService(TestingToolParams):
         else:
             aggregation_differences_trgt_minus_src = {}
+        aggregations_compared = True
         aggregations_equal = True
         if src_aggregations_error or trgt_aggregations_error:
             aggregations_equal = None
+            aggregations_compared = False
         else:
             for aggregation_diff in aggregation_differences_trgt_minus_src.values():
                 if aggregation_diff and not aggregation_diff == 0.0:
@@ -252,6 +269,7 @@ class ComparisonService(TestingToolParams):
         self.result_params.aggregation_differences_trgt_minus_src  = aggregation_differences_trgt_minus_src
         self.result_params.src_error_dict = src_error_dict
         self.result_params.trgt_error_dict = trgt_error_dict
+        self.result_params.aggregations_compared = aggregations_compared
         self.result_params.aggregations_equal = aggregations_equal
@@ -572,7 +590,8 @@ class ComparisonService(TestingToolParams):
                     samples_equal = True
                 except:
                     samples_equal = False
+            else:
+                samples_compared = False
         # save results
         self.result_params.src_sample_query = src_sample_query
         self.result_params.trgt_sample_query = trgt_sample_query

icsDataValidation/services/database_services/databricks_hive_metastore_service.py CHANGED Viewed

@@ -371,7 +371,7 @@ class DatabricksHiveMetastoreService(object):
         except Exception as err:
             # raise err
-            dict_count_distincts = [{"COUNT_DISTINCT": 0}]
+            dict_count_distincts = []
             error_list.append(
                 ["ERROR", str(err).split("|||")[0], str(err).split("|||")[1]]
             )

icsDataValidation/services/database_services/databricks_unity_catalog_service.py CHANGED Viewed

@@ -369,7 +369,7 @@ class DatabricksUnityCatalogService(object):
         except Exception as err:
             # raise err
-            dict_count_distincts = [{"COUNT_DISTINCT": 0}]
+            dict_count_distincts = []
             error_list.append(
                 ["ERROR", str(err).split("|||")[0], str(err).split("|||")[1]]
             )

icsDataValidation/services/database_services/oracle_service.py CHANGED Viewed

@@ -307,7 +307,7 @@ class OracleService(object):
         except Exception as err:
             #raise err
-            dict_count_distincts = [{'COUNT_DISTINCT': 0}]
+            dict_count_distincts = []
             error_list.append(["ERROR", str(err).split('|||')[0], str(err).split('|||')[1]])

icsDataValidation/services/database_services/snowflake_service.py CHANGED Viewed

@@ -334,7 +334,7 @@ class SnowflakeService:
         except Exception as err:
             # raise err
-            dict_count_distincts = [{"COUNT_DISTINCT": 0}]
+            dict_count_distincts = []
             error_list.append(["ERROR", str(err).split("|||")[0], str(err).split("|||")[1]])
         return dict_count_distincts, error_list

icsDataValidation/services/database_services/sqlserver_service.py CHANGED Viewed

@@ -387,7 +387,7 @@ class SQLServerService:
         try:
             dict_count_distincts = self.execute_queries(query_get_count_distincts_from_object)
         except Exception as err:
-            dict_count_distincts = [{"COUNT_DISTINCT": 0}]
+            dict_count_distincts = []
             error_list.append(["ERROR", str(err).split("|||")[0], str(err).split("|||")[1]])
         return dict_count_distincts, error_list

icsDataValidation/services/testset_service.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import logging
+from icsDataValidation.utils.file_util import load_json
 from icsDataValidation.utils.logger_util import configure_dev_ops_logger
 #########################################################################################
@@ -19,14 +20,48 @@ class TestsetService:
     Handles blacklists and whitelists.
     """
-    def __init__(self, testset_mapping:dict, testset_blacklist: dict, testset_whitelist: dict=None):
+    def __init__(self, testset_mapping:dict, testset_blacklist: dict, testset_file_paths: list=None):
         self.testset_mapping = testset_mapping
         self.testset_blacklist = testset_blacklist
-        self.testset_whitelist = testset_whitelist
+        if testset_file_paths:
+            self.testset_whitelist = self._load_testset(testset_file_paths)
+        else:
+            self.testset_whitelist = None
+    @staticmethod
+    def _load_testset(testset_file_paths):
+        """
+        Load the testset files from a list of file paths.
+        Configure the whitelist of databases, schemas, and objects.
+        """
+        logger.info(f"++++++++++++++++ LOAD testset/whitelist")
+        try:
+            testset_whitelist={
+                "WHITELIST_OBJECTS_SRC":[],
+                "WHITELIST_SCHEMAS_SRC":[],
+                "WHITELIST_DATABASES_SRC":[],
+                "WHITELIST_OBJECTS_TRGT":[],
+                "WHITELIST_SCHEMAS_TRGT":[],
+                "WHITELIST_DATABASES_TRGT":[]
+            }
+            for testset_file_path in testset_file_paths:
+                testset_=load_json(testset_file_path)
+                for key, value in testset_.items():
+                    testset_whitelist[key]= list(set(testset_whitelist[key]) | set(value))
+        except FileNotFoundError as file_not_found_err:
+            logger.error(f"Not able to load testset from {testset_file_path}!")
+            raise file_not_found_err
+        except Exception as exc:
+            logger.error("Unexpected exception while trying to load testset and/or defining the whitelist:\n", exc_info=exc)
+            raise exc
+        return testset_whitelist
     def handle_database_mapping(self, source_database_name: str = None) -> str:
         """
-        Map the source and the target database.
+        Map the source and the target database.
         Note: Case-Insensitive and returns upper-case target database name.
         """
         target_database_name=source_database_name.upper()
@@ -40,7 +75,7 @@ class TestsetService:
     def handle_schema_mapping(self, source_schema_name: str = None, source_database_name: str = None) -> str:
         """
-        Map the source and the target schema.
+        Map the source and the target schema.
         Note: Case-Insensitive and returns upper-case target schema name.
         """
         target_schema_name=source_schema_name.upper()
@@ -48,7 +83,7 @@ class TestsetService:
         if self.testset_mapping and "SCHEMA_MAPPING" in self.testset_mapping:
             for schema_mapping in self.testset_mapping["SCHEMA_MAPPING"]:
                 if f"{source_database_name.upper()}.{source_schema_name.upper()}" == schema_mapping["src_schema_identifier"].upper():
                     target_schema_name = schema_mapping["trgt_schema_name"].upper()
                     found_schema_mapping = True
@@ -72,7 +107,7 @@ class TestsetService:
             target_schema_name=source_schema_name.upper()
         return target_schema_name
     def handle_blacklist(self, database_objects: dict, src_trgt: str)-> dict:
         """
         Handle the blacklist from the migration_config to restrict database objects.
@@ -95,7 +130,7 @@ class TestsetService:
                 database_objects.remove(db_object)
         return database_objects
     def handle_whitelist(self, database_objects: dict, src_trgt: str)-> dict:
         """
         Handle the whitelist which is defined as a testset to restrict database objects.
@@ -114,7 +149,7 @@ class TestsetService:
                 database_objects.remove(db_object)
         return database_objects
     def map_objects(self, database_objects_src: list, database_objects_trgt: list):
         """
         Maps objects between source and target by using the mapping defined in the migration_config.json.
@@ -130,10 +165,10 @@ class TestsetService:
         trgt_objects_minus_src_table_identifiers = [object["object_identifier"] for object in database_objects_trgt if object not in database_objects_src and object["object_type"] == 'table']
         trgt_objects_minus_src_view_identifiers = [object["object_identifier"] for object in database_objects_trgt if object not in database_objects_src and object["object_type"] == 'view']
         if database_objects_src != database_objects_trgt and self.testset_mapping:
             src_objects_minus_trgt_objects_ = src_objects_minus_trgt_objects.copy()
             trgt_objects_minus_src_object_identifiers=[object["object_identifier"] for object in trgt_objects_minus_src_objects]
@@ -145,10 +180,10 @@ class TestsetService:
                 #########################################################################################
                 # Object-Mapping
                 for mapping in self.testset_mapping["OBJECT_MAPPING"]:
                     if (
-                        db_object["object_identifier"] == mapping["src_object_identifier"].upper()
-                        and db_object["object_type"] == mapping["src_object_type"]
+                        db_object["object_identifier"] == mapping["src_object_identifier"].upper()
+                        and db_object["object_type"] == mapping["src_object_type"]
                         and mapping['trgt_object_identifier'].upper() in trgt_objects_minus_src_object_identifiers
                     ):
                         logger.info(f" -> mapping object found: {mapping}")
@@ -159,8 +194,8 @@ class TestsetService:
                             if trgt_object["object_identifier"] == mapping["trgt_object_identifier"].upper():
                                 trgt_objects_minus_src_objects.remove(trgt_object)
                         logger.info(" -> added by 1:1 mapping")
-                        # set continue_flag to false because this object has been covered by the mapping
+                        # set continue_flag to false because this object has been covered by the mapping
                         continue_flag = False
                         break
@@ -168,7 +203,7 @@ class TestsetService:
                 # Database-Mapping, and Schema-Mapping
                 if continue_flag == True:
                     src_database_name = db_object["object_identifier"].split(".",1)[0]
                     src_schema_name = db_object["object_identifier"].split(".",2)[1]
                     src_object_name = db_object["object_identifier"].split(".",2)[2]
@@ -181,21 +216,21 @@ class TestsetService:
                     if (db_object["object_type"] == 'table' and trgt_object_identifier in trgt_objects_minus_src_table_identifiers) or (db_object["object_type"] == 'view' and trgt_object_identifier in trgt_objects_minus_src_view_identifiers):
                         intersection_objects_mapped_trgt_src.append({"src_object_identifier": db_object["object_identifier"],"src_object_type": db_object["object_type"], "trgt_object_identifier": trgt_object_identifier,"trgt_object_type": db_object["object_type"]})
                         src_objects_minus_trgt_objects.remove(db_object)
                         for trgt_object in trgt_objects_minus_src_objects:
                             if trgt_object["object_identifier"] == trgt_object_identifier:
                                 trgt_objects_minus_src_objects.remove(trgt_object)
                         logger.info(" -> added by database/schema-mapping")
-                        # set continue_flag to false because this object has been covered by the replacements
+                        # set continue_flag to false because this object has been covered by the replacements
                         continue_flag = False
                 ##########################################################################################
                 # Replace-Mapping
                 if continue_flag == True:
                         src_database_name = db_object["object_identifier"].split(".",1)[0]
                         src_schema_name = db_object["object_identifier"].split(".",2)[1]
@@ -217,7 +252,7 @@ class TestsetService:
                         #            trgt_objects_minus_src_objects.remove(trgt_object)
                         #    logger.info(" -> added by replace mapping")
 #
-                        #    # set continue_flag to false because this object has been covered by the replacements
+                        #    # set continue_flag to false because this object has been covered by the replacements
                         #    continue_flag = False
                         #    break
@@ -243,7 +278,7 @@ class TestsetService:
             all_objects_matching=False
         return intersection_objects_mapped_trgt_src, object_identifiers_src_minus_trgt, object_identifiers_trgt_minus_src, remaining_mapping_objects, all_objects_matching
     @staticmethod
     def get_intersection_objects_trgt_src(database_objects_src: list, database_objects_trgt: list, intersection_objects_mapped_trgt_src:list):
         """
@@ -253,5 +288,5 @@ class TestsetService:
         intersection_objects_trgt_src_without_mapping =[{"src_object_identifier": object["object_identifier"],"src_object_type": object["object_type"],"trgt_object_identifier": object["object_identifier"],"trgt_object_type": object["object_type"]} for object in database_objects_src if object in database_objects_trgt]
         intersection_objects_trgt_src= intersection_objects_trgt_src_without_mapping + intersection_objects_mapped_trgt_src
         return intersection_objects_trgt_src

icsdatavalidation-1.0.421.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,20 @@
+Metadata-Version: 2.4
+Name: icsDataValidation
+Version: 1.0.421
+Summary: Add your description here
+Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
+License: MIT
+Requires-Python: >=3.11
+Description-Content-Type: text/markdown
+Requires-Dist: azure-storage-blob==12.13.1
+Requires-Dist: boto3==1.26.154
+Requires-Dist: cloe-util-snowflake-connector==1.0.5
+Requires-Dist: databricks-sdk==0.29.0
+Requires-Dist: databricks-sql-connector==3.0.1
+Requires-Dist: numpy==1.26.3
+Requires-Dist: oracledb==2.5.0
+Requires-Dist: pandas==2.2.2
+Requires-Dist: pyexasol==0.24.0
+Requires-Dist: pyodbc
+Requires-Dist: python-dotenv>=1.0.1
+Requires-Dist: teradatasql==17.20.0.10

{icsdatavalidation-1.0.415.dist-info → icsdatavalidation-1.0.421.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 icsDataValidation/configuration.py,sha256=HOFjmC8_e2nvoItndMtJQQA1MR5aCgZGeF1AwY_FvjE,477
-icsDataValidation/main.py,sha256=HGnQZ_A9Z4tdCloXs4Lap79LKVOkbmm1ethYbI0Qqlg,12582
+icsDataValidation/main.py,sha256=1CtzUa0-LALTH9i5eQ6H6PW0UON3TPHZ5ey9qDsljKQ,11502
 icsDataValidation/connection_setups/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 icsDataValidation/connection_setups/azure_connection_setup.py,sha256=qxPvD-VZhdJqrdj06IVIk2Ud287YlLhE22Q5_oYKetM,790
 icsDataValidation/connection_setups/databricks_connection_setup.py,sha256=dNEBum-8R-TUW2SCEk3CaNtCr_gLFvn456KBlENpgJU,1220
@@ -10,25 +10,25 @@ icsDataValidation/connection_setups/sqlserver_connection_setup.py,sha256=ayRao5B
 icsDataValidation/connection_setups/teradata_connection_setup.py,sha256=fIpuxz-FTqFK2vSMSuokqU9sdJkaJ4UP5piY_zIbj5k,624
 icsDataValidation/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 icsDataValidation/core/database_objects.py,sha256=2oaDaVQajSYI_HJjJy1pmc6FsoK_wMfwgu6ZgEcFvow,523
-icsDataValidation/core/object_comparison.py,sha256=OEz5m1pp_PbIWyM5998iB5obFKYdJEqDo9Z0Hpj7o4A,14988
+icsDataValidation/core/object_comparison.py,sha256=xJvgHdoRaMzFMQishpzEszO7bW31Ll9BUCsyzqwrRVs,15045
 icsDataValidation/input_parameters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 icsDataValidation/input_parameters/testing_tool_params.py,sha256=trVZmxd2hsJRBWgdv0YePdaA9T20QbL3bOCVUOwIH18,6907
 icsDataValidation/output_parameters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-icsDataValidation/output_parameters/result_params.py,sha256=5Mk9L9zWaxUqcKwLZQ539lVUp0b0s-YUmSA3PBgbqfs,2833
+icsDataValidation/output_parameters/result_params.py,sha256=HLS7DUX8NWWw3j5de8qOQ4T4auWbyMuwmuafzaBOjnU,2861
 icsDataValidation/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-icsDataValidation/services/comparison_service.py,sha256=CPTVPxd1VKSJRBy4gsne-hrlWvf8UAbPfWn1XD-rMNQ,43448
+icsDataValidation/services/comparison_service.py,sha256=6ponyBGgXRrVQyX2aYUpHlIbEMhUtWKS8N3KYSWAjEw,45176
 icsDataValidation/services/initialization_service.py,sha256=AHbJrq_LjMPFoeOJC2pi2ZZ1xkL8njSZn38psc3do60,6687
 icsDataValidation/services/result_service.py,sha256=edD6aejIi5P7qDNHKnN46KrN5tfzwqnw5TB35SvFAWU,28396
 icsDataValidation/services/system_service.py,sha256=GErl_Zx_DaajXTTyfJJ5_xqpd2nLnfnRHcPDa_OVF58,3518
-icsDataValidation/services/testset_service.py,sha256=k1wRjI4Ltw9fylek9iW8N6DvnXn13wf6IJ703qQDMEc,15363
+icsDataValidation/services/testset_service.py,sha256=HKD1xgq-SdwlkXu5FDd6pgeCp8Pkbm14GTzwbGIrBtk,16616
 icsDataValidation/services/database_services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 icsDataValidation/services/database_services/azure_service.py,sha256=bAfpekHowj92qAm3C0hVyxwTpg5J3DoF82DdGKXoe6Q,16932
-icsDataValidation/services/database_services/databricks_hive_metastore_service.py,sha256=JfI-6buw7PfK-gORiAaBx8koVdORfMZav1-w7q697NI,88177
-icsDataValidation/services/database_services/databricks_unity_catalog_service.py,sha256=g7uMrPErjc6El9BmikDc3LmxwCCZnsuzxn4_hIJi7u0,70856
+icsDataValidation/services/database_services/databricks_hive_metastore_service.py,sha256=HLdv4YakTJB669iVsaKvs0-FbpiJ3hkk27oca-EpJhQ,88156
+icsDataValidation/services/database_services/databricks_unity_catalog_service.py,sha256=8iV75kvtQsGPdC35m89jO5s0ZQDekPdRVPYGbdCAPVI,70835
 icsDataValidation/services/database_services/exasol_service.py,sha256=LdjU8mM77zTmNmhJPQrgQO-HwAZv0C0seYMDjuWU9BQ,11153
-icsDataValidation/services/database_services/oracle_service.py,sha256=6t0tt0TJvre_1B5FVTFgCNZYgipT8zCtNngXMEnQi98,31826
-icsDataValidation/services/database_services/snowflake_service.py,sha256=ryQ57NLnx8jWp3n2xP2E9sQs8bCshFjw17q6KVer9cc,63437
-icsDataValidation/services/database_services/sqlserver_service.py,sha256=ApPCwuq8tmSq7vPsFhKzKaQnHUaZS5EoS77Nr8c1g-k,38476
+icsDataValidation/services/database_services/oracle_service.py,sha256=Ejxi0HBRF_c0xWY4wEsw8L8Rb5FMRf9cjQbhz8kerIA,31805
+icsDataValidation/services/database_services/snowflake_service.py,sha256=7Jkyr4eNk7HnVc2ju5XhcW-g0R9SnOnusyf7-Hpj6t8,63416
+icsDataValidation/services/database_services/sqlserver_service.py,sha256=tT42aCxIJ8g6Qr5UsI4cQM_YYeXt-QolKSeVm_hSBKY,38455
 icsDataValidation/services/database_services/teradata_service.py,sha256=h1UX-Wrf9qvZ_hXpH-Y63TRZ8csOCVcEjFn6ux7hvyk,40299
 icsDataValidation/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 icsDataValidation/utils/file_util.py,sha256=ZTMB1sTnIIdffg9tEJRCFQQ5SG8Fksc5ie1PM4gHXG4,3432
@@ -36,7 +36,7 @@ icsDataValidation/utils/logger_util.py,sha256=xS48_FFMot_hyQgJY8DUeRTn5jpdvRt5QI
 icsDataValidation/utils/pandas_util.py,sha256=D_g7Xw7BIS2E-1ZhJIvp62K5xuKjIkj-7TxH4HN_8SI,6505
 icsDataValidation/utils/parallelization_util.py,sha256=6P0YcQLmunW_fHR4f5-kdncZbOlxxqKyk6ZAFQQEd2k,2088
 icsDataValidation/utils/sql_util.py,sha256=0c-BInElSsRmXUedfLP_h9Wsiscv9aic7IIc5f15Uzo,396
-icsdatavalidation-1.0.415.dist-info/METADATA,sha256=__Y5L82M3S2KEC1JQphdG8bkfNJxEJGMq1NgLHWLjPc,24619
-icsdatavalidation-1.0.415.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-icsdatavalidation-1.0.415.dist-info/top_level.txt,sha256=BqWUGJb4J7ZybpDMeuGHxEHGHwXXJEIURd9pBybHzTM,18
-icsdatavalidation-1.0.415.dist-info/RECORD,,
+icsdatavalidation-1.0.421.dist-info/METADATA,sha256=irZyTjWSOKvsU_IT8inVhRYnWnj0Xc0__S95shkxyrQ,661
+icsdatavalidation-1.0.421.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+icsdatavalidation-1.0.421.dist-info/top_level.txt,sha256=BqWUGJb4J7ZybpDMeuGHxEHGHwXXJEIURd9pBybHzTM,18
+icsdatavalidation-1.0.421.dist-info/RECORD,,

icsdatavalidation-1.0.415.dist-info/METADATA DELETED Viewed

@@ -1,298 +0,0 @@
-Metadata-Version: 2.4
-Name: icsDataValidation
-Version: 1.0.415
-Summary: Add your description here
-Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
-License: MIT
-Requires-Python: >=3.11
-Description-Content-Type: text/markdown
-Requires-Dist: azure-storage-blob==12.13.1
-Requires-Dist: boto3==1.26.154
-Requires-Dist: cloe-util-snowflake-connector==1.0.5
-Requires-Dist: databricks-sdk==0.29.0
-Requires-Dist: databricks-sql-connector==3.0.1
-Requires-Dist: numpy==1.26.3
-Requires-Dist: oracledb==2.5.0
-Requires-Dist: pandas==2.2.2
-Requires-Dist: pyexasol==0.24.0
-Requires-Dist: pyodbc
-Requires-Dist: python-dotenv>=1.0.1
-Requires-Dist: teradatasql==17.20.0.10
-# icsDV - initions Data Validation Tool
-## Introduction
-The icsDataValidation tool identifies data mismatches between two databases.
-The functionalities are specifically geared to support migration projects.
-It helps to find data issues in tables and views in comparison of a source and a target system.
-### What is "generic" about the tool?
-The icsDataValidation tool (icsDV) is in particular structured in a way that it is easily expandable.
-The main code is used by all different database options.
-Specifics for each supported database are implemented in a database service per database.
-The different database services are very similar.
-They hold the same methods with the same input and output parameters.
-Each method is aligned with the syntax and the settings of the database it is created for.
-Each core implementation includes connections setup, object comparison functionality and the result preparation.
-### Supported Databases
-The icsDV supports comparisons between the following databases:
-- Snowflake
-- Teradata
-- Azure SQL Server
-- Exasol
-- Oracle
-- Databricks with and without Unity Catalog
-Comparison results can be written to either Snowflake or Databricks.
-### Features
-The key features of the tool are:
-- Comparison of tables and views between a source and a target system.
-- Pipeline integration in Azure DevOps or GitLab
-- Multiple verification/comparison steps:
-  - Row count comparison
-  - Column names comparison
-  - Aggregation comparison (depending on data type)
-  - "group by" comparison
-  - Pandas DataFrame comparison (with a threshold for the size of the object)
-  - Pandas DataFrame sample comparison (with a random sample of the object)
-- Detailed representation of the comparison result
-  - "high-level" result (for each pipeline/execution)
-  - "object-level" result (for each table/view)
-  - "column-level" result (for each column)
-- Parallelization for performance enhancement of the comparison of a large number of objects
-- Input testsets (white-listing of objects)
-- Object filter (black-listing of objects)
-- Object mappings between the source and the target system
-- Comparison result saved and displayed in multiple instances
-  - saved as JSON files in the repository
-  - export to result tables in the target system (Snowflake or Databricks)
-  - export to Azure Blob Storage or AWS S3 Bucket
-### Repository Structure
-The repository is structured in the following sections:
-- **icsDataValidation**
-  > This is where all code files are stored.
-- **icsDataValidation/main.py**
-  > Entry point for python.
-- **icsDataValidation/core**
-    > Main code files for the parts independent on the source and target system.
-- **icsDataValidation/services/database_services**
-    > Database services for all supported systems can be found here.
-    Each file contains a class that is identically structured in comparison to the other database service classes.
-    Each database service class contains methods to query metadata, create aggregations, and retrieve data for the comparison step.
-- **icsDataValidation/connection_setups**
-    > The connection setups are database dependent.
-    They define how the credentials for the database connections are retrieved.
-- **examples/comparison_results**
-    > The comparison results are saved here.
-    One JSON file with all results is saved for each execution/pipeline run.
-    Additionally there are live comparison results saved for each compared object as a failsafe.
-- **examples**
-    > This folder contains all files defining a specific validation setup.
-      - A file named `migration_config.json` contains configurations about the source system, the target system and the mapping of objects between both. It contains the blacklists and "group by" aggregation settings.
-      - A file named `ics_data_validation_config.json` specifies the source system, the target system and the results system. Most importantly, this includes the names of the results tables and the connection configurations (Server, Port, Secrets) of source and target system.
-      - A file named `manual_execution_params.py` is only relevant for local execution of the code. It contains settings which would otherwise be defined in the pipeline setup, i.e. limits on the size of objects to compare and the numeric precision.
-      - The folder `testsets` contains JSON files specifying whitelists of objects to compare.
-    For all the files here, empty `*.template.*` files are available and may serve as a starting point.
-    This repo stores only template files.
-    The actual files used for each setup should not be committed here.
-    They are stored in [a separate repository.](https://dev.azure.com/initions-consulting/icsDataValidation/_git/icsDataValidation%20-%20workflow%20demo).
-- **examples/pipeline**
-    > Files defining the pipelines that execute the icsDV are stored here. For example, YML files for Azure DevOps pipelines.
-## icsDV - Execution Manual
-## icsDV - Input Parameters
-There are four types of input parameters:
-1. Pipeline Parameters - which are defined as input parameters of a pipeline (Azure DevOps Pipeline or Gitlab Pipeline).
-2. Manual Execution Parameters - defined in the code (testing_tool.py).
-They correspond to the Pipeline Parameters and are used when executing the code directly without a pipeline instead of the Pipeline Parameters.
-3. Global Parameters - directly defined in the TestingToolParams class. They are used in pipeline runs and for manual executions.
-4. Environmental Parameters - Stored either in Azure DevOps in a variable group, in Gitlab, or, for manual executions, in a `*.env` file in a location that can be specified in the `manual_execution_params.py`.
-Additionally the parameters can be categorized into 3 groups:
-1. Setup Parameters - these are parameters which are usually just set once when setting up the icsDV.
-2. Configuration Parameters - are used to configure the general settings but can be adjusted to the conditions of the workload on the fly.
-3. Execution Parameters - are set individually for each execution of the icsDV, e.g. the selection of objects to be tested.
-### Setup Parameters
-Stored in `ics_data_validation_config.json`:
-| Parameter                             | Description                                                                                                                                                                                                                                                                     | Input Type                                       |
-|---------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------|
-| source_system_selection               | Name of the source system as defined in the database_config.json as a key.                                                                                                                                                                                                      | Pipeline Parameter or Manual Execution Parameter |
-| target_system_selection               | Name of the target system as defined in the database_config.json as a key.                                                                                                                                                                                                      | Pipeline Parameter or Manual Execution Parameter |
-| result_system_selection               | Name of the result system as defined in the database_config.json as a key.                                                                                                                                                                                                      | Pipeline Parameter or Manual Execution Parameter |
-| azure_devops_pipeline                 | Azure DevOps Pipeline support. Set to "True" to push the changes of a run to the GIT repository.                                                                                                                                                                                | Global Parameter - TestingToolParams             |
-| gitlab_pipeline                       | Gitlab Pipeline support. Set to "True" to push the changes of a run to the GIT repository.                                                                                                                                                                                      | Global Parameter - TestingToolParams             |
-| result_database_name                  | Name of the database or catalog the results are written to                                                                                                                                                                                                                      | Global Parameter - TestingToolParams             |
-| result_schema_name                    | Name of the schema the results are written to                                                                                                                                                                                                                                   | Global Parameter - TestingToolParams             |
-| result_table_highlevel_name           | Name of the high-level results table                                                                                                                                                                                                                                            | Global Parameter - TestingToolParams             |
-| result_table_objectlevel_name         | Name of the object-level results table                                                                                                                                                                                                                                          | Global Parameter - TestingToolParams             |
-| result_table_columnlevel_name         | Name of the column-level results table                                                                                                                                                                                                                                          | Global Parameter - TestingToolParams             |
-| result_meta_data_schema_name          | Name of the schema the full results are written to                                                                                                                                                                                                                              | Global Parameter - TestingToolParams             |
-| result_table_name                     | Name of the table the full results are written to                                                                                                                                                                                                                               | Global Parameter - TestingToolParams             |
-| result_live_table_name                | Name of the table the live results are written to                                                                                                                                                                                                                               | Global Parameter - TestingToolParams             |
-| results_folder_name                   | Folder that in which the results are stored in JSON format. Default: `examples/comparison_results/`                                                                                                                                                                             | Global Parameter - TestingToolParams             |
-| remaining_mapping_objects_folder_name | Output folder that holds information about source system objects which are not covered by the mapping and are therefore not included in the comparison. Default: `examples/remaining_mapping_objects/`                                                                          | Global Parameter - TestingToolParams             |
-| testset_folder_name                   | Folder that holds the test set files in JSON format. Default: `examples/testsets/`                                                                                                                                                                                              | Global Parameter - TestingToolParams             |
-| stage_schema                          | Name of the Snowflake Schema where the stage is created to upload the comparison results to Snowflake. Only needed if the `upload_result_to_result_database` functionality is used with Snowflake as target system.                                                             | Global Parameter - TestingToolParams             |
-| stage_name_prefix                     | Prefix of the name of the Snowflake Stage which is used to upload the comparison results to Snowflake. The name is complemented by a run_guid which is a unique uuid for each icsDV execution. Only needed if the `upload_result_to_result_database` functionality is used.     | Global Parameter - TestingToolParams             |
-| container_name                        | Name of the Azure Storage Container to upload the comparison results into the blob storage. Note: Only needed if the `upload_result_to_blob` functionality is used.                                                                                                             | Global Parameter - TestingToolParams             |
-| bucket_name                           | Name of the AWS S3 Bucket to upload the comparison results into the AWS. Note: Only needed if the `upload_result_to_bucket` functionality is used.                                                                                                                              | Global Parameter - TestingToolParams             |
-### Configuration Parameters
-Stored in `manual_execution_params.py`:
-| Parameter                        | Description                                                                                                                                               | Input Type                                        |
-|----------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------|
-| ENV_FILEPATH                     | Absolute path to the `*.env` file containing secrets, passwords and tokens.                                                                               | Pipeline Parameter or Manual Execution Parameters |
-| UPLOAD_RESULT_TO_BLOB            | Set to "True" to upload the comparison results to an Azure Blob Storage. An `azure_storage_connection_string` is needed if set to "True".                 | Pipeline Parameter or Manual Execution Parameters |
-| UPLOAD_RESULT_TO_BUCKET          | Set to "True" to upload the comparison results to an AWS S3 Bucket. An `aws_bucket_access_key` and an `aws_bucket_secret_key` is needed if set to "True". | Pipeline Parameter or Manual Execution Parameter  |
-| UPLOAD_RESULT_TO_RESULT_DATABASE | Set to "True" to upload the comparison results to Snowflake or Databricks. A `result_system_selection` is needed if set to "True".                        | Pipeline Parameter or Manual Execution Parameter  |
-| MAX_OBJECT_SIZE                  | Limits Pandas comparison to objects of a size smaller than `MAX_OBJECT_SIZE` bytes. Data type is String. Default: `str(-1)`, no limit.                    | Pipeline Parameter or Manual Execution Parameter  |
-| MAX_ROW_NUMBER                   | Limits Pandas comparison to objects with less than `MAX_ROW_NUMBER` rows. Data type is String. Default: `str(-1)`, no limit.                              | Pipeline Parameter or Manual Execution Parameter  |
-| EXECUTE_GROUP_BY_COMPARISON      | Set to "True" to execute group-by comparisons. See sec. "Group-By-Aggregation" for details.                                                               | Pipeline Parameter or Manual Execution Parameter  |
-| USE_GROUP_BY_COLUMNS             | Set to "True" to activate group-by columns. See sec. "Group-By-Aggregation" for details.                                                                  | Pipeline Parameter or Manual Execution Parameter  |
-| MIN_GROUP_BY_COUNT_DISTINCT      | Minimum expected number of group-by counts. See sec. "Group-By-Aggregation" for details.                                                                  | Pipeline Parameter or Manual Execution Parameter  |
-| MAX_GROUP_BY_COUNT_DISTINCT      | Maximum expected number of group-by counts. See sec. "Group-By-Aggregation" for details.                                                                  | Pipeline Parameter or Manual Execution Parameter  |
-| MAX_GROUP_BY_SIZE                | Maximum size of the group-by query. See sec. "Group-By-Aggregation" for details.                                                                          | Pipeline Parameter or Manual Execution Parameter  |
-| NUMERIC_SCALE                    | Number of digits to compare. Data type is String. Default: `str(2)`, i.e. deviations below 0.01 are tolerated.                                            | Pipeline Parameter or Manual Execution Parameter  |
-### Execution Parameters
-Stored in `manual_execution_params.py`:
-| Parameter               | Description                                                                                                                                               | Input Type                                       |
-|-------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------|
-| DATABASE_NAME           | Filters the test set on a specific database/catalog. For no filter set "None" as a Manual Execution Parameter and leave it empty as a Pipeline Parameter. | Pipeline Parameter or Manual Execution Parameter |
-| SCHEMA_NAME             | Filters the test set on a specific schema. For no filter set "None" as a Manual Execution Parameter and leave it empty as a Pipeline Parameter.           | Pipeline Parameter or Manual Execution Parameter |
-| TESTSET_FILE_NAMES      | File names of the test set as defined in the folder testset_folder_name (see Setup Parameters) as JSON files.                                             | Pipeline Parameter or Manual Execution Parameter |
-| OBJECT_TYPE_RESTRICTION | Filters the testset to only tables (`"include_only_tables"`), only views (`"include_only_views"`) or all tables and views (`"include_all"`).              | Pipeline Parameter or Manual Execution Parameter |
-| MAX_NUMBER_OF_THREADS   | Maximum number of threads used. Values larger than the default, `str(1)`, activate parallelization.                                                       | Pipeline Parameter or Manual Execution Parameter |
-## icsDV - Configuration
-### Blacklists
-### Whitelists (Testsets)
-### Mapping
-### Group-By-Aggregation
-The Group-By-Aggregation is a feature to pinpoint the differences in the data.
-It can be activated by setting the parameter `EXECUTE_GROUP_BY_COMPARISON` to TRUE.
-If activated an additional comparison step is performed.
-Each table is queried with a group-by-statement including aggregations depending on the data type.
-Those aggregations are consequently compared.
-As a result the differences in the data can be narrowed down to certain grouping values.
-There are three options to define the column over which the group-by is executed.
-1. "group-by-columns-per-table" defined as multiple lists for specific tables. Activated with the `USE_GROUP_BY_COLUMNS` parameter and `GROUP_BY_COLUMNS_PER_TABLE` defined in the `migration_config.json`.
-2. "group-by-columns" from a predefined list for all tables by a validation. Activated with the `USE_GROUP_BY_COLUMNS` parameter and `GROUP_BY_COLUMNS` defined in the `migration_config.json`.
-3. "group-by-columns" evaluated from all existing columns by a validation
-The validation consists of a number of tests and can be configured by a number of parameters to either easily find columns to group by over or to only select columns which add a definite value for pinpointing the differences in the data.
-The validation tests for the "group-by-columns" are:
-  1. Number of distinct values of the column is more than 1.
-  2. Number of distinct values of the column is less than the rowcount of the table.
-  3. Number of distinct values of the column exceeds the `MIN_GROUP_BY_COUNT_DISTINCT` parameter.
-  4. Number of distinct values of the column is below the `MAX_GROUP_BY_COUNT_DISTINCT` parameter.
-  5. The size of the expected result of the group-by-query is below the `MAX_GROUP_BY_SIZE` parameter.
-  (The size is defined by "Number of distinct values" * "Number of columns")
-All tests are executed on source and target.
-> Note: The group by comparison can be activated by setting the `execute_group_by_comparison` parameter to TRUE.
-The `migration_config.json` has to include the following keys when the parameter use_group_by_columns is set to TRUE.
-    "GROUP_BY_AGGREGATION":{
-      "GROUP_BY_COLUMNS_PER_TABLE": {},
-      "GROUP_BY_COLUMNS":[]
-    }
-The values of those keys can be empty.
-## icsDV - Comparison Results
-### JSON Results
-- Complete Comparison Result JSONs
-- Live Comparison Result JSONs
-### Target System Result Tables
-- High-Level Result
-- Object-Level Result
-- Column-Level Result
-### Result Export in a File Storage
-## icsDV - Setup
-### Code setup
-- To handle the code, we recommend using VS Code.
-- The code is written in python. The tool is compatible with version 3.11
-- It is recommended to use a project-specific python environment.
-  You can create one with `python -m venv .env` in the root folder of this repo.
-  After creating it, you should activate it (`source .env/bin/activate`), select the python binary `.env/bin/python` therein as your python interpreter in VSC and make sure that python libraries are read from and installed to this environment, i.e. `export PYTHONPATH=$(pwd)/.env/lib/python3.11/site-packages`.
-- In this environment, install the packages listed in the `requirements.txt` and the `requirements-dev.txt`. i.e. run `pip install -r requirements.txt`.
-### Setup for manual execution
-### Setup as Azure DevOps pipeline
-### Setup as GitLab pipeline
-## authentication
-The following auth methods to snowflake are supported:
-- password, provided via PASSWORD_NAME
-- private key with/without encryption, provided via PRIVATE_KEY_NAME with/without PRIVATE_KEY_PASSPHRASE_NAME
-- path to private key file with/without encryption, provided via PRIVATE_KEY_FILE_PATH with/without PRIVATE_KEY_FILE_PASSWORD
-## devcontainer
-run with uv as follows in devcontainer:
-```bash
-uv run -s  icsDataValidation/main.py
-```
-Inside the [devcontainer config](.devcontainer/devcontainer.json) the mounts setting is used to bring a .env from the host system into the devcontainer.
-```bash
-"mounts": [
-        "source=/home/Documents/Generic_Testing_Tool/generic_testing_tool_password.env,target=/workspaces/icsDataValidation/examples/generic_testing_tool_password.env,type=bind"
-    ]
-```
-To use this feature either create the .env under the source path on your host or adjust this path to another path on the host system. The target path does not need adjustment!

{icsdatavalidation-1.0.415.dist-info → icsdatavalidation-1.0.421.dist-info}/WHEEL RENAMED Viewed

File without changes

{icsdatavalidation-1.0.415.dist-info → icsdatavalidation-1.0.421.dist-info}/top_level.txt RENAMED Viewed

File without changes

icsDataValidation 1.0.415__py3-none-any.whl → 1.0.421__py3-none-any.whl

icsDataValidation 1.0.415py3-none-any.whl → 1.0.421py3-none-any.whl