icsDataValidation 1.0.428-py3-none-any.whl → 1.0.438-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
Files changed (23)
  1. icsDataValidation/connection_setups/sqlserver_connection_setup.py +4 -3
  2. icsDataValidation/input_parameters/testing_tool_params.py +0 -1
  3. icsDataValidation/main.py +3 -4
  4. icsDataValidation/services/database_services/snowflake_service.py +170 -65
  5. icsDataValidation/services/database_services/sqlserver_service.py +196 -88
  6. {icsdatavalidation-1.0.428.dist-info → icsdatavalidation-1.0.438.dist-info}/METADATA +1 -1
  7. {icsdatavalidation-1.0.428.dist-info → icsdatavalidation-1.0.438.dist-info}/RECORD +23 -9
  8. {icsdatavalidation-1.0.428.dist-info → icsdatavalidation-1.0.438.dist-info}/WHEEL +1 -1
  9. {icsdatavalidation-1.0.428.dist-info → icsdatavalidation-1.0.438.dist-info}/top_level.txt +1 -0
  10. tests/snowflake_service/test_create_checksums.py +146 -0
  11. tests/snowflake_service/test_create_pandas_df_from_group_by.py +485 -0
  12. tests/snowflake_service/test_create_pandas_df_from_sample.py +444 -0
  13. tests/snowflake_service/test_get_checksum_statement.py +243 -0
  14. tests/snowflake_service/test_get_column_clause.py +305 -0
  15. tests/snowflake_service/test_get_countnulls_statement.py +128 -0
  16. tests/snowflake_service/test_get_in_clause.py +66 -0
  17. tests/sqlserver_service/test_create_checksums.py +153 -0
  18. tests/sqlserver_service/test_create_pandas_df_from_group_by.py +427 -0
  19. tests/sqlserver_service/test_create_pandas_df_from_sample.py +286 -0
  20. tests/sqlserver_service/test_get_checksum_statement.py +160 -0
  21. tests/sqlserver_service/test_get_column_clause.py +182 -0
  22. tests/sqlserver_service/test_get_countnulls_statement.py +121 -0
  23. tests/sqlserver_service/test_get_in_clause.py +87 -0
tests/snowflake_service/test_create_pandas_df_from_group_by.py
@@ -0,0 +1,485 @@
+ from unittest.mock import MagicMock, patch
+
+ import pandas as pd
+ import pytest
+
+ from icsDataValidation.core.database_objects import DatabaseObject, DatabaseObjectType
+ from icsDataValidation.services.database_services.snowflake_service import SnowflakeService
+
+
+ @pytest.fixture
+ def snowflake_service():
+     """Create a SnowflakeService instance with mocked connection."""
+     mock_params = MagicMock()
+     service = SnowflakeService(mock_params)
+     service.snowflake_connection = MagicMock()
+     return service
+
+
+ @pytest.fixture
+ def mock_database_object():
+     """Create a mock DatabaseObject."""
+     obj = DatabaseObject(
+         object_identifier="TestDB.dbo.TestTable",
+         object_type=DatabaseObjectType.TABLE
+     )
+     return obj
+
+
+ class TestCreatePandasDfFromGroupByParametrized:
+     """Parametrized tests for create_pandas_df_from_group_by method."""
+
+     @pytest.mark.parametrize(
+         "column_intersections,group_by_columns,group_by_aggregation_columns," \
+         "group_by_aggregation_type,only_numeric,where_clause,exclude_columns," \
+         "numeric_scale,enclose_quotes,mock_datatypes," \
+         "expected_group_by_cols,expected_in_agg_string,expected_not_in_agg_string," \
+         "expected_grouping_cols_final",
+         [
+             ( # single grouping column, no double quotes
+                 ['region', 'amount'],
+                 ['region'],
+                 ['amount'],
+                 'various',
+                 False,
+                 '',
+                 [],
+                 None,
+                 False,
+                 [{"COLUMN_NAME": "amount", "DATA_TYPE": "number"}],
+                 " region ",
+                 ["SUM(amount)"],
+                 [],
+                 ['region']
+             ),
+             ( # single grouping column, with double quotes
+                 ['region', 'amount'],
+                 ['region'],
+                 ['amount'],
+                 'various',
+                 False,
+                 '',
+                 [],
+                 None,
+                 True,
+                 [{"COLUMN_NAME": "amount", "DATA_TYPE": "number"}],
+                 ' "region" ',
+                 ['SUM("amount")'],
+                 [],
+                 ['region']
+             ),
+             ( # multiple grouping columns, no double quotes
+                 ['region', 'department', 'amount'],
+                 ['region', 'department'],
+                 ['amount'],
+                 'various',
+                 False,
+                 '',
+                 [],
+                 None,
+                 False,
+                 [{"COLUMN_NAME": "amount", "DATA_TYPE": "number"}],
+                 " region ,department ",
+                 ["SUM(amount)"],
+                 [],
+                 ['region', 'department']
+             ),
+             ( # grouping column excluded
+                 ['region', 'department', 'amount'],
+                 ['region', 'department'],
+                 ['amount'],
+                 'various',
+                 False,
+                 '',
+                 ['department'],
+                 None,
+                 False,
+                 [{"COLUMN_NAME": "amount", "DATA_TYPE": "number"}],
+                 " region ",
+                 ["SUM(amount)"],
+                 [],
+                 ['region']
+             ),
+             ( # grouping column not in intersections
+                 ['amount'],
+                 ['region'],
+                 ['amount'],
+                 'various',
+                 False,
+                 '',
+                 [],
+                 None,
+                 False,
+                 [{"COLUMN_NAME": "AMOUNT", "DATA_TYPE": "number"}],
+                 "",
+                 [],
+                 [],
+                 []
+             ),
+             ( # only_min_max type, numeric columns
+                 ['region', 'amount', 'price'],
+                 ['region'],
+                 ['amount', 'price'],
+                 'only_min_max',
+                 True,
+                 '',
+                 [],
+                 None,
+                 False,
+                 [
+                     {"COLUMN_NAME": "amount", "DATA_TYPE": "number"},
+                     {"COLUMN_NAME": "price", "DATA_TYPE": "float"}
+                 ],
+                 " region ",
+                 ["MIN(amount)", "MAX(amount)", "MIN(price)", "MAX(price)"],
+                 ["SUM(", "COUNTDISTINCT"],
+                 ['region']
+             ),
+             ( # only_min_max with numeric_scale
+                 ['region', 'AMOUNT'],
+                 ['region'],
+                 ['AMOUNT'],
+                 'only_min_max',
+                 True,
+                 '',
+                 [],
+                 2,
+                 False,
+                 [{"COLUMN_NAME": "AMOUNT", "DATA_TYPE": "float"}],
+                 " region ",
+                 ["CAST(ROUND(MIN(AMOUNT),2) AS DECIMAL(38,2))", "CAST(ROUND(MAX(AMOUNT),2) AS DECIMAL(38,2))"],
+                 [],
+                 ['region']
+             ),
+             ( # only_min_max with numeric_scale and double quotes
+                 ['region', 'amount'],
+                 ['region'],
+                 ['amount'],
+                 'only_min_max',
+                 True,
+                 '',
+                 [],
+                 2,
+                 True,
+                 [{"COLUMN_NAME": "amount", "DATA_TYPE": "float"}],
+                 ' "region" ',
+                 ['CAST(ROUND(MIN("amount"),2) AS DECIMAL(38,2))', 'CAST(ROUND(MAX("amount"),2) AS DECIMAL(38,2))'],
+                 [],
+                 ['region']
+             ),
+             ( # various type, numeric only
+                 ['REGION', 'AMOUNT'],
+                 ['REGION'],
+                 ['AMOUNT'],
+                 'various',
+                 True,
+                 '',
+                 [],
+                 None,
+                 False,
+                 [{"COLUMN_NAME": "AMOUNT", "DATA_TYPE": "number"}],
+                 " REGION ",
+                 ["SUM(AMOUNT)"],
+                 ["MIN(", "MAX("],
+                 ['REGION']
+             ),
+             ( # various type with string columns
+                 ['region', 'AMOUNT', 'DESCRIPTION'],
+                 ['region'],
+                 ['AMOUNT', 'DESCRIPTION'],
+                 'various',
+                 False,
+                 '',
+                 [],
+                 None,
+                 False,
+                 [
+                     {"COLUMN_NAME": "AMOUNT", "DATA_TYPE": "number"},
+                     {"COLUMN_NAME": "DESCRIPTION", "DATA_TYPE": "text"}
+                 ],
+                 " region ",
+                 ["SUM(AMOUNT)", "COUNT(DISTINCT LOWER(DESCRIPTION))", '"COUNTDISTINCT_DESCRIPTION"'],
+                 [],
+                 ['region']
+             ),
+             ( # various type with string columns and double quotes
+                 ['REGION', 'amount', 'DEsCRIPTION'],
+                 ['REGION'],
+                 ['amount', 'DEsCRIPTION'],
+                 'various',
+                 False,
+                 '',
+                 [],
+                 None,
+                 True,
+                 [
+                     {"COLUMN_NAME": "amount", "DATA_TYPE": "number"},
+                     {"COLUMN_NAME": "DEsCRIPTION", "DATA_TYPE": "text"}
+                 ],
+                 ' "REGION" ',
+                 ['SUM("amount")', 'COUNT(DISTINCT LOWER("DEsCRIPTION"))', '"COUNTDISTINCT_DEsCRIPTION"'],
+                 [],
+                 ['REGION']
+             ),
+             ( # various type with boolean columns
+                 ['REGION', 'AMOUNT', 'IS_ACTIVE'],
+                 ['REGION'],
+                 ['AMOUNT', 'IS_ACTIVE'],
+                 'various',
+                 False,
+                 '',
+                 [],
+                 None,
+                 False,
+                 [
+                     {"COLUMN_NAME": "AMOUNT", "DATA_TYPE": "number"},
+                     {"COLUMN_NAME": "IS_ACTIVE", "DATA_TYPE": "boolean"}
+                 ],
+                 " REGION ",
+                 ["SUM(AMOUNT)", "AGGREGATEBOOLEAN_IS_ACTIVE", "COUNT(CASE WHEN IS_ACTIVE = true THEN 1 ELSE NULL END)"],
+                 [],
+                 ['REGION']
+             ),
+             ( # various type with binary columns
+                 ['REGION', 'BINARY_DATA'],
+                 ['REGION'],
+                 ['BINARY_DATA'],
+                 'various',
+                 False,
+                 '',
+                 [],
+                 None,
+                 False,
+                 [{"COLUMN_NAME": "BINARY_DATA", "DATA_TYPE": "binary"}],
+                 " REGION ",
+                 ["COUNT(DISTINCT LOWER(TRY_TO_NUMBER(BINARY_DATA::VARCHAR)))", "COUNTDISTINCT_BINARY_DATA"],
+                 [],
+                 ['REGION']
+             ),
+             ( # various type with datetime columns
+                 ['REGION', 'CREATED_DATE'],
+                 ['REGION'],
+                 ['CREATED_DATE'],
+                 'various',
+                 False,
+                 '',
+                 [],
+                 None,
+                 False,
+                 [{"COLUMN_NAME": "CREATED_DATE", "DATA_TYPE": "timestamp_ntz"}],
+                 " REGION ",
+                 ["COUNT(DISTINCT LOWER(CREATED_DATE))", "COUNTDISTINCT_CREATED_DATE"],
+                 [],
+                 ['REGION']
+             ),
+             ( # various_and_min_max type
+                 ['REGION', 'AMOUNT', 'DESCRIPTION'],
+                 ['REGION'],
+                 ['AMOUNT', 'DESCRIPTION'],
+                 'various_and_min_max',
+                 False,
+                 '',
+                 [],
+                 None,
+                 False,
+                 [
+                     {"COLUMN_NAME": "AMOUNT", "DATA_TYPE": "number"},
+                     {"COLUMN_NAME": "DESCRIPTION", "DATA_TYPE": "text"}
+                 ],
+                 " REGION ",
+                 ["MIN(AMOUNT)", "MAX(AMOUNT)", "SUM(AMOUNT)", "COUNT(DISTINCT LOWER(DESCRIPTION))"],
+                 ["SUM(REGION)", "MAX(REGION)", "MIN(REGION)"],
+                 ['REGION']
+             ),
+             ( # aggregation columns 'all'
+                 ['REGION', 'AMOUNT', 'PRICE'],
+                 ['REGION'],
+                 ['all'],
+                 'various',
+                 True,
+                 '',
+                 [],
+                 None,
+                 False,
+                 [
+                     {"COLUMN_NAME": "AMOUNT", "DATA_TYPE": "number"},
+                     {"COLUMN_NAME": "PRICE", "DATA_TYPE": "float"}
+                 ],
+                 " REGION ",
+                 ["SUM(AMOUNT)", "SUM(PRICE)"],
+                 ["SUM(REGION)"],
+                 ['REGION']
+             ),
+             ( # aggregation with exclude_columns
+                 ['REGION', 'AMOUNT', 'PRICE'],
+                 ['REGION'],
+                 ['AMOUNT', 'PRICE'],
+                 'various',
+                 True,
+                 '',
+                 ['PRICE'],
+                 None,
+                 False,
+                 [{"COLUMN_NAME": "AMOUNT", "DATA_TYPE": "number"}],
+                 " REGION ",
+                 ["SUM(AMOUNT)"],
+                 ["SUM(PRICE)"],
+                 ['REGION']
+             ),
+             ( # aggregation with exclude_columns and double quotes
+                 ['REGION', 'AMO/NT', 'prIce'],
+                 ['REGION'],
+                 ['AMO/NT', 'prIce'],
+                 'various',
+                 True,
+                 '',
+                 ['prIce'],
+                 None,
+                 True,
+                 [{"COLUMN_NAME": "AMO/NT", "DATA_TYPE": "number"}],
+                 ' "REGION" ',
+                 ['SUM("AMO/NT")'],
+                 ["prIce"],
+                 ['REGION']
+             ),
+             ( # empty aggregation string - no matching columns
+                 ['region'],
+                 ['region'],
+                 ['amount'],
+                 'various',
+                 True,
+                 '',
+                 [],
+                 None,
+                 False,
+                 [],
+                 " region ",
+                 [],
+                 [],
+                 ['region']
+             ),
+             ( # numeric_scale with various type
+                 ['REGION', 'AMOUNT'],
+                 ['REGION'],
+                 ['AMOUNT'],
+                 'various',
+                 True,
+                 '',
+                 [],
+                 3,
+                 False,
+                 [{"COLUMN_NAME": "AMOUNT", "DATA_TYPE": "float"}],
+                 " REGION ",
+                 ["CAST(ROUND(SUM(AMOUNT), 3) AS DECIMAL(38,3))"],
+                 [],
+                 ['REGION']
+             ),
+             ( # mixed datatype aggregations
+                 ['REGION', 'AMOUNT', 'PRICE', 'NAME', 'IS_ACTIVE', 'CREATED_DATE'],
+                 ['REGION'],
+                 ['AMOUNT', 'PRICE', 'NAME', 'IS_ACTIVE', 'CREATED_DATE'],
+                 'various',
+                 False,
+                 '',
+                 [],
+                 2,
+                 False,
+                 [
+                     {"COLUMN_NAME": "AMOUNT", "DATA_TYPE": "number"},
+                     {"COLUMN_NAME": "PRICE", "DATA_TYPE": "float"},
+                     {"COLUMN_NAME": "NAME", "DATA_TYPE": "text"},
+                     {"COLUMN_NAME": "IS_ACTIVE", "DATA_TYPE": "boolean"},
+                     {"COLUMN_NAME": "CREATED_DATE", "DATA_TYPE": "timestamp_ntz"}
+                 ],
+                 " REGION ",
+                 [
+                     "CAST(ROUND(SUM(AMOUNT), 2) AS DECIMAL(38,2))",
+                     "CAST(ROUND(SUM(PRICE), 2) AS DECIMAL(38,2))",
+                     "COUNT(DISTINCT LOWER(NAME))",
+                     "AGGREGATEBOOLEAN_IS_ACTIVE",
+                     "COUNT(DISTINCT LOWER(CREATED_DATE))"
+                 ],
+                 [],
+                 ['REGION']
+             ),
+             ( # only_numeric flag excludes string aggregations
+                 ['REGION', 'AMOUNT', 'NAME', 'IS_ACTIVE'],
+                 ['REGION'],
+                 ['AMOUNT', 'NAME', 'IS_ACTIVE'],
+                 'various',
+                 True,
+                 '',
+                 [],
+                 None,
+                 False,
+                 [
+                     {"COLUMN_NAME": "AMOUNT", "DATA_TYPE": "number"},
+                     {"COLUMN_NAME": "NAME", "DATA_TYPE": "text"},
+                     {"COLUMN_NAME": "IS_ACTIVE", "DATA_TYPE": "boolean"}
+                 ],
+                 " REGION ",
+                 ["SUM(AMOUNT)"],
+                 ["COUNTDISTINCT", "AGGREGATEBOOLEAN"],
+                 ['REGION']
+             ),
+             ( # special character column names with double quotes
+                 ['region', '/ISDFPS/amount'],
+                 ['region'],
+                 ['/ISDFPS/amount'],
+                 'various',
+                 True,
+                 '',
+                 [],
+                 None,
+                 True,
+                 [{"COLUMN_NAME": "/ISDFPS/amount", "DATA_TYPE": "number"}],
+                 ' "region" ',
+                 ['"/ISDFPS/amount"'],
+                 [],
+                 ['region']
+             ),
+         ],
+     )
+     def test_create_pandas_df_from_group_by(
+         self, snowflake_service, mock_database_object,
+         column_intersections, group_by_columns, group_by_aggregation_columns,
+         group_by_aggregation_type, only_numeric, where_clause, exclude_columns,
+         numeric_scale, enclose_quotes, mock_datatypes,
+         expected_group_by_cols, expected_in_agg_string, expected_not_in_agg_string,
+         expected_grouping_cols_final
+     ):
+         """Test create_pandas_df_from_group_by with various configurations."""
+         with patch.object(snowflake_service, 'get_data_types_from_object') as mock_get_datatypes, \
+              patch.object(snowflake_service, 'execute_queries') as mock_execute:
+
+             mock_get_datatypes.return_value = mock_datatypes
+             mock_execute.return_value = pd.DataFrame()
+
+             result = snowflake_service.create_pandas_df_from_group_by(
+                 object=mock_database_object,
+                 column_intersections=column_intersections,
+                 group_by_columns=group_by_columns,
+                 group_by_aggregation_columns=group_by_aggregation_columns,
+                 group_by_aggregation_type=group_by_aggregation_type,
+                 only_numeric=only_numeric,
+                 where_clause=where_clause,
+                 exclude_columns=exclude_columns,
+                 numeric_scale=numeric_scale,
+                 enclose_column_by_double_quotes=enclose_quotes
+             )
+
+             _, group_by_query_aggregation_string, group_by_query_columns_string, grouping_columns_final, _ = result
+
+             # Check group_by_query_columns_string
+             assert group_by_query_columns_string == expected_group_by_cols
+
+             # Check grouping_columns_final
+             assert grouping_columns_final == expected_grouping_cols_final
+
+             # Check expected strings in aggregation string
+             for expected in expected_in_agg_string:
+                 assert expected in group_by_query_aggregation_string
+
+             # Check strings that should NOT be in aggregation string
+             for expected in expected_not_in_agg_string:
+                 assert expected not in group_by_query_aggregation_string