PyPI - cloe-nessy - Versions diffs - 0.2.10__py3-none-any.whl → 0.3.1__py3-none-any.whl - Mend

cloe-nessy 0.2.10__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

cloe_nessy/pipeline/actions/__init__.py CHANGED Viewed

@@ -12,6 +12,7 @@ from .transform_decode import TransformDecodeAction
 from .transform_distinct import TransformDistinctAction
 from .transform_filter import TransformFilterAction
 from .transform_generic_sql import TransformSqlAction
+from .transform_group_aggregate import TransformGroupAggregate
 from .transform_join import TransformJoinAction
 from .transform_json_normalize import TransformJsonNormalize
 from .transform_rename_columns import TransformRenameColumnsAction
@@ -42,6 +43,7 @@ __all__ = [
     "TransformDecodeAction",
     "TransformDistinctAction",
     "TransformSqlAction",
+    "TransformGroupAggregate",
     "TransformJoinAction",
     "TransformJsonNormalize",
     "TransformRenameColumnsAction",

cloe_nessy/pipeline/actions/transform_group_aggregate.py ADDED Viewed

@@ -0,0 +1,104 @@
+from typing import Any
+import pyspark.sql.functions as F
+from ..pipeline_action import PipelineAction
+from ..pipeline_context import PipelineContext
+class TransformGroupAggregate(PipelineAction):
+    """Performs aggregation operations on grouped data within a DataFrame.
+    This class allows you to group data by specified columns and apply various aggregation functions
+    to other columns. The aggregation functions can be specified as a dictionary where keys are column names
+    and values are either a single aggregation function or a list of functions.
+    Example:
+    ```yaml
+    Transform Group Aggregate:
+        action: TRANSFORM_GROUP_AGGREGATE
+        options:
+            grouping_columns:
+                - column1
+                - column2
+            aggregations:
+                column3:
+                    - sum
+                    - avg
+                column4: max
+    ```
+    Attributes:
+        name (str): The name of the action, default is "TRANSFORM_GROUP_AGGREGATE".
+    Methods:
+        run(context, grouping_columns=None, aggregations=None, **_):
+            Executes the aggregation on the grouped data.
+    Raises:
+        ValueError: If the context data is None.
+        ValueError: If no aggregations are provided.
+        ValueError: If invalid aggregation operations are provided.
+        ValueError: If columns with unsupported data types are included in the aggregations.
+    """
+    name: str = "TRANSFORM_GROUP_AGGREGATE"
+    def run(
+        self,
+        context: PipelineContext,
+        *,
+        grouping_columns: list[str] | None = None,
+        aggregations: dict[str, str | list] | None = None,
+        **_: Any,
+    ) -> PipelineContext:
+        """Executes the aggregation on the grouped data.
+        Args:
+            context: The context in which this action is executed.
+            grouping_columns: A list of columns to group by.
+            aggregations: A dictionary where keys are column names and values are either a single
+                aggregation function or a list of functions.
+        Raises:
+            ValueError: If the context data is None.
+            ValueError: If no aggregations are provided.
+            ValueError: If invalid aggregation operations are provided.
+            ValueError: If columns with unsupported data types are included in the aggregations.
+        Returns:
+            PipelineContext: The context after the execution of this action.
+        """
+        if context.data is None:
+            raise ValueError("Data from the context is required for the operation.")
+        if grouping_columns is None:
+            raise ValueError("Please provide at least one grouping column")
+        if aggregations is None:
+            raise ValueError("Please provide aggregations.")
+        valid_operations = ["avg", "max", "min", "mean", "sum", "count"]
+        for operation in aggregations.values():
+            if isinstance(operation, list):
+                if not set(operation).issubset(valid_operations):
+                    raise ValueError(f"Please provide valid operations. Valid operations are {valid_operations}")
+            elif isinstance(operation, str):
+                if operation not in valid_operations:
+                    raise ValueError(f"Please provide valid operations. Valid operations are {valid_operations}")
+            else:
+                raise ValueError("OPERATION DATATYPE INVALID")
+        aggregation_list = []
+        for column_name, aggregation in aggregations.items():
+            if isinstance(aggregation, list):
+                for subaggregation in aggregation:
+                    aggregation_list.append(
+                        getattr(F, subaggregation)(column_name).alias(f"{subaggregation}_{column_name}")
+                    )
+            else:
+                aggregation_list.append(getattr(F, aggregation)(column_name).alias(f"{aggregation}_{column_name}"))
+        df = context.data.groupBy(grouping_columns).agg(*aggregation_list)
+        return context.from_existing(data=df)

cloe_nessy/session/session_manager.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import os
 from enum import Enum
 from typing import Any
@@ -190,5 +189,4 @@ class SessionManager:
     def _get_databricks_connect_builder():
         from databricks.connect import DatabricksSession
-        selected_profile_name = os.environ.get("NESSY_DATABRICKSPROFILE") or "DEFAULT"
-        return DatabricksSession.builder.profile(selected_profile_name)
+        return DatabricksSession.builder

{cloe_nessy-0.2.10.dist-info → cloe_nessy-0.3.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: cloe-nessy
-Version: 0.2.10
+Version: 0.3.1
 Summary: Your friendly datalake monster.
 Home-page: https://initions.com/
 Author: initions

{cloe_nessy-0.2.10.dist-info → cloe_nessy-0.3.1.dist-info}/RECORD RENAMED Viewed

@@ -47,7 +47,7 @@ cloe_nessy/pipeline/pipeline_config.py,sha256=BN3ZSbr6bC-X9edoh-n5vRfPHFMbgtAU7m
 cloe_nessy/pipeline/pipeline_context.py,sha256=csElDc6BsynDUtRXgQOSCH7ONc_b-ag0YEg0zlQTz58,1874
 cloe_nessy/pipeline/pipeline_parsing_service.py,sha256=c_nAsgw81QYBM9AFiTxGgqRhNXABkDKplbeoCJPtbpE,6434
 cloe_nessy/pipeline/pipeline_step.py,sha256=UlnmpS6gm_dZ7m9dD1mZvye7mvUF_DA7HjOZo0oGYDU,1977
-cloe_nessy/pipeline/actions/__init__.py,sha256=shWYl1TDL2f58wHfBhPpiLldreNkvLGJjhnBaTYusFY,2066
+cloe_nessy/pipeline/actions/__init__.py,sha256=Psksv49DVhWHR2D1OuMxvYClF1Vjh5shiyy9yBdWnb0,2160
 cloe_nessy/pipeline/actions/read_api.py,sha256=wGyPZdeh3Cam_BQBilltWBWCIdD9I_kv4lunEhE39Tg,6625
 cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=aZy4sJLLE8ZQ_SPXGSDoHYaBJTz8s7xQDVn5eYrYHvE,2689
 cloe_nessy/pipeline/actions/read_excel.py,sha256=EgHbK1wO6dkDo0KErYDhK_2sNIkIoa-6As9oo9dNFsE,7708
@@ -59,6 +59,7 @@ cloe_nessy/pipeline/actions/transform_decode.py,sha256=DmT-29dIqbz_xTj4GSCfnbgYR
 cloe_nessy/pipeline/actions/transform_distinct.py,sha256=sdCElXCM77AQ0m6Zzg_h7cyavBOxo7W9K1NrsvNLufA,1105
 cloe_nessy/pipeline/actions/transform_filter.py,sha256=vOAxKtNWCABLb6G6Xz98NK7fEfgn6QJia31S7IvoUTg,1428
 cloe_nessy/pipeline/actions/transform_generic_sql.py,sha256=cli59HCERFge7f0RB8yXw2oDtHSbMCWQMdeCeqhbdg8,2355
+cloe_nessy/pipeline/actions/transform_group_aggregate.py,sha256=HcY4sqb2yNBCz90jQtxGA8fZPuQXfJuaDmv8lWuoTqg,4050
 cloe_nessy/pipeline/actions/transform_join.py,sha256=qktyaN2kcCkmoH3RILTc-UGYsGACx1nXH6xLtuvYi7k,3080
 cloe_nessy/pipeline/actions/transform_json_normalize.py,sha256=xN_cQgHSMSyPsyYXBdoe2i5pHnyH-kkH5do8qr3vybw,4157
 cloe_nessy/pipeline/actions/transform_rename_columns.py,sha256=fFdg3353QCE3zBei6iYQW9huPBcQ906sJLioaOUWj3s,1924
@@ -67,12 +68,12 @@ cloe_nessy/pipeline/actions/transform_select_columns.py,sha256=Kez8puDK7cRfhleBE
 cloe_nessy/pipeline/actions/transform_union.py,sha256=TDER06IABzxvIez4bGLKCLaDA4eScpTzYRbfUzwv_RQ,2342
 cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=6yAHTX5kZviumgBW_NYVGAUin6U2nDzmic9of6wA8FY,2590
 cloe_nessy/session/__init__.py,sha256=t7_YjUhJYW3km_FrucaUdbIl1boQtwkyhw_8yE10qzc,74
-cloe_nessy/session/session_manager.py,sha256=7LNerwILGkgt752cZLs2nlABGWiaoKdmOuLGWHZ6uYQ,6618
+cloe_nessy/session/session_manager.py,sha256=rd33lSafzomuyGf1BzhyjIWuy9sXgFjr-ca7A7Sw8eo,6490
 cloe_nessy/settings/__init__.py,sha256=ZbkneO3WaKOxon7qHFHnou7EnBOSnBFyKMDZblIEvzM,101
 cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_UpVM,3653
 cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
-cloe_nessy-0.2.10.dist-info/METADATA,sha256=W9E01GNme6Zst17uy9TAW_eP7FL_Ng-HkKaUvXf8838,1838
-cloe_nessy-0.2.10.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-cloe_nessy-0.2.10.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
-cloe_nessy-0.2.10.dist-info/RECORD,,
+cloe_nessy-0.3.1.dist-info/METADATA,sha256=ziNbpjwuDfxE2Un5Y4YfYuEc1brCHy0Ic-rVc_ChZhY,1837
+cloe_nessy-0.3.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+cloe_nessy-0.3.1.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
+cloe_nessy-0.3.1.dist-info/RECORD,,

{cloe_nessy-0.2.10.dist-info → cloe_nessy-0.3.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{cloe_nessy-0.2.10.dist-info → cloe_nessy-0.3.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

cloe-nessy 0.2.10__py3-none-any.whl → 0.3.1__py3-none-any.whl

cloe-nessy 0.2.10__py3-none-any.whl → 0.3.1__py3-none-any.whl