cloe-nessy 0.2.10__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,7 @@ from .transform_decode import TransformDecodeAction
12
12
  from .transform_distinct import TransformDistinctAction
13
13
  from .transform_filter import TransformFilterAction
14
14
  from .transform_generic_sql import TransformSqlAction
15
+ from .transform_group_aggregate import TransformGroupAggregate
15
16
  from .transform_join import TransformJoinAction
16
17
  from .transform_json_normalize import TransformJsonNormalize
17
18
  from .transform_rename_columns import TransformRenameColumnsAction
@@ -42,6 +43,7 @@ __all__ = [
42
43
  "TransformDecodeAction",
43
44
  "TransformDistinctAction",
44
45
  "TransformSqlAction",
46
+ "TransformGroupAggregate",
45
47
  "TransformJoinAction",
46
48
  "TransformJsonNormalize",
47
49
  "TransformRenameColumnsAction",
@@ -0,0 +1,104 @@
1
+ from typing import Any
2
+
3
+ import pyspark.sql.functions as F
4
+
5
+ from ..pipeline_action import PipelineAction
6
+ from ..pipeline_context import PipelineContext
7
+
8
+
9
class TransformGroupAggregate(PipelineAction):
    """Performs aggregation operations on grouped data within a DataFrame.

    The DataFrame held by the pipeline context is grouped by the given columns and
    one or more aggregation functions are applied to other columns. Aggregations
    are given as a dictionary whose keys are column names and whose values are
    either a single aggregation function name or a list of function names.

    Each result column is aliased as ``<function>_<column>`` (for example
    ``sum_column3``).

    Example:
        ```yaml
        Transform Group Aggregate:
            action: TRANSFORM_GROUP_AGGREGATE
            options:
                grouping_columns:
                    - column1
                    - column2
                aggregations:
                    column3:
                        - sum
                        - avg
                    column4: max
        ```

    Attributes:
        name (str): The name of the action, default is "TRANSFORM_GROUP_AGGREGATE".

    Methods:
        run(context, grouping_columns=None, aggregations=None, **_):
            Executes the aggregation on the grouped data.

    Raises:
        ValueError: If the context data is None.
        ValueError: If no grouping columns are provided.
        ValueError: If no aggregations are provided.
        ValueError: If invalid aggregation operations are provided.
    """

    name: str = "TRANSFORM_GROUP_AGGREGATE"

    def run(
        self,
        context: PipelineContext,
        *,
        grouping_columns: list[str] | None = None,
        aggregations: dict[str, str | list] | None = None,
        **_: Any,
    ) -> PipelineContext:
        """Executes the aggregation on the grouped data.

        Args:
            context: The context in which this action is executed.
            grouping_columns: A list of columns to group by. Must contain at least
                one column.
            aggregations: A dictionary where keys are column names and values are either a single
                aggregation function or a list of functions. Supported functions are
                "avg", "max", "min", "mean", "sum" and "count".

        Raises:
            ValueError: If the context data is None.
            ValueError: If no grouping columns are provided (None or empty).
            ValueError: If no aggregations are provided (None, empty, or only empty lists).
            ValueError: If an aggregation value is neither a string nor a list.
            ValueError: If invalid aggregation operations are provided.

        Returns:
            PipelineContext: The context after the execution of this action.
        """
        if context.data is None:
            raise ValueError("Data from the context is required for the operation.")

        # Reject empty collections as well as None: the messages promise
        # "at least one", and an empty aggregation set would only fail later inside Spark.
        if not grouping_columns:
            raise ValueError("Please provide at least one grouping column")
        if not aggregations:
            raise ValueError("Please provide aggregations.")

        valid_operations = ["avg", "max", "min", "mean", "sum", "count"]

        aggregation_list = []
        for column_name, requested in aggregations.items():
            # Normalise the single-function form to a list so both forms share one code path.
            if isinstance(requested, str):
                operations = [requested]
            elif isinstance(requested, list):
                operations = requested
            else:
                raise ValueError("OPERATION DATATYPE INVALID")

            for operation in operations:
                # Check the type first so unhashable or non-string entries raise the
                # documented ValueError instead of a TypeError.
                if not isinstance(operation, str) or operation not in valid_operations:
                    raise ValueError(f"Please provide valid operations. Valid operations are {valid_operations}")
                aggregation_list.append(getattr(F, operation)(column_name).alias(f"{operation}_{column_name}"))

        # Every column may have been mapped to an empty list; agg() needs at least one expression.
        if not aggregation_list:
            raise ValueError("Please provide aggregations.")

        df = context.data.groupBy(grouping_columns).agg(*aggregation_list)

        return context.from_existing(data=df)
@@ -1,4 +1,3 @@
1
- import os
2
1
  from enum import Enum
3
2
  from typing import Any
4
3
 
@@ -190,5 +189,4 @@ class SessionManager:
190
189
  def _get_databricks_connect_builder():
191
190
  from databricks.connect import DatabricksSession
192
191
 
193
- selected_profile_name = os.environ.get("NESSY_DATABRICKSPROFILE") or "DEFAULT"
194
- return DatabricksSession.builder.profile(selected_profile_name)
192
+ return DatabricksSession.builder
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: cloe-nessy
3
- Version: 0.2.10
3
+ Version: 0.3.1
4
4
  Summary: Your friendly datalake monster.
5
5
  Home-page: https://initions.com/
6
6
  Author: initions
@@ -47,7 +47,7 @@ cloe_nessy/pipeline/pipeline_config.py,sha256=BN3ZSbr6bC-X9edoh-n5vRfPHFMbgtAU7m
47
47
  cloe_nessy/pipeline/pipeline_context.py,sha256=csElDc6BsynDUtRXgQOSCH7ONc_b-ag0YEg0zlQTz58,1874
48
48
  cloe_nessy/pipeline/pipeline_parsing_service.py,sha256=c_nAsgw81QYBM9AFiTxGgqRhNXABkDKplbeoCJPtbpE,6434
49
49
  cloe_nessy/pipeline/pipeline_step.py,sha256=UlnmpS6gm_dZ7m9dD1mZvye7mvUF_DA7HjOZo0oGYDU,1977
50
- cloe_nessy/pipeline/actions/__init__.py,sha256=shWYl1TDL2f58wHfBhPpiLldreNkvLGJjhnBaTYusFY,2066
50
+ cloe_nessy/pipeline/actions/__init__.py,sha256=Psksv49DVhWHR2D1OuMxvYClF1Vjh5shiyy9yBdWnb0,2160
51
51
  cloe_nessy/pipeline/actions/read_api.py,sha256=wGyPZdeh3Cam_BQBilltWBWCIdD9I_kv4lunEhE39Tg,6625
52
52
  cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=aZy4sJLLE8ZQ_SPXGSDoHYaBJTz8s7xQDVn5eYrYHvE,2689
53
53
  cloe_nessy/pipeline/actions/read_excel.py,sha256=EgHbK1wO6dkDo0KErYDhK_2sNIkIoa-6As9oo9dNFsE,7708
@@ -59,6 +59,7 @@ cloe_nessy/pipeline/actions/transform_decode.py,sha256=DmT-29dIqbz_xTj4GSCfnbgYR
59
59
  cloe_nessy/pipeline/actions/transform_distinct.py,sha256=sdCElXCM77AQ0m6Zzg_h7cyavBOxo7W9K1NrsvNLufA,1105
60
60
  cloe_nessy/pipeline/actions/transform_filter.py,sha256=vOAxKtNWCABLb6G6Xz98NK7fEfgn6QJia31S7IvoUTg,1428
61
61
  cloe_nessy/pipeline/actions/transform_generic_sql.py,sha256=cli59HCERFge7f0RB8yXw2oDtHSbMCWQMdeCeqhbdg8,2355
62
+ cloe_nessy/pipeline/actions/transform_group_aggregate.py,sha256=HcY4sqb2yNBCz90jQtxGA8fZPuQXfJuaDmv8lWuoTqg,4050
62
63
  cloe_nessy/pipeline/actions/transform_join.py,sha256=qktyaN2kcCkmoH3RILTc-UGYsGACx1nXH6xLtuvYi7k,3080
63
64
  cloe_nessy/pipeline/actions/transform_json_normalize.py,sha256=xN_cQgHSMSyPsyYXBdoe2i5pHnyH-kkH5do8qr3vybw,4157
64
65
  cloe_nessy/pipeline/actions/transform_rename_columns.py,sha256=fFdg3353QCE3zBei6iYQW9huPBcQ906sJLioaOUWj3s,1924
@@ -67,12 +68,12 @@ cloe_nessy/pipeline/actions/transform_select_columns.py,sha256=Kez8puDK7cRfhleBE
67
68
  cloe_nessy/pipeline/actions/transform_union.py,sha256=TDER06IABzxvIez4bGLKCLaDA4eScpTzYRbfUzwv_RQ,2342
68
69
  cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=6yAHTX5kZviumgBW_NYVGAUin6U2nDzmic9of6wA8FY,2590
69
70
  cloe_nessy/session/__init__.py,sha256=t7_YjUhJYW3km_FrucaUdbIl1boQtwkyhw_8yE10qzc,74
70
- cloe_nessy/session/session_manager.py,sha256=7LNerwILGkgt752cZLs2nlABGWiaoKdmOuLGWHZ6uYQ,6618
71
+ cloe_nessy/session/session_manager.py,sha256=rd33lSafzomuyGf1BzhyjIWuy9sXgFjr-ca7A7Sw8eo,6490
71
72
  cloe_nessy/settings/__init__.py,sha256=ZbkneO3WaKOxon7qHFHnou7EnBOSnBFyKMDZblIEvzM,101
72
73
  cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_UpVM,3653
73
74
  cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
74
75
  cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
75
- cloe_nessy-0.2.10.dist-info/METADATA,sha256=W9E01GNme6Zst17uy9TAW_eP7FL_Ng-HkKaUvXf8838,1838
76
- cloe_nessy-0.2.10.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
77
- cloe_nessy-0.2.10.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
78
- cloe_nessy-0.2.10.dist-info/RECORD,,
76
+ cloe_nessy-0.3.1.dist-info/METADATA,sha256=ziNbpjwuDfxE2Un5Y4YfYuEc1brCHy0Ic-rVc_ChZhY,1837
77
+ cloe_nessy-0.3.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
78
+ cloe_nessy-0.3.1.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
79
+ cloe_nessy-0.3.1.dist-info/RECORD,,