cloe-nessy 0.3.16.7__py3-none-any.whl → 0.3.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,7 @@ from .read_metadata_yaml import ReadMetadataYAMLAction
9
9
  from .transform_change_datatype import TransformChangeDatatypeAction
10
10
  from .transform_clean_column_names import TransformCleanColumnNamesAction
11
11
  from .transform_concat_columns import TransformConcatColumnsAction
12
+ from .transform_convert_timestamp import TransformConvertTimestampAction
12
13
  from .transform_decode import TransformDecodeAction
13
14
  from .transform_deduplication import TransformDeduplication
14
15
  from .transform_distinct import TransformDistinctAction
@@ -46,6 +47,7 @@ __all__ = [
46
47
  "TransformChangeDatatypeAction",
47
48
  "TransformCleanColumnNamesAction",
48
49
  "TransformConcatColumnsAction",
50
+ "TransformConvertTimestampAction",
49
51
  "TransformDecodeAction",
50
52
  "TransformDeduplication",
51
53
  "TransformDistinctAction",
@@ -0,0 +1,97 @@
1
+ from typing import Any
2
+
3
+ from pyspark.errors.exceptions.connect import IllegalArgumentException
4
+ from pyspark.sql import functions as F
5
+ from pyspark.sql.utils import AnalysisException
6
+
7
+ from ..pipeline_action import PipelineAction
8
+ from ..pipeline_context import PipelineContext
9
+
10
+
11
+ class TransformConvertTimestampAction(PipelineAction):
12
+ """This action performs timestamp based conversions.
13
+
14
+ Example:
15
+ ```yaml
16
+ Convert Timestamp:
17
+ action: TRANSFORM_CONVERT_TIMESTAMP
18
+ options:
19
+ columns:
20
+ - date
21
+ - creation_timestamp
22
+ - current_ts
23
+ source_format: unixtime_ms
24
+ target_format: timestamp
25
+ ```
26
+ """
27
+
28
+ name: str = "TRANSFORM_CONVERT_TIMESTAMP"
29
+
30
+ def run(
31
+ self,
32
+ context: PipelineContext,
33
+ *,
34
+ columns: list[str] | str | None = None,
35
+ source_format: str = "",
36
+ target_format: str = "",
37
+ **_: Any,
38
+ ) -> PipelineContext:
39
+ """Converts column(s) from a given source format to a new format.
40
+
41
+ Args:
42
+ context: Context in which this Action is executed.
43
+ columns: A column name or a list of column names that should be converted.
44
+ source_format: Initial format type of the column.
45
+ target_format: Desired format type of the column.
46
+ This also supports passing a format string like `yyyy-MM-dd HH:mm:ss`.
47
+
48
+ Raises:
49
+ ValueError: If no column, source_format or target_format are provided.
50
+ ValueError: If source_format or target_format are not supported.
51
+
52
+ Returns:
53
+ PipelineContext: Context after the execution of this Action.
54
+ """
55
+ if not columns:
56
+ raise ValueError("No column names provided.")
57
+ if not source_format:
58
+ raise ValueError("No source_format provided.")
59
+ if not target_format:
60
+ raise ValueError("No target_format provided.")
61
+ if context.data is None:
62
+ raise ValueError("Context DataFrame is required.")
63
+ df = context.data
64
+
65
+ columns = [columns] if isinstance(columns, str) else columns
66
+
67
+ match source_format:
68
+ # convert always to timestamp first
69
+ case "string" | "date" | "unixtime":
70
+ for column in columns:
71
+ df = df.withColumn(column, F.to_timestamp(F.col(column)))
72
+ case "unixtime_ms":
73
+ for column in columns:
74
+ df = df.withColumn(column, F.to_timestamp(F.col(column) / 1000))
75
+ case "timestamp":
76
+ pass
77
+ case _:
78
+ raise ValueError(f"Unknown source_format {source_format}")
79
+
80
+ match target_format:
81
+ # convert from timestamp to desired output type and format
82
+ case "timestamp":
83
+ pass
84
+ case "unixtime":
85
+ for column in columns:
86
+ df = df.withColumn(column, F.to_unix_timestamp(F.col(column)))
87
+ case "date":
88
+ for column in columns:
89
+ df = df.withColumn(column, F.to_date(F.col(column)))
90
+ case _:
91
+ try:
92
+ for column in columns:
93
+ df = df.withColumn(column, F.date_format(F.col(column), target_format))
94
+ except (IllegalArgumentException, AnalysisException) as e:
95
+ raise ValueError(f"Invalid target_format {target_format}") from e
96
+
97
+ return context.from_existing(data=df)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cloe-nessy
3
- Version: 0.3.16.7
3
+ Version: 0.3.17.0
4
4
  Summary: Your friendly datalake monster.
5
5
  Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
6
6
  License: MIT
@@ -58,12 +58,12 @@ Extract-Transform-Load (ETL) Workflow.
58
58
 
59
59
  When you are contributing, please refer to our Contribution Guide in the *nessy*
60
60
  Docs
61
- [here](https://white-rock-0cabbc003.1.azurestaticapps.net/tool_docs/nessy/Developer-Guide/)!
61
+ [here](https://yellow-mud-0b9177e03.2.azurestaticapps.net/tool_docs/nessy/Developer-Guide/)!
62
62
 
63
63
  ## Usage
64
64
 
65
65
  Please find the User Guide
66
- [here](https://white-rock-0cabbc003.1.azurestaticapps.net/tool_docs/nessy/User-Guide/)!
66
+ [here](https://yellow-mud-0b9177e03.2.azurestaticapps.net/tool_docs/nessy/User-Guide/)!
67
67
 
68
68
  ## Contact
69
69
 
@@ -71,7 +71,7 @@ cloe_nessy/pipeline/pipeline_context.py,sha256=csElDc6BsynDUtRXgQOSCH7ONc_b-ag0Y
71
71
  cloe_nessy/pipeline/pipeline_parsing_service.py,sha256=c_nAsgw81QYBM9AFiTxGgqRhNXABkDKplbeoCJPtbpE,6434
72
72
  cloe_nessy/pipeline/pipeline_plotting_service.py,sha256=goMQj73FzUVchKn5c2SsPcWR6fr7DtVkVrcQfJsKCq4,13111
73
73
  cloe_nessy/pipeline/pipeline_step.py,sha256=UlnmpS6gm_dZ7m9dD1mZvye7mvUF_DA7HjOZo0oGYDU,1977
74
- cloe_nessy/pipeline/actions/__init__.py,sha256=RAGwu3Xzt2JJc0AveZ_hVi3SxTIdehAG-JoQe9JPMNA,2690
74
+ cloe_nessy/pipeline/actions/__init__.py,sha256=RZ1UVSn9v88F4GKgHy6UYDzx8zSAMQScJLCeiHO5f8A,2802
75
75
  cloe_nessy/pipeline/actions/read_api.py,sha256=RBv5XeHtjTXuCP09Fqo6JNx6iIhQQI-nuAHCuSaGs2s,7778
76
76
  cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=TBlJaXJAQwLtwvh7dXsX9ebNN3rS6En6951MnT8xGG8,4101
77
77
  cloe_nessy/pipeline/actions/read_excel.py,sha256=Mhl3r_2Hqk2XN7Fl5WqqAyE4JdnwSiivbhWMglyBtkE,7961
@@ -80,6 +80,7 @@ cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=3ZDy9qiDYtM1oDQzHPC23hL
80
80
  cloe_nessy/pipeline/actions/transform_change_datatype.py,sha256=24Tn6R3TvUkWCh8V6naLdyNbCbqvyPOOoer-hy_Ebq4,2077
81
81
  cloe_nessy/pipeline/actions/transform_clean_column_names.py,sha256=VxvWqENW63c50L96JA1V_ioe4By6gGzx_iY86njOXEM,3044
82
82
  cloe_nessy/pipeline/actions/transform_concat_columns.py,sha256=Nk8YbhxDnFZsWzW9Dj5Yl76Uq6VrcMlevQPHGms65L8,3777
83
+ cloe_nessy/pipeline/actions/transform_convert_timestamp.py,sha256=2SL078tBcOmytDbt-cR81jZbclwqELsUB4XDLjaCnNo,3579
83
84
  cloe_nessy/pipeline/actions/transform_decode.py,sha256=JajMwHREtxa8u_1Q3RZDBVMjncoSel-WzQFVTO0MREg,4455
84
85
  cloe_nessy/pipeline/actions/transform_deduplication.py,sha256=SfTDrOL0TNSC4wITbozabC0jYvceTLnqU4urnEjYk9g,4910
85
86
  cloe_nessy/pipeline/actions/transform_distinct.py,sha256=c7aBxANyqT4aKhm0cSELDtD-bP0Se9vxlBF0K4AgQWs,1976
@@ -106,7 +107,7 @@ cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_Up
106
107
  cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
107
108
  cloe_nessy/utils/column_names.py,sha256=dCNtm61mc5aLkY2oE4rlfN3VLCrpot6fOESjAZmCmhA,361
108
109
  cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
109
- cloe_nessy-0.3.16.7.dist-info/METADATA,sha256=Rz35trsVqlMw7hkcDBWFr-f9EOtqII9EwZogBABy6gw,3292
110
- cloe_nessy-0.3.16.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
111
- cloe_nessy-0.3.16.7.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
112
- cloe_nessy-0.3.16.7.dist-info/RECORD,,
110
+ cloe_nessy-0.3.17.0.dist-info/METADATA,sha256=hR0GqdboYwzBrbZY_ese9kt250DIOHgMlAj3QOqLhF8,3292
111
+ cloe_nessy-0.3.17.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
112
+ cloe_nessy-0.3.17.0.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
113
+ cloe_nessy-0.3.17.0.dist-info/RECORD,,