cloe-nessy 0.3.16.7__py3-none-any.whl → 0.3.17.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cloe_nessy/pipeline/actions/__init__.py +2 -0
- cloe_nessy/pipeline/actions/transform_convert_timestamp.py +97 -0
- {cloe_nessy-0.3.16.7.dist-info → cloe_nessy-0.3.17.0.dist-info}/METADATA +3 -3
- {cloe_nessy-0.3.16.7.dist-info → cloe_nessy-0.3.17.0.dist-info}/RECORD +6 -5
- {cloe_nessy-0.3.16.7.dist-info → cloe_nessy-0.3.17.0.dist-info}/WHEEL +0 -0
- {cloe_nessy-0.3.16.7.dist-info → cloe_nessy-0.3.17.0.dist-info}/top_level.txt +0 -0
|
@@ -9,6 +9,7 @@ from .read_metadata_yaml import ReadMetadataYAMLAction
|
|
|
9
9
|
from .transform_change_datatype import TransformChangeDatatypeAction
|
|
10
10
|
from .transform_clean_column_names import TransformCleanColumnNamesAction
|
|
11
11
|
from .transform_concat_columns import TransformConcatColumnsAction
|
|
12
|
+
from .transform_convert_timestamp import TransformConvertTimestampAction
|
|
12
13
|
from .transform_decode import TransformDecodeAction
|
|
13
14
|
from .transform_deduplication import TransformDeduplication
|
|
14
15
|
from .transform_distinct import TransformDistinctAction
|
|
@@ -46,6 +47,7 @@ __all__ = [
|
|
|
46
47
|
"TransformChangeDatatypeAction",
|
|
47
48
|
"TransformCleanColumnNamesAction",
|
|
48
49
|
"TransformConcatColumnsAction",
|
|
50
|
+
"TransformConvertTimestampAction",
|
|
49
51
|
"TransformDecodeAction",
|
|
50
52
|
"TransformDeduplication",
|
|
51
53
|
"TransformDistinctAction",
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
from pyspark.errors.exceptions.connect import IllegalArgumentException
|
|
4
|
+
from pyspark.sql import functions as F
|
|
5
|
+
from pyspark.sql.utils import AnalysisException
|
|
6
|
+
|
|
7
|
+
from ..pipeline_action import PipelineAction
|
|
8
|
+
from ..pipeline_context import PipelineContext
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TransformConvertTimestampAction(PipelineAction):
|
|
12
|
+
"""This action performs timestamp based conversions.
|
|
13
|
+
|
|
14
|
+
Example:
|
|
15
|
+
```yaml
|
|
16
|
+
Convert Timestamp:
|
|
17
|
+
action: TRANSFORM_CONVERT_TIMESTAMP
|
|
18
|
+
options:
|
|
19
|
+
columns:
|
|
20
|
+
- date
|
|
21
|
+
- creation_timestamp
|
|
22
|
+
- current_ts
|
|
23
|
+
source_format: unixtime_ms
|
|
24
|
+
target_format: timestamp
|
|
25
|
+
```
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
name: str = "TRANSFORM_CONVERT_TIMESTAMP"
|
|
29
|
+
|
|
30
|
+
def run(
|
|
31
|
+
self,
|
|
32
|
+
context: PipelineContext,
|
|
33
|
+
*,
|
|
34
|
+
columns: list[str] | str | None = None,
|
|
35
|
+
source_format: str = "",
|
|
36
|
+
target_format: str = "",
|
|
37
|
+
**_: Any,
|
|
38
|
+
) -> PipelineContext:
|
|
39
|
+
"""Converts column(s) from a given source format to a new format.
|
|
40
|
+
|
|
41
|
+
Args:
|
|
42
|
+
context: Context in which this Action is executed.
|
|
43
|
+
columns: A column name or a list of column names that should be converted.
|
|
44
|
+
source_format: Initial format type of the column.
|
|
45
|
+
target_format: Desired format type of the column.
|
|
46
|
+
This also supports passing a format string like `yyyy-MM-dd HH:mm:ss`.
|
|
47
|
+
|
|
48
|
+
Raises:
|
|
49
|
+
ValueError: If no column, source_format or target_format are provided.
|
|
50
|
+
ValueError: If source_format or target_format are not supported.
|
|
51
|
+
|
|
52
|
+
Returns:
|
|
53
|
+
PipelineContext: Context after the execution of this Action.
|
|
54
|
+
"""
|
|
55
|
+
if not columns:
|
|
56
|
+
raise ValueError("No column names provided.")
|
|
57
|
+
if not source_format:
|
|
58
|
+
raise ValueError("No source_format provided.")
|
|
59
|
+
if not target_format:
|
|
60
|
+
raise ValueError("No target_format provided.")
|
|
61
|
+
if context.data is None:
|
|
62
|
+
raise ValueError("Context DataFrame is required.")
|
|
63
|
+
df = context.data
|
|
64
|
+
|
|
65
|
+
columns = [columns] if isinstance(columns, str) else columns
|
|
66
|
+
|
|
67
|
+
match source_format:
|
|
68
|
+
# convert always to timestamp first
|
|
69
|
+
case "string" | "date" | "unixtime":
|
|
70
|
+
for column in columns:
|
|
71
|
+
df = df.withColumn(column, F.to_timestamp(F.col(column)))
|
|
72
|
+
case "unixtime_ms":
|
|
73
|
+
for column in columns:
|
|
74
|
+
df = df.withColumn(column, F.to_timestamp(F.col(column) / 1000))
|
|
75
|
+
case "timestamp":
|
|
76
|
+
pass
|
|
77
|
+
case _:
|
|
78
|
+
raise ValueError(f"Unknown source_format {source_format}")
|
|
79
|
+
|
|
80
|
+
match target_format:
|
|
81
|
+
# convert from timestamp to desired output type and format
|
|
82
|
+
case "timestamp":
|
|
83
|
+
pass
|
|
84
|
+
case "unixtime":
|
|
85
|
+
for column in columns:
|
|
86
|
+
df = df.withColumn(column, F.to_unix_timestamp(F.col(column)))
|
|
87
|
+
case "date":
|
|
88
|
+
for column in columns:
|
|
89
|
+
df = df.withColumn(column, F.to_date(F.col(column)))
|
|
90
|
+
case _:
|
|
91
|
+
try:
|
|
92
|
+
for column in columns:
|
|
93
|
+
df = df.withColumn(column, F.date_format(F.col(column), target_format))
|
|
94
|
+
except (IllegalArgumentException, AnalysisException) as e:
|
|
95
|
+
raise ValueError(f"Invalid target_format {target_format}") from e
|
|
96
|
+
|
|
97
|
+
return context.from_existing(data=df)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cloe-nessy
|
|
3
|
-
Version: 0.3.16.7
|
|
3
|
+
Version: 0.3.17.0
|
|
4
4
|
Summary: Your friendly datalake monster.
|
|
5
5
|
Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
|
|
6
6
|
License: MIT
|
|
@@ -58,12 +58,12 @@ Extract-Transform-Load (ETL) Workflow.
|
|
|
58
58
|
|
|
59
59
|
When you are contributing, please refer to our Contribution Guide in the *nessy*
|
|
60
60
|
Docs
|
|
61
|
-
[here](https://
|
|
61
|
+
[here](https://yellow-mud-0b9177e03.2.azurestaticapps.net/tool_docs/nessy/Developer-Guide/)!
|
|
62
62
|
|
|
63
63
|
## Usage
|
|
64
64
|
|
|
65
65
|
Please find the User Guide
|
|
66
|
-
[here](https://
|
|
66
|
+
[here](https://yellow-mud-0b9177e03.2.azurestaticapps.net/tool_docs/nessy/User-Guide/)!
|
|
67
67
|
|
|
68
68
|
## Contact
|
|
69
69
|
|
|
@@ -71,7 +71,7 @@ cloe_nessy/pipeline/pipeline_context.py,sha256=csElDc6BsynDUtRXgQOSCH7ONc_b-ag0Y
|
|
|
71
71
|
cloe_nessy/pipeline/pipeline_parsing_service.py,sha256=c_nAsgw81QYBM9AFiTxGgqRhNXABkDKplbeoCJPtbpE,6434
|
|
72
72
|
cloe_nessy/pipeline/pipeline_plotting_service.py,sha256=goMQj73FzUVchKn5c2SsPcWR6fr7DtVkVrcQfJsKCq4,13111
|
|
73
73
|
cloe_nessy/pipeline/pipeline_step.py,sha256=UlnmpS6gm_dZ7m9dD1mZvye7mvUF_DA7HjOZo0oGYDU,1977
|
|
74
|
-
cloe_nessy/pipeline/actions/__init__.py,sha256=
|
|
74
|
+
cloe_nessy/pipeline/actions/__init__.py,sha256=RZ1UVSn9v88F4GKgHy6UYDzx8zSAMQScJLCeiHO5f8A,2802
|
|
75
75
|
cloe_nessy/pipeline/actions/read_api.py,sha256=RBv5XeHtjTXuCP09Fqo6JNx6iIhQQI-nuAHCuSaGs2s,7778
|
|
76
76
|
cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=TBlJaXJAQwLtwvh7dXsX9ebNN3rS6En6951MnT8xGG8,4101
|
|
77
77
|
cloe_nessy/pipeline/actions/read_excel.py,sha256=Mhl3r_2Hqk2XN7Fl5WqqAyE4JdnwSiivbhWMglyBtkE,7961
|
|
@@ -80,6 +80,7 @@ cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=3ZDy9qiDYtM1oDQzHPC23hL
|
|
|
80
80
|
cloe_nessy/pipeline/actions/transform_change_datatype.py,sha256=24Tn6R3TvUkWCh8V6naLdyNbCbqvyPOOoer-hy_Ebq4,2077
|
|
81
81
|
cloe_nessy/pipeline/actions/transform_clean_column_names.py,sha256=VxvWqENW63c50L96JA1V_ioe4By6gGzx_iY86njOXEM,3044
|
|
82
82
|
cloe_nessy/pipeline/actions/transform_concat_columns.py,sha256=Nk8YbhxDnFZsWzW9Dj5Yl76Uq6VrcMlevQPHGms65L8,3777
|
|
83
|
+
cloe_nessy/pipeline/actions/transform_convert_timestamp.py,sha256=2SL078tBcOmytDbt-cR81jZbclwqELsUB4XDLjaCnNo,3579
|
|
83
84
|
cloe_nessy/pipeline/actions/transform_decode.py,sha256=JajMwHREtxa8u_1Q3RZDBVMjncoSel-WzQFVTO0MREg,4455
|
|
84
85
|
cloe_nessy/pipeline/actions/transform_deduplication.py,sha256=SfTDrOL0TNSC4wITbozabC0jYvceTLnqU4urnEjYk9g,4910
|
|
85
86
|
cloe_nessy/pipeline/actions/transform_distinct.py,sha256=c7aBxANyqT4aKhm0cSELDtD-bP0Se9vxlBF0K4AgQWs,1976
|
|
@@ -106,7 +107,7 @@ cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_Up
|
|
|
106
107
|
cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
107
108
|
cloe_nessy/utils/column_names.py,sha256=dCNtm61mc5aLkY2oE4rlfN3VLCrpot6fOESjAZmCmhA,361
|
|
108
109
|
cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
|
|
109
|
-
cloe_nessy-0.3.
|
|
110
|
-
cloe_nessy-0.3.
|
|
111
|
-
cloe_nessy-0.3.
|
|
112
|
-
cloe_nessy-0.3.
|
|
110
|
+
cloe_nessy-0.3.17.0.dist-info/METADATA,sha256=hR0GqdboYwzBrbZY_ese9kt250DIOHgMlAj3QOqLhF8,3292
|
|
111
|
+
cloe_nessy-0.3.17.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
112
|
+
cloe_nessy-0.3.17.0.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
|
|
113
|
+
cloe_nessy-0.3.17.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|